@onlooker-community/ecosystem 0.9.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/.claude-plugin/marketplace.json +39 -1
  2. package/.claude-plugin/plugin.json +2 -2
  3. package/.github/copilot-instructions.md +46 -0
  4. package/.github/workflows/coverage.yml +78 -0
  5. package/.github/workflows/release.yml +24 -8
  6. package/.github/workflows/test.yml +3 -0
  7. package/.markdownlintignore +3 -0
  8. package/.release-please-manifest.json +4 -1
  9. package/CHANGELOG.md +44 -0
  10. package/README.md +57 -13
  11. package/config.json +6 -1
  12. package/docs/adr/001-claude-code-hooks-as-integration-surface.md +43 -0
  13. package/docs/adr/002-centralized-jsonl-event-log.md +39 -0
  14. package/docs/adr/003-ulid-over-uuid.md +40 -0
  15. package/docs/adr/004-plugin-config-with-settings-overlay.md +34 -0
  16. package/docs/architecture.md +117 -0
  17. package/hooks/hooks.json +4 -0
  18. package/package.json +13 -7
  19. package/plugins/archivist/.claude-plugin/plugin.json +14 -0
  20. package/plugins/archivist/CHANGELOG.md +8 -0
  21. package/plugins/archivist/README.md +105 -0
  22. package/plugins/archivist/config.json +18 -0
  23. package/plugins/archivist/hooks/hooks.json +35 -0
  24. package/plugins/archivist/scripts/hooks/archivist-extract.sh +238 -0
  25. package/plugins/archivist/scripts/hooks/archivist-inject.sh +159 -0
  26. package/plugins/archivist/scripts/lib/archivist-config.sh +66 -0
  27. package/plugins/archivist/scripts/lib/archivist-project-key.sh +91 -0
  28. package/plugins/archivist/scripts/lib/archivist-storage.sh +215 -0
  29. package/plugins/archivist/scripts/lib/archivist-ulid.sh +52 -0
  30. package/plugins/echo/.claude-plugin/plugin.json +14 -0
  31. package/plugins/echo/CHANGELOG.md +24 -0
  32. package/plugins/echo/README.md +110 -0
  33. package/plugins/echo/config.json +15 -0
  34. package/plugins/echo/docs/adr/001-echo-as-separate-plugin.md +33 -0
  35. package/plugins/echo/docs/adr/002-direct-evaluation-vs-tribunal-pipeline.md +35 -0
  36. package/plugins/echo/docs/adr/003-stop-hook-trigger.md +40 -0
  37. package/plugins/echo/hooks/hooks.json +15 -0
  38. package/plugins/echo/scripts/hooks/echo-stop-gate.sh +366 -0
  39. package/plugins/echo/scripts/lib/echo-config.sh +108 -0
  40. package/plugins/echo/scripts/lib/echo-events.sh +74 -0
  41. package/plugins/echo/scripts/lib/echo-project-key.sh +81 -0
  42. package/plugins/echo/scripts/lib/echo-ulid.sh +46 -0
  43. package/plugins/tribunal/.claude-plugin/plugin.json +20 -0
  44. package/plugins/tribunal/CHANGELOG.md +10 -0
  45. package/plugins/tribunal/README.md +134 -0
  46. package/plugins/tribunal/agents/tribunal-actor.md +35 -0
  47. package/plugins/tribunal/agents/tribunal-judge-adversarial.md +51 -0
  48. package/plugins/tribunal/agents/tribunal-judge-security.md +47 -0
  49. package/plugins/tribunal/agents/tribunal-judge-standard.md +47 -0
  50. package/plugins/tribunal/agents/tribunal-meta-judge.md +61 -0
  51. package/plugins/tribunal/config.json +50 -0
  52. package/plugins/tribunal/docs/adr/001-actor-jury-meta-gate-loop.md +40 -0
  53. package/plugins/tribunal/docs/adr/002-majority-gate-policy.md +48 -0
  54. package/plugins/tribunal/hooks/hooks.json +15 -0
  55. package/plugins/tribunal/scripts/hooks/tribunal-stop-gate.sh +267 -0
  56. package/plugins/tribunal/scripts/lib/tribunal-aggregate.sh +65 -0
  57. package/plugins/tribunal/scripts/lib/tribunal-config.sh +101 -0
  58. package/plugins/tribunal/scripts/lib/tribunal-events.sh +97 -0
  59. package/plugins/tribunal/scripts/lib/tribunal-gate.sh +111 -0
  60. package/plugins/tribunal/scripts/lib/tribunal-jury.sh +102 -0
  61. package/plugins/tribunal/scripts/lib/tribunal-project-key.sh +84 -0
  62. package/plugins/tribunal/scripts/lib/tribunal-rubric.sh +153 -0
  63. package/plugins/tribunal/scripts/lib/tribunal-ulid.sh +50 -0
  64. package/plugins/tribunal/scripts/lib/tribunal-verdict.sh +127 -0
  65. package/plugins/tribunal/skills/tribunal/SKILL.md +129 -0
  66. package/release-please-config.json +43 -5
  67. package/scripts/coverage/bash-coverage.mjs +169 -0
  68. package/scripts/coverage/format-comment.mjs +120 -0
  69. package/scripts/coverage/run-coverage.mjs +151 -0
  70. package/scripts/hooks/agent-spawn-tracker.sh +4 -4
  71. package/scripts/hooks/prompt-rule-injector.sh +122 -0
  72. package/scripts/lib/onlooker-event.mjs +82 -10
  73. package/scripts/lib/portable-lock.sh +48 -0
  74. package/scripts/lib/prompt-rules.sh +207 -0
  75. package/scripts/lib/tool-history.sh +7 -8
  76. package/scripts/lib/validate-path.sh +4 -0
  77. package/scripts/lint/check-manifests.mjs +314 -0
  78. package/scripts/lint/check-references.mjs +311 -0
  79. package/skills/list-prompt-rules/SKILL.md +15 -0
  80. package/test/bats/archivist-config-files.bats +60 -0
  81. package/test/bats/archivist-config.bats +54 -0
  82. package/test/bats/archivist-inject.bats +73 -0
  83. package/test/bats/archivist-project-key.bats +75 -0
  84. package/test/bats/archivist-storage.bats +119 -0
  85. package/test/bats/archivist-ulid.bats +36 -0
  86. package/test/bats/config.bats +10 -10
  87. package/test/bats/echo-config.bats +90 -0
  88. package/test/bats/echo-events.bats +121 -0
  89. package/test/bats/echo-project-key.bats +115 -0
  90. package/test/bats/echo-stop-hook.bats +101 -0
  91. package/test/bats/echo-ulid.bats +38 -0
  92. package/test/bats/portable-lock.bats +62 -0
  93. package/test/bats/prompt-rules.bats +269 -0
  94. package/test/bats/read-chunk-tracking.bats +73 -0
  95. package/test/bats/tool-history-tracker.bats +1 -0
  96. package/test/bats/tribunal-aggregate.bats +77 -0
  97. package/test/bats/tribunal-config.bats +86 -0
  98. package/test/bats/tribunal-events.bats +209 -0
  99. package/test/bats/tribunal-gate.bats +95 -0
  100. package/test/bats/tribunal-jury.bats +80 -0
  101. package/test/bats/tribunal-rubric.bats +119 -0
  102. package/test/bats/tribunal-stop-hook.bats +73 -0
  103. package/test/bats/tribunal-verdict.bats +71 -0
  104. package/test/bats/validate-path.bats +1 -1
  105. package/test/fixtures/hook-inputs/post-tool-use-read-chunked.json +15 -0
  106. package/test/fixtures/hook-inputs/user-prompt-submit-rule-match.json +8 -0
  107. package/test/fixtures/hook-inputs/user-prompt-submit-rule-nomatch.json +8 -0
  108. package/test/helpers/setup.bash +9 -0
  109. package/test/node/check-manifests.test.mjs +173 -0
  110. package/test/node/check-references.test.mjs +279 -0
  111. package/test/node/coverage.test.mjs +143 -0
  112. package/test/node/schema-events.test.mjs +41 -1
@@ -0,0 +1,127 @@
1
+ #!/usr/bin/env bash
2
+ # Verdict persistence for Tribunal.
3
+ #
4
+ # Writes per-iteration artifacts under:
5
+ # $ONLOOKER_DIR/tribunal/<project-key>/<task_id>/iteration-<iteration_id>/
6
+ # actor.md
7
+ # jury.json
8
+ # verdicts/<judge_id>.json
9
+ # consensus.json
10
+ # dissent.json (optional)
11
+ # meta.json
12
+ # gate.json
13
+ #
14
+ # Plus task-level files at <task_id>/{manifest,session-start,session-complete}.json.
15
+ #
16
+ # Requires tribunal-project-key.sh to be sourced.
17
+
18
# Print the root directory that holds every Tribunal artifact:
# "$ONLOOKER_DIR/tribunal", or "$HOME/.onlooker/tribunal" when ONLOOKER_DIR is
# unset or empty. No trailing newline is emitted.
tribunal_storage_root() {
  printf '%s/tribunal' "${ONLOOKER_DIR:-$HOME/.onlooker}"
}
22
+
23
# Print the storage directory for one project: "<storage root>/<project key>".
#   $1  project key
tribunal_project_dir() {
  local project_key="$1" root
  root="$(tribunal_storage_root)"
  printf '%s/%s' "$root" "$project_key"
}
27
+
28
# Print the directory for one task: "<project dir>/<task_id>".
#   $1  project key
#   $2  task id
tribunal_task_dir() {
  local project_key="$1" task="$2" parent
  parent="$(tribunal_project_dir "$project_key")"
  printf '%s/%s' "$parent" "$task"
}
33
+
34
# Print the directory for one iteration of a task:
# "<task dir>/iteration-<iteration_id>".
#   $1  project key
#   $2  task id
#   $3  iteration id
tribunal_iteration_dir() {
  local project_key="$1" task="$2" iteration="$3" parent
  parent="$(tribunal_task_dir "$project_key" "$task")"
  printf '%s/iteration-%s' "$parent" "$iteration"
}
40
+
41
# Create the task directory (and any missing parents).
#   $1  project key
#   $2  task id
# Returns 1 when either argument is empty; otherwise the exit status of mkdir
# (its error output is suppressed).
tribunal_init_task() {
  local key="$1" task_id="$2"
  if [[ -z "$key" || -z "$task_id" ]]; then
    return 1
  fi
  mkdir -p "$(tribunal_task_dir "$key" "$task_id")" 2>/dev/null
}
47
+
48
# Create the iteration directory together with its "verdicts" subdirectory.
#   $1  project key
#   $2  task id
#   $3  iteration id
# Returns 1 when any argument is empty; otherwise the exit status of mkdir
# (its error output is suppressed).
tribunal_init_iteration() {
  local key="$1" task_id="$2" iteration_id="$3"
  if [[ -z "$key" || -z "$task_id" || -z "$iteration_id" ]]; then
    return 1
  fi
  mkdir -p "$(tribunal_iteration_dir "$key" "$task_id" "$iteration_id")/verdicts" 2>/dev/null
}
55
+
56
# Write the project-level manifest (one per project key, refreshed each task).
#
#   $1  project key (required)
#   $2  remote URL; an empty string is stored as JSON null
#   $3  repository root; an empty string is stored as JSON null
#
# Returns non-zero when the key is empty, the project directory cannot be
# created, jq fails, or the file cannot be written. The JSON is rendered before
# the file is opened so a jq failure never truncates an existing manifest.
tribunal_write_project_manifest() {
  local key="$1"
  local remote_url="$2"
  local repo_root="$3"
  [[ -z "$key" ]] && return 1

  local dir
  dir="$(tribunal_project_dir "$key")"
  mkdir -p "$dir" 2>/dev/null || return 1

  local now manifest
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  manifest=$(jq -n \
    --arg key "$key" \
    --arg remote "$remote_url" \
    --arg root "$repo_root" \
    --arg now "$now" \
    '{
      project_key: $key,
      remote_url: (if $remote == "" then null else $remote end),
      repo_root: (if $root == "" then null else $root end),
      last_task_at: $now,
      source: "local"
    }') || return 1

  printf '%s\n' "$manifest" > "$dir/manifest.json"
}
79
+
80
# Write the per-task manifest with the active rubric snapshot.
#
#   $1  project key (required)
#   $2  task id (required)
#   $3  task summary
#   $4  rubric id
#   $5  rubric as a JSON document (passed to jq via --argjson)
#
# Returns non-zero when key/task id is empty, the rubric is not valid JSON, the
# task directory cannot be created, or the file cannot be written. The JSON is
# rendered before anything touches the disk, so an invalid rubric no longer
# leaves a truncated, zero-byte manifest.json behind.
tribunal_write_task_manifest() {
  local key="$1"
  local task_id="$2"
  local task_summary="$3"
  local rubric_id="$4"
  local rubric_json="$5"
  [[ -z "$key" || -z "$task_id" ]] && return 1

  local now manifest
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  manifest=$(jq -n \
    --arg task_id "$task_id" \
    --arg summary "$task_summary" \
    --arg rubric_id "$rubric_id" \
    --argjson rubric "$rubric_json" \
    --arg now "$now" \
    '{
      task_id: $task_id,
      task_summary: $summary,
      rubric_id: $rubric_id,
      rubric: $rubric,
      started_at: $now
    }') || return 1

  tribunal_init_task "$key" "$task_id" || return 1
  printf '%s\n' "$manifest" > "$(tribunal_task_dir "$key" "$task_id")/manifest.json"
}
106
+
107
+ # Append-time helpers for each per-iteration artifact. Each takes the full JSON
108
+ # blob the caller wants persisted (typically the same payload it just emitted as
109
+ # a canonical event).
110
# Persist the Actor's output as actor.md inside the iteration directory,
# replacing any previous content. A trailing newline is appended.
#   $1  project key   $2  task id   $3  iteration id   $4  text to store
# Returns 1 when the iteration directory cannot be initialised.
tribunal_write_actor_output() {
  local key="$1" task_id="$2" iteration_id="$3" body="$4"
  if ! tribunal_init_iteration "$key" "$task_id" "$iteration_id"; then
    return 1
  fi
  local target
  target="$(tribunal_iteration_dir "$key" "$task_id" "$iteration_id")/actor.md"
  printf '%s\n' "$body" > "$target"
}
115
+
116
# Persist one named JSON artifact as "<name>.json" inside the iteration
# directory, replacing any previous content. A trailing newline is appended.
#   $1  project key   $2  task id   $3  iteration id
#   $4  artifact name without extension (required)
#   $5  JSON text to store (written verbatim, not validated here)
# Returns 1 when the name is empty (which would otherwise create a hidden
# ".json" file) or when the iteration directory cannot be initialised.
tribunal_write_iteration_artifact() {
  local key="$1" task_id="$2" iteration_id="$3" name="$4" json="$5"
  [[ -z "$name" ]] && return 1
  tribunal_init_iteration "$key" "$task_id" "$iteration_id" || return 1
  printf '%s\n' "$json" > "$(tribunal_iteration_dir "$key" "$task_id" "$iteration_id")/${name}.json"
}
121
+
122
# Persist one judge's verdict as "verdicts/<judge_id>.json" inside the
# iteration directory, replacing any previous content. A trailing newline is
# appended.
#   $1  project key   $2  task id   $3  iteration id
#   $4  judge id (required)
#   $5  verdict JSON text (written verbatim, not validated here)
# Returns 1 when the judge id is empty (which would otherwise create
# "verdicts/.json") or when the iteration directory cannot be initialised.
tribunal_write_judge_verdict() {
  local key="$1" task_id="$2" iteration_id="$3" judge_id="$4" verdict_json="$5"
  [[ -z "$judge_id" ]] && return 1
  tribunal_init_iteration "$key" "$task_id" "$iteration_id" || return 1
  printf '%s\n' "$verdict_json" \
    > "$(tribunal_iteration_dir "$key" "$task_id" "$iteration_id")/verdicts/${judge_id}.json"
}
@@ -0,0 +1,129 @@
1
+ ---
2
+ name: tribunal
3
+ description: Run a task under multi-agent quality gates. Spawns the tribunal-actor subagent, a jury of typed Judges, and a Meta-Judge; aggregates verdicts under a configurable gate policy; retries the Actor with critique on rejection until acceptance or max_iterations. Use when the user explicitly wraps a task with /tribunal, or wants stronger correctness/safety review than a single pass. Emits the full tribunal.* canonical event stream.
4
+ ---
5
+
6
+ # Tribunal: Multi-Agent Execution with Quality Gates
7
+
8
+ You are orchestrating a **Tribunal** evaluation loop. A user task gets wrapped in: **Actor → Jury → Meta-Judge → Gate**, retrying the Actor with feedback until the gate passes or `max_iterations` is reached.
9
+
10
+ ## Setup
11
+
12
+ Before the loop, source the plugin's bash helpers and load config. Run this once at the start:
13
+
14
+ ```bash
15
+ set -uo pipefail
16
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-config.sh"
17
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-rubric.sh"
18
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-jury.sh"
19
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-aggregate.sh"
20
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-gate.sh"
21
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-events.sh"
22
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-verdict.sh"
23
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-project-key.sh"
24
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-ulid.sh"
25
+
26
+ tribunal_config_load "$(pwd)"
27
+ tribunal_rubric_load "$(pwd)"
28
+ ```
29
+
30
+ Parse the task description from the user's prompt arguments. If the user passed `--rubric=<id>`, use that; otherwise use `tribunal_rubric_default_id`.
31
+
32
+ Resolve the active rubric with `tribunal_rubric_get "$rubric_id"`. Validate it with `tribunal_rubric_validate "$rubric"`. If validation fails, abort with `tribunal.session.complete` outcome `aborted` and tell the user why.
33
+
34
+ ## Per-task initialization
35
+
36
+ Generate identifiers and persist task-level state:
37
+
38
+ ```bash
39
+ task_id=$(tribunal_ulid)
40
+ project_key=$(tribunal_project_key "$(pwd)")
41
+ remote=$(tribunal_project_remote_url "$(pwd)")
42
+ repo_root=$(tribunal_project_repo_root "$(pwd)")
43
+ tribunal_write_project_manifest "$project_key" "$remote" "$repo_root"
44
+ tribunal_write_task_manifest "$project_key" "$task_id" "$task_summary" "$rubric_id" "$rubric"
45
+ ```
46
+
47
+ Emit `tribunal.session.start` with the resolved config (`judge_types`, `gate_policy`, `score_threshold`, `max_iterations`, model IDs).
48
+
49
+ ## The loop
50
+
51
+ For `iteration_number` from `0` while `iteration_number < max_iterations`:
52
+
53
+ 1. **Iteration start.** Generate `iteration_id=$(tribunal_ulid)`. `trigger` is `"initial"` for n=0, `"gate_blocked"` for retries. Emit `tribunal.iteration.start`.
54
+
55
+ 2. **Actor.** Emit `tribunal.actor.start`. Use the Task tool to spawn `tribunal-actor` with:
56
+ - The task description.
57
+ - The rubric criteria (just `name` + `weight` + `min_pass`).
58
+ - On retries: a digest of the prior iteration's consensus, dissent (if any), and Meta-Judge override.
59
+
60
+ Capture the Actor's final output. Persist it: `tribunal_write_actor_output "$project_key" "$task_id" "$iteration_id" "$actor_output"`. Emit `tribunal.actor.complete` with `success: true` and the inferred `artifact_kind` (`file` / `patch` / `message` / `command`).
61
+
62
+ 3. **Empanel the jury.** Resolve the panel from configured types:
63
+ ```bash
64
+ types=$(tribunal_config_get_json '.tribunal.session.judge_types')
65
+ # Rubric may override:
66
+ rubric_types=$(printf '%s' "$rubric" | jq -c '.judge_types // empty')
67
+ [[ -n "$rubric_types" && "$rubric_types" != "null" ]] && types="$rubric_types"
68
+ jury=$(tribunal_jury_empanel "$types")
69
+ ```
70
+ Persist the jury (`tribunal_write_iteration_artifact ... jury ...`) and emit `tribunal.jury.empaneled` with the schema-shaped `judges[]` (`tribunal_jury_to_schema_judges "$jury"`).
71
+
72
+ 4. **Run each Judge.** For each entry in the jury panel:
73
+ - Emit `tribunal.judge.start` with `judge_id`, `judge_type`, `judge_model_id`.
74
+ - Spawn the judge subagent (`.subagent` field) with the Actor output + rubric.
75
+ - Parse the JSON object the judge returns. Augment it with `task_id`, `iteration_id`, `judge_id`, `judge_model_id` from the panel entry, and `judge_type` from the panel entry (canonical, overriding what the agent self-reported).
76
+ - Emit `tribunal.verdict` with that payload.
77
+ - Persist with `tribunal_write_judge_verdict`.
78
+
79
+ Collect the verdicts into a JSON array `verdicts`.
80
+
81
+ 5. **Aggregate + dissent.**
82
+ ```bash
83
+ method=$(printf '%s' "$rubric" | jq -r '.aggregation_method // "weighted_mean"')
84
+ threshold=$(printf '%s' "$rubric" | jq -r '.score_threshold // 0.75')
85
+ dissent_threshold=$(tribunal_config_get '.tribunal.session.dissent_threshold')
86
+ [[ -z "$dissent_threshold" ]] && dissent_threshold="0.25"
87
+
88
+ aggregated=$(tribunal_aggregate "$method" "$verdicts" "$rubric")
89
+ dissent=$(tribunal_disagreement "$verdicts")
90
+ ```
91
+ Build and emit `tribunal.consensus.reached`. If `dissent > dissent_threshold`, emit `tribunal.dissent.recorded` (set `resolution` to `null` for now — the Meta-Judge may set it on the next step via `override_recommendation`).
92
+
93
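+ 6. **Meta-Judge.** Emit `tribunal.meta.start`. Spawn `tribunal-meta-judge` with the verdicts and the Actor output. Parse its JSON; augment with `task_id`, `iteration_id`, `meta_model_id`, and keep the result as `meta` (the Gate step passes it to `tribunal_gate_decide` as `$meta`). Emit `tribunal.meta.complete`. Persist it as the iteration's `meta` artifact (`meta.json`).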
94
+
95
+ 7. **Gate.**
96
+ ```bash
97
+ policy=$(printf '%s' "$rubric" | jq -r '.gate_policy // "majority"')
98
+ gate=$(tribunal_gate_decide "$policy" "$verdicts" "$aggregated" "$threshold" "$meta" "$dissent" "$dissent_threshold")
99
+ ```
100
+ If `gate.passed == true`, emit `tribunal.gate.passed` with `final_score: aggregated` and break the loop with outcome `accepted`. Otherwise emit `tribunal.gate.blocked` with the `reason`, `will_retry: (iteration_number + 1 < max_iterations)`, and `retry_iteration_number` if retrying. Persist `gate.json` either way.
101
+
102
+ If blocking and retrying, build the retry digest (lowest-scoring criteria + meta override + dissent summary) and feed it into the next iteration's Actor prompt.
103
+
104
+ ## Termination
105
+
106
+ When the loop exits:
107
+
108
+ - `accepted` — gate passed.
109
+ - `exhausted_iterations` — loop ran `max_iterations` without acceptance.
110
+ - `aborted` — orchestrator caught an unrecoverable error (rubric validation failed, Actor subagent crashed twice, etc.). Set this explicitly when you catch errors; do not silently swallow.
111
+
112
+ Emit `tribunal.session.complete` with `outcome`, `final_score`, `iterations_used`, `total_duration_ms`. Skip `total_cost_usd` in v0.1 — the runtime does not surface subagent costs to the orchestrator yet.
113
+
114
+ ## Summary to the user
115
+
116
+ After emitting `session.complete`, render a compact markdown summary to the user:
117
+
118
+ - Verdict (✓ accepted / ✗ rejected / ⏱ exhausted / ⚠ aborted) with final score.
119
+ - Per-iteration table: iteration | per-judge scores | dissent | gate result.
120
+ - Meta-Judge bias notes if any.
121
+ - Path to the persisted artifacts (`$ONLOOKER_DIR/tribunal/<key>/<task_id>/`, where `ONLOOKER_DIR` defaults to `~/.onlooker`).
122
+
123
+ Keep the summary terse. The artifacts on disk are the long form.
124
+
125
+ ## Error handling
126
+
127
+ - If a judge subagent fails to return parseable JSON, treat that judge as `score: 0, passed: false, confidence: 0` and surface the parse error in `feedback_summary`. Do not abort the iteration — let the gate decide.
128
+ - If the Meta-Judge fails, default to `verdict_quality: "questionable", bias_detected: false` so the gate falls back to score-based logic.
129
+ - If event emission fails (schema validation), keep going and write a warning to stderr. The persisted artifacts on disk are still trustworthy.
@@ -10,18 +10,56 @@
10
10
  "extra-files": [
11
11
  {
12
12
  "type": "json",
13
- "path": "package.json",
13
+ "path": ".claude-plugin/plugin.json",
14
+ "jsonpath": "$.version"
15
+ }
16
+ ]
17
+ },
18
+ "plugins/archivist": {
19
+ "changelog-path": "CHANGELOG.md",
20
+ "release-type": "simple",
21
+ "bump-minor-pre-major": true,
22
+ "bump-patch-for-minor-pre-major": false,
23
+ "component": "archivist",
24
+ "draft": false,
25
+ "prerelease": false,
26
+ "extra-files": [
27
+ {
28
+ "type": "json",
29
+ "path": ".claude-plugin/plugin.json",
14
30
  "jsonpath": "$.version"
15
- },
31
+ }
32
+ ]
33
+ },
34
+ "plugins/tribunal": {
35
+ "changelog-path": "CHANGELOG.md",
36
+ "release-type": "simple",
37
+ "bump-minor-pre-major": true,
38
+ "bump-patch-for-minor-pre-major": false,
39
+ "component": "tribunal",
40
+ "draft": false,
41
+ "prerelease": false,
42
+ "extra-files": [
16
43
  {
17
44
  "type": "json",
18
45
  "path": ".claude-plugin/plugin.json",
19
46
  "jsonpath": "$.version"
20
- },
47
+ }
48
+ ]
49
+ },
50
+ "plugins/echo": {
51
+ "changelog-path": "CHANGELOG.md",
52
+ "release-type": "simple",
53
+ "bump-minor-pre-major": true,
54
+ "bump-patch-for-minor-pre-major": false,
55
+ "component": "echo",
56
+ "draft": false,
57
+ "prerelease": false,
58
+ "extra-files": [
21
59
  {
22
60
  "type": "json",
23
- "path": ".claude-plugin/marketplace.json",
24
- "jsonpath": "$.plugins[0].version"
61
+ "path": ".claude-plugin/plugin.json",
62
+ "jsonpath": "$.version"
25
63
  }
26
64
  ]
27
65
  }
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env node
2
+ // Bash "tested-functions ratio" heuristic.
3
+ //
4
+ // True bash line coverage (bashcov / kcov) is heavy and flaky in CI, so
5
+ // instead we ask the cheaper question: "for every public function defined
6
+ // in scripts/lib/, does at least one bats test reference it by name?". The
7
+ // result is a per-file ratio plus a flat list of untested public functions.
8
+ //
9
+ // What counts as a "public" function:
10
+ // * defined with either `name() { ... }` or `function name { ... }`
11
+ // * name does NOT start with an underscore (those are private helpers and
12
+ // should be tested indirectly through their callers).
13
+ //
14
+ // What counts as a "reference" in tests:
15
+ // * the function name appears as a standalone word in any *.bats file
16
+ // (typical patterns: `run my_func ...`, `my_func "$arg"`, or sourced
17
+ // and called directly). False positives are possible — that's the cost
18
+ // of a heuristic — but the score is still useful as a regression gate
19
+ // and is calibrated against the noise floor.
20
+ //
21
+ // Flags:
22
+ // --json emit structured JSON on stdout (default: human-readable)
23
+ // --root <p> override repo root
24
+ //
25
+ // Exit codes: always 0; this is an informational tool. Use --json to feed
26
+ // into format-comment.mjs.
27
+
28
+ import { readdirSync, readFileSync, statSync } from 'node:fs';
29
+ import { dirname, join, relative, resolve } from 'node:path';
30
+ import { fileURLToPath } from 'node:url';
31
+
32
/**
 * Locate the repository root by climbing from `start` until a directory that
 * contains `.claude-plugin/marketplace.json` is found.
 *
 * @param {string} start - Directory to begin the search from (resolved to an absolute path).
 * @returns {string} Absolute path of the nearest directory holding the marker file.
 * @throws {Error} When the filesystem root is reached without finding the marker.
 */
function findRepoRoot(start) {
  let cur = resolve(start);
  for (;;) {
    try {
      statSync(join(cur, '.claude-plugin', 'marketplace.json'));
      return cur;
    } catch {
      // Marker missing (or not stat-able) at this level; keep climbing.
    }
    const parent = dirname(cur);
    // dirname() returns its input at the filesystem root on every platform
    // ("/" on POSIX, "C:\\" on Windows), so this terminates portably instead of
    // comparing against a hard-coded "/".
    if (parent === cur) break;
    cur = parent;
  }
  throw new Error(`no repo root above ${start}`);
}
43
+
44
/**
 * Parse the command line of the bash-coverage tool.
 *
 * Recognised flags: `--json` (boolean) and `--root <path>`. Unknown arguments
 * are ignored. The first two entries of `argv` (runtime and script) are skipped.
 *
 * @param {string[]} argv - Full argument vector, e.g. `process.argv`.
 * @returns {{json: boolean, root: (string|null)}} Parsed options; `root` stays
 *   `null` when `--root` is absent or is the last argument with no value.
 */
function parseArgs(argv) {
  const out = { json: false, root: null };
  for (let i = 2; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === '--json') {
      out.json = true;
    } else if (flag === '--root') {
      i += 1;
      // A trailing `--root` has no value; keep the documented null default
      // rather than leaking `undefined` into the result.
      out.root = argv[i] ?? null;
    }
  }
  return out;
}
52
+
53
/**
 * Collect every regular file under `dir` whose path satisfies `predicate`.
 *
 * Directories named `node_modules` or `.git` are not descended into, and a
 * directory that cannot be read is skipped silently.
 *
 * @param {string} dir - Directory to start from.
 * @param {(path: string) => boolean} predicate - Called with each file path.
 * @returns {string[]} Matching paths, sorted with the default string ordering.
 */
function walk(dir, predicate) {
  const matches = [];
  const pending = [dir];
  while (pending.length > 0) {
    const folder = pending.pop();
    let entries;
    try {
      entries = readdirSync(folder, { withFileTypes: true });
    } catch {
      // Missing or unreadable directory: nothing to collect here.
      continue;
    }
    for (const entry of entries) {
      const full = join(folder, entry.name);
      if (entry.isDirectory()) {
        const skipped = entry.name === 'node_modules' || entry.name === '.git';
        if (!skipped) pending.push(full);
        continue;
      }
      if (!entry.isFile()) continue;
      if (predicate(full)) matches.push(full);
    }
  }
  matches.sort();
  return matches;
}
76
+
77
/**
 * Extract the names of public, top-level bash functions from a script.
 *
 * Recognised definition forms (must start at column 0):
 *   - `name() {` / `name () {`
 *   - `function name {` / `function name`      (no parentheses)
 *   - `function name() {`
 *
 * Indented lines are skipped (nested functions are not part of the file's
 * public surface), as are names starting with `_` and a few shell keywords
 * that can look like definitions.
 *
 * @param {string} content - Full text of a shell script.
 * @returns {string[]} Unique function names in order of first appearance.
 */
function extractFunctions(content) {
  const NAME = '[A-Za-z_][A-Za-z0-9_]*';
  // `function name` with optional `()`. The lookahead keeps a longer token
  // such as `function foo-bar` from being reported as `foo`.
  const keywordForm = new RegExp(`^function\\s+(${NAME})(?:\\s*\\(\\s*\\))?(?=\\s|\\{|$)`);
  // POSIX form: the parentheses are what mark the line as a definition.
  const parenForm = new RegExp(`^(${NAME})\\s*\\(\\s*\\)`);
  const keywords = new Set(['if', 'while', 'for', 'case', 'then', 'do', 'else', 'fi', 'done']);

  const names = new Set();
  for (const line of content.split(/\r?\n/)) {
    // Strict: must start at column 0 (no leading whitespace).
    if (line.length === 0 || line[0] === ' ' || line[0] === '\t') continue;
    const m = line.match(keywordForm) ?? line.match(parenForm);
    if (!m) continue;
    const name = m[1];
    // Skip private helpers and bash keywords that look like fn names.
    if (name.startsWith('_') || keywords.has(name)) continue;
    names.add(name);
  }
  return [...names];
}
97
+
98
/**
 * Report whether `name` occurs in `testsContent` as a standalone word, i.e.
 * not directly preceded or followed by a letter, digit or underscore. This
 * catches `run name`, `name "$x"`, `$( name )`, etc.
 *
 * @param {string} name - Function name to look for; matched literally.
 * @param {string} testsContent - Concatenated text of the test files.
 * @returns {boolean} `true` when a standalone occurrence exists; always
 *   `false` for an empty name.
 */
function isReferenced(name, testsContent) {
  // An empty name would make the pattern match nearly any text.
  if (name === '') return false;
  // Escape regex metacharacters so the name is matched literally.
  const literal = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const rx = new RegExp(`(^|[^A-Za-z0-9_])${literal}([^A-Za-z0-9_]|$)`);
  return rx.test(testsContent);
}
104
+
105
/**
 * Entry point: compute the tested-functions ratio for the bash libraries and
 * print it either as JSON (`--json`) or as a human-readable listing.
 *
 * Reads every `*.sh` file under the library directories, extracts public
 * function names, and checks each name against the concatenated text of all
 * `*.bats` files under `test/bats`. A file with no public functions is
 * reported with ratio 1.
 *
 * @returns {void}
 * @throws {Error} When no `--root` is given and no repo root can be located.
 */
function main() {
  const args = parseArgs(process.argv);
  const here = dirname(fileURLToPath(import.meta.url));
  const root = args.root ? resolve(args.root) : findRepoRoot(here);

  // NOTE(review): only the core and archivist libraries are scanned; other
  // plugins' scripts/lib directories are not included — confirm this is intended.
  const libDirs = [join(root, 'scripts', 'lib'), join(root, 'plugins', 'archivist', 'scripts', 'lib')];
  // walk() already skips directories it cannot read, so no extra guard is needed.
  const libFiles = libDirs.flatMap((d) => walk(d, (p) => p.endsWith('.sh')));

  const testsDir = join(root, 'test', 'bats');
  const testFiles = walk(testsDir, (p) => p.endsWith('.bats'));
  const testsContent = testFiles.map((f) => readFileSync(f, 'utf8')).join('\n');

  const perFile = [];
  const untested = [];
  let totalFns = 0;
  let totalTested = 0;

  for (const file of libFiles) {
    const fns = extractFunctions(readFileSync(file, 'utf8'));
    // Names are unique per file, so one pass yields both the untested list and
    // (by subtraction) the tested count.
    const missing = fns.filter((name) => !isReferenced(name, testsContent));
    const fileTotal = fns.length;
    const fileTested = fileTotal - missing.length;
    totalFns += fileTotal;
    totalTested += fileTested;
    const relpath = relative(root, file);
    perFile.push({
      file: relpath,
      total: fileTotal,
      tested: fileTested,
      ratio: fileTotal === 0 ? 1 : fileTested / fileTotal,
      untested: missing,
    });
    for (const name of missing) {
      untested.push({ file: relpath, name });
    }
  }

  const overallRatio = totalFns === 0 ? 1 : totalTested / totalFns;
  const report = {
    overall: { total: totalFns, tested: totalTested, ratio: overallRatio },
    files: perFile,
    untested,
  };

  if (args.json) {
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    return;
  }

  process.stdout.write(`bash function coverage: ${totalTested}/${totalFns} (${(overallRatio * 100).toFixed(1)}%)\n\n`);
  for (const f of perFile) {
    const pct = (f.ratio * 100).toFixed(0).padStart(3);
    process.stdout.write(` ${pct}% ${f.tested}/${f.total} ${f.file}\n`);
    for (const u of f.untested) {
      process.stdout.write(` - ${u}\n`);
    }
  }
}
168
+
169
+ main();
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env node
2
+ // Combine node coverage + bash function coverage into a single markdown
3
+ // comment suitable for posting on a pull request via `gh pr comment`.
4
+ //
5
+ // Reads each report from a file path (so the caller can capture stdout
6
+ // once and pass the file through). Emits markdown on stdout.
7
+ //
8
+ // Usage:
9
+ // format-comment.mjs --node coverage-node.json --bash coverage-bash.json
10
+ //
11
+ // Each file should be JSON produced by the matching script's --json mode.
12
+
13
+ import { readFileSync } from 'node:fs';
14
+
15
/**
 * Parse the command line of the coverage-comment formatter.
 *
 * Recognised flags, each taking one value: `--node <file>`, `--bash <file>`,
 * `--sha <commit>`. Unknown arguments are ignored. The first two entries of
 * `argv` (runtime and script) are skipped.
 *
 * @param {string[]} argv - Full argument vector, e.g. `process.argv`.
 * @returns {{node: (string|null), bash: (string|null), sha: (string|null)}}
 *   Parsed options. `sha` defaults to the `GITHUB_SHA` environment variable
 *   (or `null`). A flag given last with no value leaves its default intact.
 */
function parseArgs(argv) {
  const out = { node: null, bash: null, sha: process.env.GITHUB_SHA ?? null };
  const valueFlags = new Map([
    ['--node', 'node'],
    ['--bash', 'bash'],
    ['--sha', 'sha'],
  ]);
  for (let i = 2; i < argv.length; i++) {
    const key = valueFlags.get(argv[i]);
    if (key === undefined) continue;
    i += 1;
    // A trailing flag has no value: keep the default (notably GITHUB_SHA)
    // instead of overwriting it with `undefined`.
    if (argv[i] !== undefined) out[key] = argv[i];
  }
  return out;
}
24
+
25
/**
 * Format a percentage value with one decimal place.
 *
 * @param {*} n - Value already expressed in percent (e.g. `87.5`).
 * @returns {string} `"<n>%"` with one decimal, or an em dash when `n` is not a
 *   number or is NaN.
 */
function pct(n) {
  const usable = typeof n === 'number' && !Number.isNaN(n);
  return usable ? `${n.toFixed(1)}%` : '—';
}
29
+
30
/**
 * Pick a traffic-light emoji for a coverage percentage.
 *
 * Thresholds: for `kind === 'bash'` green is >= 70 and yellow is >= 50; for
 * any other kind green is >= 80 and yellow is >= 60. Anything lower is red.
 *
 * @param {*} value - Coverage in percent.
 * @param {string} kind - `'bash'` selects the bash thresholds.
 * @returns {string} One of 🟢, 🟡, 🔴, or ⚪ when `value` is not a usable
 *   number (non-number or NaN, matching how `pct` treats missing values).
 */
function badge(value, kind) {
  // NaN previously fell through every comparison and rendered as red even
  // though the percentage next to it is shown as missing.
  if (typeof value !== 'number' || Number.isNaN(value)) return '⚪';
  const [green, yellow] = kind === 'bash' ? [70, 50] : [80, 60];
  if (value >= green) return '🟢';
  if (value >= yellow) return '🟡';
  return '🔴';
}
41
+
42
/**
 * Render the Node coverage section of the PR comment as markdown.
 *
 * @param {?{overall: object, files: Array<object>}} report - Parsed node
 *   coverage report; reads `overall.line/branch/funcs` and, per file,
 *   `file/line/branch/funcs`.
 * @returns {string} A summary line plus a per-file table, or a placeholder
 *   sentence when the report or its `overall` entry is missing. A report
 *   without a `files` array renders the summary and an empty table instead of
 *   throwing.
 */
function nodeSection(report) {
  if (!report?.overall) {
    return '_No node coverage report._';
  }
  const o = report.overall;
  const files = Array.isArray(report.files) ? report.files : [];
  const lines = [
    `**Overall:** ${badge(o.line, 'node')} ${pct(o.line)} lines · ${pct(o.branch)} branches · ${pct(o.funcs)} functions`,
    '',
    '| file | line | branch | funcs |',
    '| --- | ---: | ---: | ---: |',
    ...files.map((f) => `| \`${f.file}\` | ${pct(f.line)} | ${pct(f.branch)} | ${pct(f.funcs)} |`),
  ];
  return lines.join('\n');
}
58
+
59
/**
 * Render the bash function-coverage section of the PR comment as markdown.
 *
 * @param {?{overall: object, files: Array<object>, untested: Array<object>}} report
 *   Parsed bash coverage report; reads `overall.ratio/tested/total`, per-file
 *   `file/tested/total/ratio`, and `untested[].file/name`.
 * @returns {string} A summary line, a per-file table (files with `total === 0`
 *   are omitted) and, when any exist, a collapsible list of untested
 *   functions; or a placeholder sentence when the report or its `overall`
 *   entry is missing. Missing `files` / `untested` arrays are treated as empty
 *   instead of throwing.
 */
function bashSection(report) {
  if (!report?.overall) {
    return '_No bash function coverage report._';
  }
  const o = report.overall;
  const files = Array.isArray(report.files) ? report.files : [];
  const untested = Array.isArray(report.untested) ? report.untested : [];
  const overallPct = o.ratio * 100;
  const lines = [
    `**Overall:** ${badge(overallPct, 'bash')} ${o.tested}/${o.total} public functions exercised by bats (${pct(overallPct)})`,
    '',
    '| file | tested / total | ratio |',
    '| --- | ---: | ---: |',
  ];
  for (const f of files) {
    // Files without public functions carry no signal; keep the table short.
    if (f.total === 0) continue;
    lines.push(`| \`${f.file}\` | ${f.tested} / ${f.total} | ${pct(f.ratio * 100)} |`);
  }
  if (untested.length > 0) {
    lines.push('');
    lines.push('<details><summary>Untested public functions</summary>');
    lines.push('');
    for (const u of untested) {
      lines.push(`- \`${u.file}\` — \`${u.name}\``);
    }
    lines.push('');
    lines.push('</details>');
  }
  return lines.join('\n');
}
87
+
88
/**
 * Entry point: read the optional node and bash coverage reports named on the
 * command line and print one markdown comment to stdout.
 *
 * The output starts with the `<!-- onlooker-coverage-comment -->` marker,
 * optionally shows the first 12 characters of the commit SHA, then the Node
 * and Bash sections followed by a note explaining the bash heuristic.
 *
 * @returns {void}
 * @throws {Error} When a given report file cannot be read or is not valid JSON.
 */
function main() {
  const { node, bash, sha } = parseArgs(process.argv);
  const load = (path) => (path ? JSON.parse(readFileSync(path, 'utf8')) : null);
  const nodeReport = load(node);
  const bashReport = load(bash);

  const commitLines = sha ? [`Commit: \`${sha.slice(0, 12)}\``, ''] : [];
  const out = [
    '<!-- onlooker-coverage-comment -->',
    '## Coverage',
    '',
    ...commitLines,
    '### Node (.mjs)',
    '',
    nodeSection(nodeReport),
    '',
    '### Bash (function-reference heuristic)',
    '',
    bashSection(bashReport),
    '',
    '---',
    '',
    'Bash numbers are a heuristic — they count public functions referenced by bats tests, not real line coverage. A red score points to public helpers nobody directly exercises.',
  ];

  process.stdout.write(`${out.join('\n')}\n`);
}
119
+
120
+ main();