@onlooker-community/ecosystem 0.10.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/.claude-plugin/marketplace.json +39 -1
  2. package/.claude-plugin/plugin.json +2 -2
  3. package/.github/copilot-instructions.md +46 -0
  4. package/.github/workflows/coverage.yml +78 -0
  5. package/.github/workflows/release.yml +24 -8
  6. package/.github/workflows/test.yml +3 -0
  7. package/.markdownlintignore +3 -0
  8. package/.release-please-manifest.json +5 -1
  9. package/CHANGELOG.md +44 -0
  10. package/README.md +58 -13
  11. package/config.json +6 -1
  12. package/docs/adr/001-claude-code-hooks-as-integration-surface.md +43 -0
  13. package/docs/adr/002-centralized-jsonl-event-log.md +39 -0
  14. package/docs/adr/003-ulid-over-uuid.md +40 -0
  15. package/docs/adr/004-plugin-config-with-settings-overlay.md +34 -0
  16. package/docs/architecture.md +123 -0
  17. package/hooks/hooks.json +4 -0
  18. package/package.json +13 -7
  19. package/plugins/archivist/.claude-plugin/plugin.json +14 -0
  20. package/plugins/archivist/CHANGELOG.md +8 -0
  21. package/plugins/archivist/README.md +105 -0
  22. package/plugins/archivist/config.json +18 -0
  23. package/plugins/archivist/hooks/hooks.json +35 -0
  24. package/plugins/archivist/scripts/hooks/archivist-extract.sh +238 -0
  25. package/plugins/archivist/scripts/hooks/archivist-inject.sh +159 -0
  26. package/plugins/archivist/scripts/lib/archivist-config.sh +66 -0
  27. package/plugins/archivist/scripts/lib/archivist-project-key.sh +91 -0
  28. package/plugins/archivist/scripts/lib/archivist-storage.sh +215 -0
  29. package/plugins/archivist/scripts/lib/archivist-ulid.sh +52 -0
  30. package/plugins/cartographer/.claude-plugin/plugin.json +14 -0
  31. package/plugins/cartographer/CHANGELOG.md +27 -0
  32. package/plugins/cartographer/README.md +113 -0
  33. package/plugins/cartographer/config.json +21 -0
  34. package/plugins/cartographer/docs/adr/001-background-audit-launch.md +28 -0
  35. package/plugins/cartographer/docs/adr/002-flock-pid-file-fallback.md +30 -0
  36. package/plugins/cartographer/docs/adr/003-at-least-once-event-delivery.md +32 -0
  37. package/plugins/cartographer/docs/adr/004-exclude-paths-replace-semantics.md +27 -0
  38. package/plugins/cartographer/hooks/hooks.json +44 -0
  39. package/plugins/cartographer/scripts/hooks/cartographer-post-write.sh +87 -0
  40. package/plugins/cartographer/scripts/hooks/cartographer-session-start.sh +89 -0
  41. package/plugins/cartographer/scripts/lib/cartographer-analyze.sh +286 -0
  42. package/plugins/cartographer/scripts/lib/cartographer-collect.sh +59 -0
  43. package/plugins/cartographer/scripts/lib/cartographer-config.sh +105 -0
  44. package/plugins/cartographer/scripts/lib/cartographer-events.sh +82 -0
  45. package/plugins/cartographer/scripts/lib/cartographer-lock.sh +38 -0
  46. package/plugins/cartographer/scripts/lib/cartographer-project-key.sh +55 -0
  47. package/plugins/cartographer/scripts/lib/cartographer-ulid.sh +47 -0
  48. package/plugins/cartographer/scripts/run-audit.sh +309 -0
  49. package/plugins/cartographer/skills/cartographer/SKILL.md +154 -0
  50. package/plugins/echo/.claude-plugin/plugin.json +14 -0
  51. package/plugins/echo/CHANGELOG.md +24 -0
  52. package/plugins/echo/README.md +110 -0
  53. package/plugins/echo/config.json +15 -0
  54. package/plugins/echo/docs/adr/001-echo-as-separate-plugin.md +33 -0
  55. package/plugins/echo/docs/adr/002-direct-evaluation-vs-tribunal-pipeline.md +35 -0
  56. package/plugins/echo/docs/adr/003-stop-hook-trigger.md +40 -0
  57. package/plugins/echo/hooks/hooks.json +15 -0
  58. package/plugins/echo/scripts/hooks/echo-stop-gate.sh +366 -0
  59. package/plugins/echo/scripts/lib/echo-config.sh +108 -0
  60. package/plugins/echo/scripts/lib/echo-events.sh +74 -0
  61. package/plugins/echo/scripts/lib/echo-project-key.sh +81 -0
  62. package/plugins/echo/scripts/lib/echo-ulid.sh +46 -0
  63. package/plugins/tribunal/.claude-plugin/plugin.json +20 -0
  64. package/plugins/tribunal/CHANGELOG.md +10 -0
  65. package/plugins/tribunal/README.md +134 -0
  66. package/plugins/tribunal/agents/tribunal-actor.md +35 -0
  67. package/plugins/tribunal/agents/tribunal-judge-adversarial.md +51 -0
  68. package/plugins/tribunal/agents/tribunal-judge-security.md +47 -0
  69. package/plugins/tribunal/agents/tribunal-judge-standard.md +47 -0
  70. package/plugins/tribunal/agents/tribunal-meta-judge.md +61 -0
  71. package/plugins/tribunal/config.json +50 -0
  72. package/plugins/tribunal/docs/adr/001-actor-jury-meta-gate-loop.md +40 -0
  73. package/plugins/tribunal/docs/adr/002-majority-gate-policy.md +48 -0
  74. package/plugins/tribunal/hooks/hooks.json +15 -0
  75. package/plugins/tribunal/scripts/hooks/tribunal-stop-gate.sh +267 -0
  76. package/plugins/tribunal/scripts/lib/tribunal-aggregate.sh +65 -0
  77. package/plugins/tribunal/scripts/lib/tribunal-config.sh +101 -0
  78. package/plugins/tribunal/scripts/lib/tribunal-events.sh +97 -0
  79. package/plugins/tribunal/scripts/lib/tribunal-gate.sh +111 -0
  80. package/plugins/tribunal/scripts/lib/tribunal-jury.sh +102 -0
  81. package/plugins/tribunal/scripts/lib/tribunal-project-key.sh +84 -0
  82. package/plugins/tribunal/scripts/lib/tribunal-rubric.sh +153 -0
  83. package/plugins/tribunal/scripts/lib/tribunal-ulid.sh +50 -0
  84. package/plugins/tribunal/scripts/lib/tribunal-verdict.sh +127 -0
  85. package/plugins/tribunal/skills/tribunal/SKILL.md +129 -0
  86. package/release-please-config.json +59 -5
  87. package/scripts/coverage/bash-coverage.mjs +169 -0
  88. package/scripts/coverage/format-comment.mjs +120 -0
  89. package/scripts/coverage/run-coverage.mjs +151 -0
  90. package/scripts/hooks/agent-spawn-tracker.sh +4 -4
  91. package/scripts/hooks/prompt-rule-injector.sh +122 -0
  92. package/scripts/lib/portable-lock.sh +48 -0
  93. package/scripts/lib/prompt-rules.sh +207 -0
  94. package/scripts/lib/tool-history.sh +7 -8
  95. package/scripts/lib/validate-path.sh +4 -0
  96. package/scripts/lint/check-manifests.mjs +314 -0
  97. package/scripts/lint/check-references.mjs +311 -0
  98. package/skills/list-prompt-rules/SKILL.md +15 -0
  99. package/test/bats/archivist-config-files.bats +60 -0
  100. package/test/bats/archivist-config.bats +54 -0
  101. package/test/bats/archivist-inject.bats +73 -0
  102. package/test/bats/archivist-project-key.bats +75 -0
  103. package/test/bats/archivist-storage.bats +119 -0
  104. package/test/bats/archivist-ulid.bats +36 -0
  105. package/test/bats/cartographer-config.bats +107 -0
  106. package/test/bats/cartographer-lock.bats +77 -0
  107. package/test/bats/cartographer-ulid.bats +56 -0
  108. package/test/bats/config.bats +10 -10
  109. package/test/bats/echo-config.bats +90 -0
  110. package/test/bats/echo-events.bats +121 -0
  111. package/test/bats/echo-project-key.bats +115 -0
  112. package/test/bats/echo-stop-hook.bats +101 -0
  113. package/test/bats/echo-ulid.bats +38 -0
  114. package/test/bats/portable-lock.bats +62 -0
  115. package/test/bats/prompt-rules.bats +269 -0
  116. package/test/bats/tribunal-aggregate.bats +77 -0
  117. package/test/bats/tribunal-config.bats +86 -0
  118. package/test/bats/tribunal-events.bats +209 -0
  119. package/test/bats/tribunal-gate.bats +95 -0
  120. package/test/bats/tribunal-jury.bats +80 -0
  121. package/test/bats/tribunal-rubric.bats +119 -0
  122. package/test/bats/tribunal-stop-hook.bats +73 -0
  123. package/test/bats/tribunal-verdict.bats +71 -0
  124. package/test/fixtures/hook-inputs/user-prompt-submit-rule-match.json +8 -0
  125. package/test/fixtures/hook-inputs/user-prompt-submit-rule-nomatch.json +8 -0
  126. package/test/helpers/setup.bash +9 -0
  127. package/test/node/check-manifests.test.mjs +173 -0
  128. package/test/node/check-references.test.mjs +279 -0
  129. package/test/node/coverage.test.mjs +143 -0
@@ -0,0 +1,35 @@
1
+ # ADR-002: Direct `claude -p` Evaluation vs. Routing Through Tribunal's Pipeline
2
+
3
+ **Status:** Accepted (with planned future extension)
4
+ **Date:** 2026-05-24
5
+
6
+ ## Context
7
+
8
+ Echo needs to evaluate prompt file quality before and after a change. Two approaches were available:
9
+
10
+ **Option A — Direct `claude -p`**: Build an inline rubric prompt, call `claude -p --max-turns 1` for each file, and parse the JSON score from the response.
11
+
12
+ **Option B — Tribunal pipeline**: Invoke Tribunal's multi-judge Actor → Jury → Meta-Judge → Gate loop for each file and use the aggregated score as the quality signal.
13
+
14
+ ## Decision
15
+
16
+ Echo v0.1 uses **Option A** — direct `claude -p` with an inline rubric.
17
+
18
+ ## Rationale
19
+
20
+ **Stop hook latency budget.** A Stop hook fires synchronously at the end of every session. Tribunal's full loop (Actor + two judges + Meta-Judge + Gate, with potential retries) takes 30–120 seconds per task. Multiplied across several watched files, this would make sessions feel like they hang after every edit. A single `claude -p` call with a 60-second timeout keeps the overhead acceptable.
21
+
22
+ **Echo evaluates prompts, not outputs.** Tribunal's loop is designed to evaluate an Agent's *work product* against a rubric. Echo evaluates *the prompt file itself* — a simpler, single-document task. A full jury is architecturally overweight for this use case.
23
+
24
+ **Baseline stability.** Tribunal's multi-judge scores have meaningful variance across runs (different judge models, adversarial judge behavior, Meta-Judge overrides). Echo's baseline comparison depends on stable, reproducible scores — a single `claude -p` pass with a fixed model and rubric is more consistent as a yardstick.
25
+
26
+ **Haiku is cheap enough.** Evaluating a prompt file with Haiku costs a fraction of a cent. Running a full Tribunal loop (Opus-class models for judges) would cost 10–50× more per file per session. With a default model of `claude-haiku-4-5-20251001`, Echo can run automatically without raising cost concerns.
27
+
28
+ **Independent of Tribunal installation.** Option A requires only the `claude` CLI. Option B would make Tribunal a hard runtime dependency of Echo, coupling two plugins that have separate versioning and installation paths (see ADR-001).
29
+
30
+ ## Consequences
31
+
32
+ - Echo's evaluation quality is bounded by a single-model, single-pass rubric. It will miss issues that a diverse jury would catch, but it is consistent enough to detect regressions.
33
+ - The scoring rubric (role clarity, output format, criterion coverage, internal consistency) is hardcoded in the hook rather than being user-overridable in v0.1. A future version should expose this as config.
34
+ - A future `echo.mode: "tribunal"` config option could delegate to Tribunal's jury for higher-confidence evaluation when cost and latency are acceptable. The current design leaves room for this — Echo's event schema (`echo.suite.started`, etc.) is agnostic to the underlying evaluator.
35
+ - The `claude -p` response parsing includes a `sed` strip for accidental markdown fences, which Tribunal's pipeline avoids by using structured judge output. This is a fragility to watch.
@@ -0,0 +1,40 @@
1
+ # ADR-003: Stop Hook as the Trigger Mechanism
2
+
3
+ **Status:** Accepted
4
+ **Date:** 2026-05-24
5
+
6
+ ## Context
7
+
8
+ Echo needs to know when an agent file has changed and run an evaluation. Several trigger points were considered:
9
+
10
+ - **Stop hook** — fires when a Claude Code session ends.
11
+ - **Pre-commit hook** — fires when the developer runs `git commit`.
12
+ - **PostToolUse hook** — fires after every tool call that writes a file.
13
+ - **CI step** — fires on push to a remote branch.
14
+ - **Manual `/echo` skill** — user-invoked on demand.
15
+
16
+ ## Decision
17
+
18
+ Echo v0.1 uses the **Stop hook**.
19
+
20
+ ## Rationale
21
+
22
+ **Correct granularity.** A session is the natural unit of prompt engineering work. A developer edits `tribunal-judge-standard.md`, tests it through several turns, and ends the session. That's the moment Echo should fire — after the work is done, not after each intermediate save.
23
+
24
+ **Claude Code already provides it.** The Stop hook is a first-class Claude Code hook type with a well-defined contract: the hook receives `{cwd, session_id}` on stdin and must exit 0 (or the session stop is blocked, which is why Echo always exits 0). No additional tooling or git hooks needed.
25
+
26
+ **Consistent with Tribunal's pattern.** Tribunal's Stop hook (when enabled) follows the same pattern — an advisory pass that fires at session end without blocking the stop. Echo mirrors this, which keeps the plugin model coherent across the ecosystem.
27
+
28
+ **No commit discipline required.** A pre-commit hook would only fire when the developer commits. Prompt engineering workflows often involve many experimental edits before any commit. Echo should capture signal on *any* session where a watched file changed, not only on sessions that end in a commit. Untracked, unstaged, and staged files are all included in Echo's change detection.
29
+
30
+ **Low friction.** PostToolUse fires on every file write, which would run evaluations continuously mid-session — expensive, noisy, and disruptive. The Stop hook batches all changes from a session into a single suite run.
31
+
32
+ **Not CI.** CI integration has value but is a separate concern. A CI step can't write to `~/.onlooker/` on the developer's machine, and the baseline comparison is inherently local. Echo is a local development feedback tool; CI integration (e.g., posting drift to a PR comment) is a future feature.
33
+
34
+ ## Consequences
35
+
36
+ - The recursion guard (`ECHO_NESTED=1`) is mandatory. `claude -p` spawns a subprocess that also triggers Stop, which would re-enter the hook infinitely. The guard must be set before any work begins and is checked as the very first statement.
37
+ - Echo cannot fire mid-session, so rapid iteration on a prompt file produces one signal per session, not one per edit. This is a feature for reducing noise, but means a long session with many edits only records the final state of each file.
38
+ - If a session ends abruptly (e.g., the terminal is closed or the process is killed), the Stop hook may not fire and no evaluation is recorded for that session. This is consistent with how all Stop hooks in Claude Code behave.
39
+ - Users who want on-demand evaluation can invoke Echo's logic manually by running the hook script directly and piping the hook input (a JSON object with `cwd` and `session_id`) on stdin. A future `/echo` skill could wrap this.
40
+ - The hook must be registered in `hooks.json` with `"matcher": "*"` so it fires on all sessions. Projects that want to opt out can set `echo.enabled: false` rather than removing the hook registration.
@@ -0,0 +1,15 @@
1
+ {
2
+ "hooks": {
3
+ "Stop": [
4
+ {
5
+ "matcher": "*",
6
+ "hooks": [
7
+ {
8
+ "type": "command",
9
+ "command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/echo-stop-gate.sh"
10
+ }
11
+ ]
12
+ }
13
+ ]
14
+ }
15
+ }
@@ -0,0 +1,366 @@
1
+ #!/usr/bin/env bash
2
+ # Echo Stop-gate hook.
3
+ #
4
+ # Triggered by Stop. Off by default — gated on echo.enabled in config.
5
+ # When enabled, detects which watched agent files have uncommitted changes
6
+ # (modified, staged, or untracked), runs a single-judge advisory pass on each,
7
+ # and compares the score against a stored baseline: improved / degraded / neutral.
8
+ #
9
+ # Hook contract:
10
+ # - Always exits 0. Never blocks Stop.
11
+ # - Skips silently if disabled, no git context, or no watched files changed.
12
+ # - Recursion guard: exits immediately if ECHO_NESTED=1 so the Stop event of the
13
+ # nested `claude -p` evaluation run cannot re-enter this hook.
14
+ # - Errors from `claude -p` are swallowed; worst case is no verdict written.
15
+
16
+ set -uo pipefail
17
+
18
+ # Recursion guard — must be first.
19
+ [[ "${ECHO_NESTED:-}" == "1" ]] && exit 0
20
+ export ECHO_NESTED=1
21
+
22
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
23
+ PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
24
+
25
+ # Resolve the ecosystem root: $ONLOOKER_ECOSYSTEM_ROOT if set, else two levels above this plugin's root.
26
+ _ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
27
+ if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
28
+ _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
29
+ if [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]]; then
30
+ _ECOSYSTEM_ROOT="$_candidate"
31
+ fi
32
+ fi
33
+ # Shared libs are optional. NOTE(review): only validate-path.sh is probed; onlooker-schema.sh is sourced unchecked.
34
+ if [[ -n "$_ECOSYSTEM_ROOT" && -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
35
+ # shellcheck disable=SC1091
36
+ CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
37
+ # shellcheck disable=SC1091
38
+ CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/onlooker-schema.sh"
39
+ fi
40
+
41
+ # shellcheck source=../lib/echo-config.sh
42
+ source "${PLUGIN_ROOT}/scripts/lib/echo-config.sh"
43
+ # shellcheck source=../lib/echo-project-key.sh
44
+ source "${PLUGIN_ROOT}/scripts/lib/echo-project-key.sh"
45
+ # shellcheck source=../lib/echo-ulid.sh
46
+ source "${PLUGIN_ROOT}/scripts/lib/echo-ulid.sh"
47
+ # shellcheck source=../lib/echo-events.sh
48
+ CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" source "${PLUGIN_ROOT}/scripts/lib/echo-events.sh"
49
+ # Hook input is JSON on stdin; cwd and session_id fall back to "" when absent or unparsable.
50
+ INPUT=$(cat)
51
+ CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
52
+ SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
53
+ # Shared bail-out used by every skip path below: always exits with status 0.
54
+ _done() { exit 0; }
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Config + prerequisites
58
+ # ---------------------------------------------------------------------------
59
+
60
+ REPO_ROOT=$(echo_project_repo_root "$CWD")
61
+ [[ -z "$REPO_ROOT" ]] && _done
62
+
63
+ CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" echo_config_load "$REPO_ROOT"
64
+ echo_config_enabled || _done
65
+
66
+ PROJECT_KEY=$(echo_project_key "$CWD")
67
+ [[ -z "$PROJECT_KEY" ]] && _done
68
+
69
+ command -v claude >/dev/null 2>&1 || _done
70
+ command -v jq >/dev/null 2>&1 || _done
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Identify changed agent files
74
+ # ---------------------------------------------------------------------------
75
+
76
+ # Collect all changed paths: unstaged, staged, and untracked.
77
+ CHANGED_FILES=$(git -C "$REPO_ROOT" diff --name-only HEAD 2>/dev/null) || CHANGED_FILES=""
78
+ STAGED_FILES=$(git -C "$REPO_ROOT" diff --name-only --cached 2>/dev/null) || STAGED_FILES=""
79
+ UNTRACKED_FILES=$(git -C "$REPO_ROOT" ls-files --others --exclude-standard 2>/dev/null) || UNTRACKED_FILES=""
80
+ ALL_CHANGED=$(printf '%s\n%s\n%s' "$CHANGED_FILES" "$STAGED_FILES" "$UNTRACKED_FILES" | sort -u | grep -v '^$') || ALL_CHANGED=""
81
+ [[ -z "$ALL_CHANGED" ]] && _done
82
+
83
+ # Load watch and exclude patterns (bash 3 compatible — no mapfile).
84
+ WATCH_PATTERNS=()
85
+ while IFS= read -r _pat; do
86
+ [[ -n "$_pat" ]] && WATCH_PATTERNS+=("$_pat")
87
+ done < <(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" echo_config_watch_paths)
88
+
89
+ EXCLUDE_PATTERNS=()
90
+ while IFS= read -r _pat; do
91
+ [[ -n "$_pat" ]] && EXCLUDE_PATTERNS+=("$_pat")
92
+ done < <(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" echo_config_exclude_paths)
93
+
94
+ # Filter changed files: must match at least one watch pattern AND no exclude pattern.
95
+ WATCHED_CHANGED=()
96
+ while IFS= read -r f; do
97
+ [[ -z "$f" ]] && continue
98
+ # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a bare "${arr[@]}" aborts under set -u on bash < 4.4.
99
+ local_match=0
100
+ for pat in ${WATCH_PATTERNS[@]+"${WATCH_PATTERNS[@]}"}; do
101
+ # shellcheck disable=SC2053
102
+ if [[ "$f" == $pat ]]; then
103
+ local_match=1
104
+ break
105
+ fi
106
+ done
107
+ [[ "$local_match" -eq 0 ]] && continue
108
+ # The exclude list may be empty, so it needs the same guarded expansion.
109
+ excluded=0
110
+ for pat in ${EXCLUDE_PATTERNS[@]+"${EXCLUDE_PATTERNS[@]}"}; do
111
+ # shellcheck disable=SC2053
112
+ if [[ "$f" == $pat ]]; then
113
+ excluded=1
114
+ break
115
+ fi
116
+ done
117
+ [[ "$excluded" -eq 1 ]] && continue
118
+
119
+ WATCHED_CHANGED+=("$f")
120
+ done <<< "$ALL_CHANGED"
121
+
122
+ [[ "${#WATCHED_CHANGED[@]}" -eq 0 ]] && _done
123
+
124
+ # ---------------------------------------------------------------------------
125
+ # Storage paths
126
+ # ---------------------------------------------------------------------------
127
+
128
+ ONLOOKER_BASE="${ONLOOKER_DIR:-$HOME/.onlooker}"
129
+ ECHO_DIR="${ONLOOKER_BASE}/echo/${PROJECT_KEY}"
130
+ BASELINE_DIR="${ECHO_DIR}/baselines"
131
+ mkdir -p "$BASELINE_DIR" 2>/dev/null || _done
132
+
133
+ # ---------------------------------------------------------------------------
134
+ # Evaluation loop
135
+ # ---------------------------------------------------------------------------
136
+
137
+ EVAL_MODEL=$(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" echo_config_model)
138
+ TIMEOUT_SECS=$(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" echo_config_timeout)
139
+ DRIFT_THRESHOLD=$(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" echo_config_drift_threshold)
140
+
141
+ SUITE_ID=$(echo_ulid)
142
+ SUITE_START=$(python3 -c 'import time; print(int(time.time()*1000))' 2>/dev/null || echo 0)
143
+ FIRST_CHANGED="${WATCHED_CHANGED[0]}"
144
+ # Create the private prompt file before announcing the suite; skip the run rather than use a guessable /tmp name.
145
+ PROMPT_FILE=$(mktemp -t echo-prompt.XXXXXX 2>/dev/null) || _done
146
+ trap 'rm -f -- "$PROMPT_FILE"' EXIT
147
+
148
+ suite_started_payload=$(jq -n \
149
+ --arg suite_id "$SUITE_ID" \
150
+ --argjson test_count "${#WATCHED_CHANGED[@]}" \
151
+ --arg trigger "file_change" \
152
+ --arg changed_file "$FIRST_CHANGED" \
153
+ '{suite_id: $suite_id, test_count: $test_count, trigger: $trigger, changed_file: $changed_file}')
154
+ echo_emit_event "echo.suite.started" "$suite_started_payload" || true
155
+
156
+ count_improved=0
157
+ count_degraded=0
158
+ count_neutral=0
159
+ sum_before=0
160
+ sum_after=0
161
+ file_count=0
162
+ compared_count=0 # files scored in this run that also had a usable stored baseline
163
+ for rel_path in "${WATCHED_CHANGED[@]}"; do
164
+ abs_path="${REPO_ROOT}/${rel_path}"
165
+ [[ ! -f "$abs_path" ]] && continue
166
+
167
+ FILE_CONTENT=$(cat "$abs_path" 2>/dev/null) || continue
168
+ [[ -z "$FILE_CONTENT" ]] && continue
169
+
170
+ TEST_ID=$(echo_test_id_for_path "$rel_path")
171
+ BASELINE_FILE="${BASELINE_DIR}/${TEST_ID}.json"
172
+
173
+ # Build the evaluation prompt.
174
+ {
175
+ printf '%s\n' 'You are evaluating an agent prompt file for quality. Return JSON only — no prose, no markdown fences.'
176
+ printf '\n'
177
+ printf '%s\n' 'Output schema (exactly these keys):'
178
+ printf '%s\n' '{'
179
+ printf '%s\n' ' "score": 0.0..1.0,'
180
+ printf '%s\n' ' "passed": true|false,'
181
+ printf '%s\n' ' "confidence": 0.0..1.0,'
182
+ printf '%s\n' ' "feedback": "1-2 sentences on the highest-leverage issue, if any."'
183
+ printf '%s\n' '}'
184
+ printf '\n'
185
+ printf '%s\n' 'Score on these criteria (equal weight):'
186
+ printf '%s\n' ' - Role clarity: does the file clearly define what the agent is and what it must do?'
187
+ printf '%s\n' ' - Output format: are output format and schema requirements unambiguous?'
188
+ printf '%s\n' ' - Criterion coverage: are all evaluation dimensions specified with enough detail to apply consistently?'
189
+ printf '%s\n' ' - Internal consistency: no contradictory instructions, no undefined terms.'
190
+ printf '\n'
191
+ printf '%s\n' "A score >= 0.7 is \"passed\". Be concise."
192
+ printf '\n'
193
+ printf '%s\n' "---FILE: ${rel_path}---"
194
+ printf '%s\n' "$FILE_CONTENT"
195
+ printf '%s\n' '---END FILE---'
196
+ } > "$PROMPT_FILE"
197
+
198
+ CLAUDE_ARGS=(-p --max-turns 1)
199
+ [[ -n "$EVAL_MODEL" ]] && CLAUDE_ARGS+=(--model "$EVAL_MODEL")
200
+
201
+ RESPONSE=""
202
+ if command -v timeout >/dev/null 2>&1; then
203
+ RESPONSE=$(timeout "$TIMEOUT_SECS" claude "${CLAUDE_ARGS[@]}" < "$PROMPT_FILE" 2>/dev/null) || RESPONSE=""
204
+ elif command -v gtimeout >/dev/null 2>&1; then
205
+ RESPONSE=$(gtimeout "$TIMEOUT_SECS" claude "${CLAUDE_ARGS[@]}" < "$PROMPT_FILE" 2>/dev/null) || RESPONSE=""
206
+ else
207
+ RESPONSE=$(claude "${CLAUDE_ARGS[@]}" < "$PROMPT_FILE" 2>/dev/null) || RESPONSE=""
208
+ fi
209
+
210
+ [[ -z "$RESPONSE" ]] && continue
211
+ # Accept score/confidence only when they are JSON numbers: both are spliced into python3 -c and jq --argjson below.
212
+ CLEAN=$(printf '%s' "$RESPONSE" | sed -e 's/^```json//' -e 's/^```//' -e 's/```$//')
213
+ SCORE_AFTER=$(printf '%s' "$CLEAN" | jq -r '.score | select(type == "number")' 2>/dev/null) || SCORE_AFTER=""
214
+ CONFIDENCE=$(printf '%s' "$CLEAN" | jq -r '(.confidence | select(type == "number")) // 0.6' 2>/dev/null) || CONFIDENCE="0.6"
215
+ [[ -z "$SCORE_AFTER" ]] && continue
216
+
217
+ SCORE_BEFORE=""
218
+ if [[ -f "$BASELINE_FILE" ]]; then
219
+ SCORE_BEFORE=$(jq -r '.score | select(type == "number")' "$BASELINE_FILE" 2>/dev/null) || SCORE_BEFORE=""
220
+ fi
221
+
222
+ # Persist new baseline.
223
+ jq -n \
224
+ --arg path "$rel_path" \
225
+ --arg test_id "$TEST_ID" \
226
+ --argjson score "$SCORE_AFTER" \
227
+ --arg ts "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
228
+ '{path: $path, test_id: $test_id, score: $score, recorded_at: $ts}' \
229
+ > "$BASELINE_FILE" 2>/dev/null || true
230
+
231
+ file_count=$((file_count + 1))
232
+ # Both sums are accumulated only for files with a baseline, so the suite averages compare the same set of files.
233
+
234
+ if [[ -n "$SCORE_BEFORE" ]]; then
235
+ DELTA=$(python3 -c "print(round($SCORE_AFTER - $SCORE_BEFORE, 4))" 2>/dev/null) || DELTA="0"
236
+ _sum=$(python3 -c "print($sum_before + $SCORE_BEFORE)" 2>/dev/null) && sum_before=$_sum
237
+ _sum=$(python3 -c "print($sum_after + $SCORE_AFTER)" 2>/dev/null) && sum_after=$_sum
238
+ compared_count=$((compared_count + 1))
239
+ IS_IMPROVED=$(python3 -c "print('true' if $DELTA > $DRIFT_THRESHOLD else 'false')" 2>/dev/null) || IS_IMPROVED="false"
240
+ IS_DEGRADED=$(python3 -c "print('true' if $DELTA < -$DRIFT_THRESHOLD else 'false')" 2>/dev/null) || IS_DEGRADED="false"
241
+
242
+ FILE_NAME=$(basename "$rel_path")
243
+
244
+ if [[ "$IS_IMPROVED" == "true" ]]; then
245
+ count_improved=$((count_improved + 1))
246
+ improvement_payload=$(jq -n \
247
+ --arg suite_id "$SUITE_ID" \
248
+ --arg test_id "$TEST_ID" \
249
+ --arg test_name "$FILE_NAME" \
250
+ --argjson score_before "$SCORE_BEFORE" \
251
+ --argjson score_after "$SCORE_AFTER" \
252
+ --argjson delta "$DELTA" \
253
+ --argjson confidence "$CONFIDENCE" \
254
+ '{suite_id: $suite_id, test_id: $test_id, test_name: $test_name,
255
+ score_before: $score_before, score_after: $score_after,
256
+ delta: $delta, confidence: $confidence}')
257
+ echo_emit_event "echo.improvement.detected" "$improvement_payload" || true
258
+
259
+ elif [[ "$IS_DEGRADED" == "true" ]]; then
260
+ count_degraded=$((count_degraded + 1))
261
+ regression_payload=$(jq -n \
262
+ --arg suite_id "$SUITE_ID" \
263
+ --arg test_id "$TEST_ID" \
264
+ --arg test_name "$FILE_NAME" \
265
+ --argjson score_before "$SCORE_BEFORE" \
266
+ --argjson score_after "$SCORE_AFTER" \
267
+ --argjson delta "$DELTA" \
268
+ --argjson confidence "$CONFIDENCE" \
269
+ '{suite_id: $suite_id, test_id: $test_id, test_name: $test_name,
270
+ score_before: $score_before, score_after: $score_after,
271
+ delta: $delta, confidence: $confidence}')
272
+ echo_emit_event "echo.regression.detected" "$regression_payload" || true
273
+ else
274
+ count_neutral=$((count_neutral + 1))
275
+ fi
276
+ else
277
+ # First evaluation for this file — no baseline to compare against yet.
278
+ count_neutral=$((count_neutral + 1))
279
+ fi
280
+ done
281
+
282
+ [[ "$file_count" -eq 0 ]] && _done
283
+
284
+ # ---------------------------------------------------------------------------
285
+ # Emit suite events
286
+ # ---------------------------------------------------------------------------
287
+
288
+ SUITE_END=$(python3 -c 'import time; print(int(time.time()*1000))' 2>/dev/null || echo 0)
289
+ DURATION_MS=$(( SUITE_END - SUITE_START ))
290
+
291
+ # Merge is recommended whenever nothing degraded (improved and all-neutral runs alike).
292
+ MERGE_RECOMMENDED="false"
293
+ [[ "$count_degraded" -eq 0 ]] && MERGE_RECOMMENDED="true"
294
+ # Averages divide by compared_count, not file_count: first-time files have no "before" score and must not count as 0.
295
+ if [[ "$compared_count" -gt 0 ]]; then
296
+ BASELINE_AVG=$(python3 -c "print(round($sum_before / $compared_count, 4))" 2>/dev/null) || BASELINE_AVG=""
297
+ AFTER_AVG=$(python3 -c "print(round($sum_after / $compared_count, 4))" 2>/dev/null) || AFTER_AVG=""
298
+ DRIFT=$(python3 -c "print(round(($sum_after - $sum_before) / $compared_count, 4))" 2>/dev/null) || DRIFT=""
299
+
300
+ if [[ -n "$BASELINE_AVG" && -n "$AFTER_AVG" && -n "$DRIFT" ]]; then
301
+ suite_complete_payload=$(jq -n \
302
+ --arg suite_id "$SUITE_ID" \
303
+ --argjson test_count "$file_count" \
304
+ --argjson improved "$count_improved" \
305
+ --argjson degraded "$count_degraded" \
306
+ --argjson neutral "$count_neutral" \
307
+ --argjson merge_recommended "$MERGE_RECOMMENDED" \
308
+ --argjson duration_ms "$DURATION_MS" \
309
+ --argjson baseline_score "$BASELINE_AVG" \
310
+ --argjson score_after "$AFTER_AVG" \
311
+ --argjson drift "$DRIFT" \
312
+ --argjson drift_threshold "$DRIFT_THRESHOLD" \
313
+ '{suite_id: $suite_id, test_count: $test_count,
314
+ improved: $improved, degraded: $degraded, neutral: $neutral,
315
+ merge_recommended: $merge_recommended, duration_ms: $duration_ms,
316
+ baseline_score: $baseline_score, score_after: $score_after,
317
+ drift: $drift, drift_threshold: $drift_threshold}')
318
+ else
319
+ suite_complete_payload=$(jq -n \
320
+ --arg suite_id "$SUITE_ID" \
321
+ --argjson test_count "$file_count" \
322
+ --argjson improved "$count_improved" \
323
+ --argjson degraded "$count_degraded" \
324
+ --argjson neutral "$count_neutral" \
325
+ --argjson merge_recommended "$MERGE_RECOMMENDED" \
326
+ --argjson duration_ms "$DURATION_MS" \
327
+ '{suite_id: $suite_id, test_count: $test_count,
328
+ improved: $improved, degraded: $degraded, neutral: $neutral,
329
+ merge_recommended: $merge_recommended, duration_ms: $duration_ms}')
330
+ fi
331
+ else
332
+ suite_complete_payload=$(jq -n \
333
+ --arg suite_id "$SUITE_ID" \
334
+ --argjson test_count "$file_count" \
335
+ --argjson improved "$count_improved" \
336
+ --argjson degraded "$count_degraded" \
337
+ --argjson neutral "$count_neutral" \
338
+ --argjson merge_recommended "$MERGE_RECOMMENDED" \
339
+ --argjson duration_ms "$DURATION_MS" \
340
+ '{suite_id: $suite_id, test_count: $test_count,
341
+ improved: $improved, degraded: $degraded, neutral: $neutral,
342
+ merge_recommended: $merge_recommended, duration_ms: $duration_ms}')
343
+ fi
344
+ echo_emit_event "echo.suite.complete" "$suite_complete_payload" || true
345
+
346
+ # ---------------------------------------------------------------------------
347
+ # Write advisory file for review in next session.
348
+ # ---------------------------------------------------------------------------
349
+ # The session id becomes part of the filename, so every character outside [A-Za-z0-9-] is replaced with "_".
350
+ SAFE_SESSION_ID=$(printf '%s' "${SESSION_ID:-unknown}" | tr -c 'a-zA-Z0-9-' '_')
351
+ # "files" lists every watched changed path, including any the loop skipped; test_count counts scored files only.
352
+ jq -n \
353
+ --arg suite_id "$SUITE_ID" \
354
+ --arg session_id "${SESSION_ID:-unknown}" \
355
+ --argjson test_count "$file_count" \
356
+ --argjson improved "$count_improved" \
357
+ --argjson degraded "$count_degraded" \
358
+ --argjson neutral "$count_neutral" \
359
+ --argjson merge_recommended "$MERGE_RECOMMENDED" \
360
+ --argjson files "$(printf '%s\n' "${WATCHED_CHANGED[@]}" | jq -R . | jq -s .)" \
361
+ '{suite_id: $suite_id, session_id: $session_id, test_count: $test_count,
362
+ improved: $improved, degraded: $degraded, neutral: $neutral,
363
+ merge_recommended: $merge_recommended, files: $files}' \
364
+ > "${ECHO_DIR}/run-${SAFE_SESSION_ID}.json" 2>/dev/null || true
365
+
366
+ _done
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env bash
2
+ # Config loading for Echo.
3
+ # Reads Echo settings from the `echo` key of the repo's .claude/settings.json,
4
+ # falling back to the `echo` key of the plugin's own config.json for defaults.
5
+
6
+ _ECHO_CONFIG_JSON=""
7
+ _ECHO_PLUGIN_CONFIG_JSON=""
8
+
9
# Load Echo configuration into the module-level caches read by the
# echo_config_get* helpers.
#
# Globals written:
#   _ECHO_PLUGIN_CONFIG_JSON  raw contents of ${CLAUDE_PLUGIN_ROOT}/config.json
#   _ECHO_CONFIG_JSON         compact JSON of the `echo` key found in
#                             <repo_root>/.claude/settings.json
# Each global is reset to "" first and stays empty when its source file is
# missing or unreadable, or (for settings) when jq yields no `echo` value.
#
# Arguments: $1 - repository root; optional, the settings overlay is skipped
#                 when it is empty.
# Returns:   always 0. A missing overlay is not an error, and a non-zero
#            status here would abort callers running under `set -e`.
echo_config_load() {
  local repo_root="${1:-}"

  _ECHO_PLUGIN_CONFIG_JSON=""
  _ECHO_CONFIG_JSON=""

  # Only look for plugin defaults when the plugin root is known; an empty
  # root would otherwise make the path resolve to /config.json.
  if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then
    local plugin_config="${CLAUDE_PLUGIN_ROOT}/config.json"
    if [[ -f "$plugin_config" ]]; then
      _ECHO_PLUGIN_CONFIG_JSON=$(cat "$plugin_config" 2>/dev/null) \
        || _ECHO_PLUGIN_CONFIG_JSON=""
    fi
  fi

  if [[ -n "$repo_root" ]]; then
    local settings_file="${repo_root}/.claude/settings.json"
    if [[ -f "$settings_file" ]]; then
      local echo_block
      # jq failure (unreadable file, invalid JSON) degrades to "no overlay".
      echo_block=$(jq -c '.echo // empty' "$settings_file" 2>/dev/null) \
        || echo_block=""
      if [[ -n "$echo_block" ]]; then
        _ECHO_CONFIG_JSON="$echo_block"
      fi
    fi
  fi

  return 0
}
30
+
31
# Get a single scalar value. Checks settings.json first, then plugin config.json.
#
# Globals:   _ECHO_CONFIG_JSON (read)        - settings overlay; keys are
#                                              relative to its `echo` block
#            _ECHO_PLUGIN_CONFIG_JSON (read) - plugin defaults; keys are
#                                              looked up under `.echo`
# Arguments: $1 - jq path starting with a dot, e.g. '.evaluation.model'
# Outputs:   the raw (jq -r) value on stdout, without a trailing newline;
#            nothing when neither source defines the key.
# Returns:   0 after printing a value. When nothing is printed the status is
#            not meaningful; callers should test the output instead.
echo_config_get() {
  local key="$1"
  local val

  # `select(. != null)` rather than `// empty`: the alternative operator
  # also discards an explicit `false`, which would let the plugin default
  # silently override e.g. `"enabled": false` in settings.json.
  if [[ -n "$_ECHO_CONFIG_JSON" ]]; then
    val=$(printf '%s' "$_ECHO_CONFIG_JSON" \
      | jq -r "(${key}) | select(. != null)" 2>/dev/null) || val=""
    [[ -n "$val" && "$val" != "null" ]] && { printf '%s' "$val"; return 0; }
  fi

  if [[ -n "$_ECHO_PLUGIN_CONFIG_JSON" ]]; then
    val=$(printf '%s' "$_ECHO_PLUGIN_CONFIG_JSON" \
      | jq -r "(.echo${key}) | select(. != null)" 2>/dev/null) || val=""
    [[ -n "$val" && "$val" != "null" ]] && { printf '%s' "$val"; return 0; }
  fi
}
47
+
48
# Get a value as compact JSON (jq -c), for arrays and objects.
# Checks the settings.json overlay first, then the plugin config.json.
#
# Globals:   _ECHO_CONFIG_JSON (read), _ECHO_PLUGIN_CONFIG_JSON (read)
# Arguments: $1 - jq path starting with a dot, e.g. '.watch_paths'; plugin
#                 defaults are looked up under `.echo`
# Outputs:   the JSON-encoded value on stdout, without a trailing newline;
#            nothing when neither source defines the key.
# Returns:   0 after printing a value. When nothing is printed the status is
#            not meaningful; callers should test the output instead.
echo_config_get_json() {
  local key="$1"
  local val

  # `select(. != null)` keeps an explicit `false`, which `// empty` would
  # drop; null and lookup errors still produce no output.
  if [[ -n "$_ECHO_CONFIG_JSON" ]]; then
    val=$(printf '%s' "$_ECHO_CONFIG_JSON" \
      | jq -c "(${key}) | select(. != null)" 2>/dev/null) || val=""
    [[ -n "$val" ]] && { printf '%s' "$val"; return 0; }
  fi

  if [[ -n "$_ECHO_PLUGIN_CONFIG_JSON" ]]; then
    val=$(printf '%s' "$_ECHO_PLUGIN_CONFIG_JSON" \
      | jq -c "(.echo${key}) | select(. != null)" 2>/dev/null) || val=""
    [[ -n "$val" ]] && { printf '%s' "$val"; return 0; }
  fi
}
63
+
64
# Succeeds (status 0) only when the `.enabled` setting resolves to the
# literal string "true"; any other value, or no value, yields status 1.
echo_config_enabled() {
  local flag
  flag=$(echo_config_get '.enabled')
  if [[ "$flag" == "true" ]]; then
    return 0
  fi
  return 1
}
69
+
70
# Prints the configured `.evaluation.model`, or the built-in default model
# id when the setting is absent or empty. No trailing newline is written.
echo_config_model() {
  local configured
  configured=$(echo_config_get '.evaluation.model')
  if [[ -z "$configured" ]]; then
    configured="claude-haiku-4-5-20251001"
  fi
  printf '%s' "$configured"
}
75
+
76
# Prints the configured `.evaluation.timeout_seconds`, or 60 when the
# setting is absent or empty. No trailing newline is written.
echo_config_timeout() {
  local seconds
  seconds=$(echo_config_get '.evaluation.timeout_seconds')
  if [[ -z "$seconds" ]]; then
    seconds="60"
  fi
  printf '%s' "$seconds"
}
81
+
82
# Prints the configured `.drift_threshold`, or 0.05 when the setting is
# absent or empty. No trailing newline is written.
echo_config_drift_threshold() {
  local threshold
  threshold=$(echo_config_get '.drift_threshold')
  if [[ -z "$threshold" ]]; then
    threshold="0.05"
  fi
  printf '%s' "$threshold"
}
87
+
88
# Prints newline-separated list of watch glob patterns.
# Uses the configured `.watch_paths` JSON array when one is set; otherwise
# emits the single built-in default pattern.
echo_config_watch_paths() {
  local patterns_json
  patterns_json=$(echo_config_get_json '.watch_paths')
  if [[ -z "$patterns_json" ]]; then
    printf 'plugins/*/agents/*.md\n'
    return
  fi
  # One array element per output line; jq diagnostics are discarded.
  jq -r '.[]' 2>/dev/null <<<"$patterns_json"
}
98
+
99
# Prints newline-separated list of exclude glob patterns.
# Configured `.exclude_paths` entries (if any) come first, followed by the
# fixed pattern for Echo's own tree.
echo_config_exclude_paths() {
  local user_excludes
  user_excludes=$(echo_config_get_json '.exclude_paths')
  if [[ -n "$user_excludes" ]]; then
    # One array element per output line; jq diagnostics are discarded.
    jq -r '.[]' 2>/dev/null <<<"$user_excludes"
  fi
  # Always exclude Echo's own tree — hardcoded, not overridable.
  printf 'plugins/echo/**\n'
}
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env bash
2
+ # Canonical echo.* event emission.
3
+ # Thin wrapper around the ecosystem plugin's onlooker-event.mjs `emit` mode.
4
+
5
+ _ECHO_PLUGIN_NAME="echo"
6
+
7
# Locate onlooker-event.mjs and print its path (no trailing newline).
#
# Lookup order:
#   1. $_ONLOOKER_EVENT_JS, when set and pointing at an existing file
#   2. ${CLAUDE_PLUGIN_ROOT}/scripts/lib/onlooker-event.mjs
#   3. ${CLAUDE_PLUGIN_ROOT}/../../scripts/lib/onlooker-event.mjs
# Returns: 0 when a file was found and printed, 1 otherwise.
_echo_event_js_path() {
  local override="${_ONLOOKER_EVENT_JS:-}"
  if [[ -n "$override" && -f "$override" ]]; then
    printf '%s' "$override"
    return 0
  fi

  local root="${CLAUDE_PLUGIN_ROOT:-}"
  local candidate
  for candidate in \
    "${root}/scripts/lib/onlooker-event.mjs" \
    "${root}/../../scripts/lib/onlooker-event.mjs"; do
    if [[ -f "$candidate" ]]; then
      printf '%s' "$candidate"
      return 0
    fi
  done
  return 1
}
23
+
24
# Print the session id to tag events with (no trailing newline):
# $_HOOK_SESSION_ID if non-empty, else $CLAUDE_SESSION_ID if non-empty,
# else the literal "unknown".
_echo_session_id() {
  printf '%s' "${_HOOK_SESSION_ID:-${CLAUDE_SESSION_ID:-unknown}}"
}
35
+
36
# Emit one echo.* event and append it to the Onlooker events log.
#
# Builds a {plugin, session_id, event_type, payload} envelope with jq, pipes
# it to `node <onlooker-event.mjs> emit`, and appends whatever that command
# prints on stdout as one line of the JSONL log.
#
# Globals:   _ECHO_PLUGIN_NAME (read); ONLOOKER_DIR, ONLOOKER_EVENTS_LOG,
#            HOME (read, for the log location and the node environment)
# Arguments: $1 - event type, e.g. "echo.suite.complete"
#            $2 - event payload as a JSON document
# Outputs:   diagnostics on stderr; nothing on stdout
# Returns:   0 when the event was appended; 1 on missing arguments, when
#            onlooker-event.mjs cannot be located, when the envelope cannot
#            be built, when the node command fails, or when the log
#            directory cannot be created.
echo_emit_event() {
  local event_type="${1:-}"
  local payload="${2:-}"
  [[ -z "$event_type" || -z "$payload" ]] && return 1

  local event_js
  event_js=$(_echo_event_js_path) || {
    printf 'echo-events: cannot locate onlooker-event.mjs\n' >&2
    return 1
  }

  local session_id
  session_id=$(_echo_session_id)

  # Stop here if jq cannot build the envelope (typically: payload is not
  # valid JSON). Otherwise node would be fed empty input and the failure
  # would be misreported as a schema validation error.
  local params
  params=$(jq -n \
    --arg plugin "$_ECHO_PLUGIN_NAME" \
    --arg sid "$session_id" \
    --arg type "$event_type" \
    --argjson payload "$payload" \
    '{plugin: $plugin, session_id: $sid, event_type: $type, payload: $payload}') || {
    printf 'echo-events: could not build event envelope for %s (is the payload valid JSON?)\n' \
      "$event_type" >&2
    return 1
  }

  # Capture node's stderr in a scratch file so it is only replayed when the
  # emit step actually fails.
  local event stderr_file
  stderr_file=$(mktemp -t echo-event-err.XXXXXX 2>/dev/null) || stderr_file="/tmp/echo-event-err.$$"
  event=$(printf '%s' "$params" \
    | ONLOOKER_DIR="${ONLOOKER_DIR:-$HOME/.onlooker}" \
      ONLOOKER_PLUGIN_NAME="$_ECHO_PLUGIN_NAME" \
      node "$event_js" emit 2>"$stderr_file") || {
    printf 'echo-events: schema validation failed for %s\n' "$event_type" >&2
    [[ -s "$stderr_file" ]] && cat "$stderr_file" >&2
    rm -f "$stderr_file"
    return 1
  }
  rm -f "$stderr_file"

  local log_path="${ONLOOKER_EVENTS_LOG:-${ONLOOKER_DIR:-$HOME/.onlooker}/logs/onlooker-events.jsonl}"
  mkdir -p "$(dirname "$log_path")" 2>/dev/null || return 1
  printf '%s\n' "$event" >>"$log_path"
}