@onlooker-community/ecosystem 0.28.1 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/.claude-plugin/marketplace.json +13 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +2 -2
  4. package/CHANGELOG.md +7 -0
  5. package/CLAUDE.md +2 -0
  6. package/docs/plugin-catalog.md +125 -0
  7. package/package.json +3 -3
  8. package/plugins/compass/.claude-plugin/plugin.json +1 -1
  9. package/plugins/compass/CHANGELOG.md +7 -0
  10. package/plugins/compass/README.md +1 -3
  11. package/plugins/compass/config.json +1 -2
  12. package/plugins/compass/docs/design.md +1 -2
  13. package/plugins/compass/scripts/hooks/compass-bash-gate.sh +8 -1
  14. package/plugins/compass/scripts/hooks/compass-pre-tool-use.sh +8 -1
  15. package/plugins/compass/scripts/hooks/compass-record-write.sh +5 -0
  16. package/plugins/compass/scripts/hooks/compass-session-start.sh +0 -8
  17. package/plugins/compass/scripts/lib/compass-evaluator.sh +58 -98
  18. package/plugins/compass/scripts/lib/compass-gate.sh +15 -18
  19. package/plugins/compass/scripts/lib/compass-sanitizer.sh +4 -4
  20. package/plugins/compass/scripts/lib/compass-transcript.sh +79 -112
  21. package/plugins/inspector/.claude-plugin/plugin.json +14 -0
  22. package/plugins/inspector/README.md +155 -0
  23. package/plugins/inspector/config.json +25 -0
  24. package/plugins/inspector/docs/design.md +286 -0
  25. package/plugins/inspector/hooks/hooks.json +33 -0
  26. package/plugins/inspector/scripts/hooks/inspector-post-write.sh +124 -0
  27. package/plugins/inspector/scripts/lib/inspector-config.sh +108 -0
  28. package/plugins/inspector/scripts/lib/inspector-events.sh +82 -0
  29. package/plugins/inspector/scripts/lib/inspector-project-key.sh +55 -0
  30. package/plugins/inspector/scripts/lib/inspector-run.sh +305 -0
  31. package/plugins/inspector/scripts/lib/inspector-ulid.sh +45 -0
  32. package/test/bats/archivist-project-key.bats +79 -0
  33. package/test/bats/archivist-storage.bats +79 -0
  34. package/test/bats/compact-tracker.bats +125 -0
  35. package/test/bats/compass-config.bats +65 -0
  36. package/test/bats/compass-gate.bats +129 -0
  37. package/test/bats/compass-sanitizer.bats +69 -0
  38. package/test/bats/compass-symbolic-skip.bats +88 -0
  39. package/test/bats/compass-transcript.bats +80 -0
  40. package/test/bats/inspector-config.bats +118 -0
  41. package/test/bats/inspector-events.bats +156 -0
  42. package/test/bats/inspector-post-write-hook.bats +164 -0
  43. package/test/bats/inspector-project-key.bats +68 -0
  44. package/test/bats/inspector-ulid.bats +34 -0
  45. package/test/bats/onlooker-schema.bats +111 -0
  46. package/test/bats/prompt-rules.bats +98 -0
  47. package/test/bats/session-tracker.bats +260 -0
  48. package/test/bats/skill-usage-tracker.bats +63 -0
  49. package/test/bats/task-tracker.bats +102 -0
  50. package/test/bats/turn-tracker.bats +180 -0
  51. package/test/bats/validate-path.bats +125 -0
  52. package/test/bats/worktree-tracker.bats +167 -0
@@ -215,6 +215,19 @@
215
215
  "license": "MIT",
216
216
  "keywords": ["provenance", "blame", "history", "tool-use", "transcript", "audit"],
217
217
  "tags": ["observability", "provenance"]
218
+ },
219
+ {
220
+ "name": "inspector",
221
+ "source": "./plugins/inspector",
222
+ "description": "Per-edit lint and typecheck gate. Runs the project's configured checks on just the touched file after every Write/Edit/MultiEdit, so the agent sees its own lint and type errors before claiming success. Cheaper than running the full project verify; complements assayer (which catches the agent lying about claims). Emits inspector.* events for downstream analysis. Requires the ecosystem plugin.",
223
+ "author": {
224
+ "name": "Onlooker Community"
225
+ },
226
+ "homepage": "https://onlooker.dev",
227
+ "repository": "https://github.com/onlooker-community/ecosystem",
228
+ "license": "MIT",
229
+ "keywords": ["verification", "lint", "typecheck", "post-tool-use", "audit", "feedback"],
230
+ "tags": ["verification", "observability"]
218
231
  }
219
232
  ]
220
233
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ecosystem",
3
- "version": "0.28.1",
3
+ "version": "0.29.0",
4
4
  "description": "Observability substrate for Claude Code. Provides the shared $ONLOOKER_DIR storage root (default $HOME/.onlooker), canonical schema-validated event emission, session and tool tracking hooks, and prompt rules. Required by all other Onlooker plugins.",
5
5
  "author": {
6
6
  "name": "Onlooker Community",
@@ -1,11 +1,11 @@
1
1
  {
2
- ".": "0.28.1",
2
+ ".": "0.29.0",
3
3
  "plugins/archivist": "0.1.0",
4
4
  "plugins/tribunal": "1.0.1",
5
5
  "plugins/echo": "0.2.0",
6
6
  "plugins/cartographer": "0.2.1",
7
7
  "plugins/governor": "0.2.1",
8
- "plugins/compass": "0.2.1",
8
+ "plugins/compass": "0.3.0",
9
9
  "plugins/scribe": "0.2.1",
10
10
  "plugins/counsel": "0.3.1",
11
11
  "plugins/warden": "0.2.0",
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.29.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.28.1...ecosystem-v0.29.0) (2026-06-15)
4
+
5
+
6
+ ### Features
7
+
8
+ * **inspector:** ship the per-edit lint/typecheck plugin ([#88](https://github.com/onlooker-community/ecosystem/issues/88)) ([2018243](https://github.com/onlooker-community/ecosystem/commit/201824384abd6a4fc5f4395266924aa413a2ffd1))
9
+
3
10
  ## [0.28.1](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.28.0...ecosystem-v0.28.1) (2026-06-12)
4
11
 
5
12
 
package/CLAUDE.md CHANGED
@@ -16,6 +16,7 @@ plugins/
16
16
  compass/ ← pre-write alignment gate (design phase)
17
17
  echo/ ← prompt-change regression detection
18
18
  governor/ ← resource governance and budget enforcement
19
+ inspector/ ← per-edit lint and typecheck gate
19
20
  lineage/ ← per-change provenance ("why does this line exist?")
20
21
  tribunal/ ← multi-agent quality gate (Actor → Jury → Meta-Judge → Gate)
21
22
 
@@ -42,6 +43,7 @@ scripts/lib/onlooker-event.mjs ← canonical event builder; all plugins route t
42
43
  | assayer | Stop | Verifies the agent's final-message claims against actual command results in the transcript; advisory |
43
44
  | bursar | SessionStart, SessionEnd | Rolls each session's spend into a per-project ledger on SessionEnd; surfaces "this project burned $X this week" at SessionStart. Governor is per-session; bursar is the cross-session rollup |
44
45
  | lineage | PostToolUse (Edit, Write, MultiEdit) + skill invocation | Records per-change provenance (session_id/turn + redacted, size-capped snippets) into a per-project ledger; `/lineage <file>:<line>` answers "why does this line exist?" by joining records to historian transcripts to recover prompt context |
46
+ | inspector | PostToolUse (Write, Edit, MultiEdit) | Per-edit verification: runs the project's configured lint + typecheck commands on just the touched file and emits `inspector.check.*` / `inspector.run.completed`. Surfaces issues to the agent for the next turn. Cheaper than the planned proctor (which runs the full project verify at Stop); complements assayer (which catches claims the agent makes without running anything) |
45
47
 
46
48
  Plugins communicate by emitting events to the JSONL log — they do not call each other directly. All plugins depend on the ecosystem substrate; no plugin depends on another plugin directly.
47
49
 
@@ -0,0 +1,125 @@
1
+ # Plugin Catalog
2
+
3
+ The full set of Onlooker plugins — shipped and planned — grouped by the layer of agent behavior they address. Each entry is a sketch: name, status, hook surface, one-line purpose. Full design docs land in each plugin's own directory as the plugin is built.
4
+
5
+ **Status legend**
6
+
7
+ - **shipped** — code lives under `plugins/<name>/` and is exercised by the test suite
8
+ - **design** — design doc exists, no implementation
9
+ - **planned** — named only; this catalog is the first reference
10
+
11
+ **Layer map**
12
+
13
+ | Layer | What it does |
14
+ |---|---|
15
+ | quality | Judges output after the fact |
16
+ | governance | Enforces resource and policy limits |
17
+ | testing | Detects regressions in agents and prompts |
18
+ | safety | Blocks harmful or ambiguous actions before they land |
19
+ | analysis | Produces structured artifacts about the session and the repo |
20
+ | memory | Persists context across compaction and across sessions |
21
+ | discovery / routing | Surfaces the right ecosystem affordance for the moment |
22
+ | verification / execution | Runs the agent's output and reports whether it actually worked |
23
+ | feedback / adaptation | Detects user signals (corrections, reverts) and feeds them back |
24
+ | provenance | Links artifacts (files, decisions, commits) back to the prompts and agents that produced them |
25
+
26
+ ---
27
+
28
+ ## Quality
29
+
30
+ Post-hoc judgment of agent output.
31
+
32
+ - **tribunal** — shipped — Stop + skill. Multi-agent quality gate: Actor → typed Judges → Meta-Judge → gate decides accept / retry / exhaust.
33
+ - **muse** — planned — UserPromptSubmit. Optional prompt-clarification pass that rewrites a vague prompt into a sharper one before the agent acts. Distinct from compass (which blocks) — muse reshapes.
34
+ - **rubric** — planned — skill only. Manages and versions the scoring rubrics that tribunal and echo consume; `/rubric` diffs or rolls back rubric revisions.
35
+
36
+ ## Governance
37
+
38
+ Resource and policy enforcement.
39
+
40
+ - **governor** — shipped — SessionStart, PreToolUse(Task), PostToolUse(Task), Stop. Per-session token and cost spend tracking; gates Task spawns against a configurable budget ceiling.
41
+ - **bursar** — planned — SessionEnd. Per-project, multi-session budget accounting; surfaces "this project burned $X this week" at SessionStart. Governor is per-session; bursar is the rollup.
42
+ - **arbiter** — planned — PreToolUse. Resolves cross-plugin conflicts (e.g., warden gate is closed but tribunal wants to spawn an Actor) using a declared precedence policy.
43
+
44
+ ## Testing
45
+
46
+ Regression detection for agents and prompts.
47
+
48
+ - **echo** — shipped — Stop. Single-judge quality pass when a watched agent file changes; compares the score against a stored baseline to report improved / degraded / neutral.
49
+ - **canary** — planned — cron / scheduled. Synthetic prompts run against watched agents on a schedule; detects drift without waiting for a file edit.
50
+ - **gauntlet** — planned — skill only. Adversarial fixture suite (jailbreaks, ambiguous prompts, edge cases) run on demand against a chosen agent.
51
+
52
+ ## Safety
53
+
54
+ Block harmful or ambiguous actions before they land.
55
+
56
+ - **compass** — shipped — PreToolUse(Write, Edit, MultiEdit, Bash). Pre-write intent clarity gate. N=5 parallel Haiku evaluators score whether two independent readers would converge on the same interpretation; blocks below threshold.
57
+ - **warden** — shipped — PostToolUse(WebFetch, Read), PreToolUse(Write, Edit, MultiEdit, Bash), SessionStart. Scans ingested content for prompt-injection patterns; closes a session-scoped gate that blocks write-class tools until cleared.
58
+
59
+ ## Analysis
60
+
61
+ Structured artifacts describing the session and the repo.
62
+
63
+ - **cartographer** — shipped — SessionStart, PostToolUse(Write, Edit, MultiEdit). Audits the persistent instruction layer (CLAUDE.md, AGENTS.md, .claude/rules/) for contradictions, shadowing, and drift.
64
+ - **counsel** — shipped — SessionStart. Weekly synthesis brief across all plugin event logs; injected when the last brief is stale.
65
+ - **scribe** — shipped — SessionEnd. Distills the session's "why" — problem context, decisions, tradeoffs — into a readable artifact.
66
+
67
+ ## Memory
68
+
69
+ Context that survives compaction and sessions.
70
+
71
+ - **archivist** — shipped — PreCompact, SessionStart. Extracts decisions, dead-ends, and open questions on compaction; reinjects the most important ones at the next SessionStart.
72
+ - **historian** — shipped — SessionEnd. Chunks and sanitizes the session transcript and stores chunks locally for future retrieval. Indexing pipeline only; retrieval lands in a follow-up.
73
+ - **librarian** — shipped — SessionEnd, skill. Consolidates archivist's per-session artifacts into the user's durable typed memory store; queues classified proposals for explicit confirmation.
74
+ - **curator** — shipped — SessionStart, skill. Maintenance pass over the typed memory store: four cheap heuristic checks (date decay, broken paths, broken index, orphaned memory) inside a wall-clock budget; surfaces findings, never edits the store directly.
75
+
76
+ ## Discovery / Routing
77
+
78
+ Help the agent and the user find the right affordance for the moment.
79
+
80
+ - **wayfinder** — planned — UserPromptSubmit. Ranks ecosystem plugins, skills, and agents against the current prompt; surfaces the top 1–2 as a `wayfinder.suggestion` event.
81
+ - **herald** — planned — SessionStart. Announces plugins, skills, or agents added since the user's last session in this project. One-time per item, dismissable.
82
+ - **dispatcher** — planned — UserPromptSubmit. Narrow intent classifier ("commit", "ship a PR", "review changes") that maps directly to the canonical skill. Narrower than wayfinder; fewer false positives.
83
+
84
+ ## Verification / Execution
85
+
86
+ Run the agent's output. Report what actually happened.
87
+
88
+ - **proctor** — planned — Stop, PostToolUse(Edit, Write). Runs the project's verification command (configurable: `npm test`, `mise run check`, `cargo test`, …) after writes or at Stop; emits `proctor.verify.passed` or `.failed`.
89
+ - **assayer** — planned — Stop. Parses the agent's final message for testable claims ("I ran the tests", "the build passes") and verifies them against actual exit codes in the session log. Catches lying-without-malice.
90
+ - **inspector** — planned — PostToolUse(Edit, Write). Runs lint and typecheck on just the touched files. Cheaper than proctor; fires far more often.
91
+
92
+ ## Feedback / Adaptation
93
+
94
+ Detect user signals and feed them back into the system.
95
+
96
+ - **attendant** — planned — UserPromptSubmit. Detects course-corrections in the user's prompt ("no", "stop", "don't", revert patterns); emits `attendant.pushback.detected` for other plugins to consume.
97
+ - **interpreter** — planned — consumes attendant events. Classifies pushback tone (frustrated / clarifying / neutral) so downstream plugins don't overreact to clarifying questions.
98
+ - **adept** — planned — SessionStart. Accumulates pushback patterns over sessions; injects "you've corrected this pattern N times" hints. Closes the loop that echo opens for prompt files.
99
+
100
+ ## Provenance
101
+
102
+ Link artifacts back to the prompts and agents that produced them.
103
+
104
+ - **lineage** — planned — PostToolUse(Edit, Write, MultiEdit). Records the prompt + agent + session that produced each file change; builds a queryable graph by joining historian transcripts with tool-use events. Answers "why does this line exist?"
105
+ - **ledger** — planned — PostToolUse(*) write-class. Append-only audit record of every write-class tool call with the prompt and agent context attached. `/ledger` queries by file, prompt substring, or time range.
106
+ - **witness** — planned — Stop. Captures the deciding assistant turn — the moment the agent committed to a course of action — and stores it as a discrete artifact. Distinct from scribe (which writes a narrative) — witness preserves the pivot itself.
107
+
108
+ ---
109
+
110
+ ## Coverage check
111
+
112
+ | Layer | Plugins |
113
+ |---|---|
114
+ | quality | tribunal, muse, rubric |
115
+ | governance | governor, bursar, arbiter |
116
+ | testing | echo, canary, gauntlet |
117
+ | safety | compass, warden |
118
+ | analysis | cartographer, counsel, scribe |
119
+ | memory | archivist, historian, librarian, curator |
120
+ | discovery / routing | wayfinder, herald, dispatcher |
121
+ | verification / execution | proctor, assayer, inspector |
122
+ | feedback / adaptation | attendant, interpreter, adept |
123
+ | provenance | lineage, ledger, witness |
124
+
125
+ Every layer holds at least two plugins; most hold three. Total: 12 shipped, 0 design, 18 planned.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@onlooker-community/ecosystem",
3
- "version": "0.28.1",
3
+ "version": "0.29.0",
4
4
  "description": "Agents, skills, hooks, commands, rules, and MCP configurations that power [Onlooker](https://onlooker.dev)",
5
5
  "author": {
6
6
  "name": "Onlooker Community",
@@ -19,14 +19,14 @@
19
19
  "onlooker-install": "install.sh"
20
20
  },
21
21
  "dependencies": {
22
- "@onlooker-community/schema": "^2.8.0"
22
+ "@onlooker-community/schema": "^2.9.0"
23
23
  },
24
24
  "scripts": {
25
25
  "postinstall": "echo '\\n onlooker-ecosystem installed!\\n Run: npx onlooker-install typescript\\n Docs: https://github.com/onlooker-community/ecosystem\\n'",
26
26
  "test": "npm run test:bats && npm run test:schema",
27
27
  "test:bats": "bats test/bats",
28
28
  "test:schema": "node --test test/node/*.test.mjs",
29
- "test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh plugins/historian/scripts/hooks/*.sh plugins/historian/scripts/lib/*.sh plugins/assayer/scripts/hooks/*.sh plugins/assayer/scripts/lib/*.sh plugins/cartographer/scripts/hooks/*.sh plugins/cartographer/scripts/lib/*.sh plugins/bursar/scripts/hooks/*.sh plugins/bursar/scripts/lib/*.sh plugins/lineage/scripts/hooks/*.sh plugins/lineage/scripts/lib/*.sh",
29
+ "test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh plugins/historian/scripts/hooks/*.sh plugins/historian/scripts/lib/*.sh plugins/assayer/scripts/hooks/*.sh plugins/assayer/scripts/lib/*.sh plugins/cartographer/scripts/hooks/*.sh plugins/cartographer/scripts/lib/*.sh plugins/bursar/scripts/hooks/*.sh plugins/bursar/scripts/lib/*.sh plugins/lineage/scripts/hooks/*.sh plugins/lineage/scripts/lib/*.sh plugins/inspector/scripts/hooks/*.sh plugins/inspector/scripts/lib/*.sh",
30
30
  "lint:references": "node scripts/lint/check-references.mjs",
31
31
  "lint:manifests": "node scripts/lint/check-manifests.mjs",
32
32
  "coverage:node": "node scripts/coverage/run-coverage.mjs",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "compass",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "Pre-write intent clarity gate. Intercepts write-class tool calls and requires a confidence threshold before allowing them to proceed. Evaluates the pending write against the prior assistant turn as context to avoid false positives on question-answer turns. Builds on the Onlooker ecosystem plugin.",
5
5
  "author": {
6
6
  "name": "Onlooker Community",
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.0](https://github.com/onlooker-community/ecosystem/compare/compass-v0.2.1...compass-v0.3.0) (2026-06-15)
4
+
5
+
6
+ ### Features
7
+
8
+ * **inspector:** ship the per-edit lint/typecheck plugin ([#88](https://github.com/onlooker-community/ecosystem/issues/88)) ([2018243](https://github.com/onlooker-community/ecosystem/commit/201824384abd6a4fc5f4395266924aa413a2ffd1))
9
+
3
10
  ## [0.2.1](https://github.com/onlooker-community/ecosystem/compare/compass-v0.2.0...compass-v0.2.1) (2026-06-12)
4
11
 
5
12
 
@@ -74,8 +74,7 @@ All keys are optional. Unset keys fall back to the plugin's `config.json` defaul
74
74
  "identity_match": "dir_plus_stem"
75
75
  },
76
76
  "transcript": {
77
- "prior_turn_chars_max": 800,
78
- "transcript_max_age_seconds": 300
77
+ "prior_turn_chars_max": 800
79
78
  },
80
79
  "skip_patterns": {
81
80
  "reply_to_question": {
@@ -115,7 +114,6 @@ All keys are optional. Unset keys fall back to the plugin's `config.json` defaul
115
114
  | `cooldown.seconds` | `120` | A write whose path shares a parent directory and filename stem with a recent successful write is skipped within this window. |
116
115
  | `cooldown.identity_match` | `dir_plus_stem` | Cooldown identity strategy. Stem comparison strips only the final extension; the cooldown does not carry across a rename. |
117
116
  | `transcript.prior_turn_chars_max` | `800` | Maximum characters of the prior assistant turn fed into the evaluator. Set to `0` to omit the prior turn for near-zero egress. |
118
- | `transcript.transcript_max_age_seconds` | `300` | Maximum age of the transcript file Compass will read the prior turn from. |
119
117
  | `skip_patterns.reply_to_question.enabled` | `true` | Enables the symbolic skip layer. When disabled, every write that passes the trigger gate reaches the LLM evaluator. |
120
118
  | `max_checks_per_turn` | `3` | Per-turn evaluation budget. Writes beyond this skip with reason `turn_budget_exhausted`. |
121
119
  | `min_context_chars` | `80` | Minimum sanitized context length. Shorter context skips with reason `insufficient_context`. |
@@ -20,8 +20,7 @@
20
20
  "identity_match": "dir_plus_stem"
21
21
  },
22
22
  "transcript": {
23
- "prior_turn_chars_max": 800,
24
- "transcript_max_age_seconds": 300
23
+ "prior_turn_chars_max": 800
25
24
  },
26
25
  "skip_patterns": {
27
26
  "reply_to_question": {
@@ -223,8 +223,7 @@ The re-check is capped at one per intervention. After one re-check, the three pa
223
223
  "identity_match": "dir_plus_stem"
224
224
  },
225
225
  "transcript": {
226
- "prior_turn_chars_max": 800,
227
- "transcript_max_age_seconds": 300
226
+ "prior_turn_chars_max": 800
228
227
  },
229
228
  "skip_patterns": {
230
229
  "reply_to_question": {
@@ -13,6 +13,12 @@
13
13
 
14
14
  set -uo pipefail
15
15
 
16
+ # Recursion guard — must be first.
17
+ # When the evaluator shells out to `claude -p`, that subprocess can
18
+ # trigger its own Bash hooks, which would re-enter Compass.
19
+ [[ "${COMPASS_NESTED:-}" == "1" ]] && exit 0
20
+ export COMPASS_NESTED=1
21
+
16
22
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
17
23
  PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
18
24
 
@@ -35,6 +41,7 @@ INPUT=$(cat)
35
41
  SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
36
42
  CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
37
43
  COMMAND=$(printf '%s' "$INPUT" | jq -r '.tool_input.command // ""' 2>/dev/null) || COMMAND=""
44
+ TRANSCRIPT_PATH=$(printf '%s' "$INPUT" | jq -r '.transcript_path // ""' 2>/dev/null) || TRANSCRIPT_PATH=""
38
45
 
39
46
  export _HOOK_SESSION_ID="$SESSION_ID"
40
47
 
@@ -91,5 +98,5 @@ if ! _is_write_command "$COMMAND"; then
91
98
  exit 0
92
99
  fi
93
100
 
94
- compass_run_gate "Bash" "" "bash_write" "$COMMAND" "$SESSION_ID" "$CWD"
101
+ compass_run_gate "Bash" "" "bash_write" "$COMMAND" "$SESSION_ID" "$CWD" "$TRANSCRIPT_PATH"
95
102
  exit $?
@@ -12,6 +12,12 @@
12
12
 
13
13
  set -uo pipefail
14
14
 
15
+ # Recursion guard — must be first.
16
+ # When the evaluator shells out to `claude -p`, that subprocess can
17
+ # trigger its own Write/Edit hooks, which would re-enter Compass.
18
+ [[ "${COMPASS_NESTED:-}" == "1" ]] && exit 0
19
+ export COMPASS_NESTED=1
20
+
15
21
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
16
22
  PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
17
23
 
@@ -34,6 +40,7 @@ INPUT=$(cat)
34
40
  SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
35
41
  CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
36
42
  TOOL_NAME=$(printf '%s' "$INPUT" | jq -r '.tool_name // ""' 2>/dev/null) || TOOL_NAME=""
43
+ TRANSCRIPT_PATH=$(printf '%s' "$INPUT" | jq -r '.transcript_path // ""' 2>/dev/null) || TRANSCRIPT_PATH=""
37
44
 
38
45
  export _HOOK_SESSION_ID="$SESSION_ID"
39
46
 
@@ -87,5 +94,5 @@ case "$TOOL_NAME" in
87
94
  ;;
88
95
  esac
89
96
 
90
- compass_run_gate "$TOOL_NAME" "$FILE_PATH" "$OPERATION" "$CONTEXT" "$SESSION_ID" "$CWD"
97
+ compass_run_gate "$TOOL_NAME" "$FILE_PATH" "$OPERATION" "$CONTEXT" "$SESSION_ID" "$CWD" "$TRANSCRIPT_PATH"
91
98
  exit $?
@@ -11,6 +11,11 @@
11
11
 
12
12
  set -uo pipefail
13
13
 
14
+ # Recursion guard — must be first.
15
+ # A nested `claude -p` Write would otherwise re-enter the cooldown writer.
16
+ [[ "${COMPASS_NESTED:-}" == "1" ]] && exit 0
17
+ export COMPASS_NESTED=1
18
+
14
19
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
15
20
  PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
16
21
 
@@ -18,14 +18,6 @@ set -uo pipefail
18
18
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
19
19
  PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
20
20
 
21
- _ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
22
- if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
23
- _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
24
- if [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]]; then
25
- _ECOSYSTEM_ROOT="$_candidate"
26
- fi
27
- fi
28
-
29
21
  export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
30
22
 
31
23
  # shellcheck source=../lib/compass-config.sh
@@ -1,20 +1,22 @@
1
1
  #!/usr/bin/env bash
2
- # N=5 parallel Haiku evaluator for Compass.
2
+ # N=5 parallel `claude -p` evaluator for Compass.
3
3
  #
4
- # Launches N independent evaluator calls, aggregates scores, and returns
5
- # a decision (pass/fail) with confidence and stddev.
4
+ # Launches N independent evaluator calls via `claude -p --max-turns 1`,
5
+ # aggregates scores, and returns a decision (pass / fail / error) with
6
+ # confidence and stddev.
6
7
  #
7
8
  # Exposes:
8
9
  # compass_evaluate <tool_name> <file_path> <operation> \
9
10
  # <prior_turn> <context_excerpt> <session_id>
10
11
  #
11
- # Exits 0 if confidence >= threshold AND stddev <= stddev_threshold.
12
- # Exits 1 if confidence < threshold OR stddev > stddev_threshold (block).
13
- # Exits 2 on evaluator error (respects error_policy).
14
- #
15
12
  # Writes a JSON result object to stdout:
16
13
  # {"decision":"pass|fail|error","confidence":<f>,"stddev":<f>,
17
14
  # "primary_concern":"<str>","rationale":"<str>","sample_count":<n>}
15
+ #
16
+ # Exit codes:
17
+ # 0 pass (confidence >= threshold AND stddev <= stddev_threshold)
18
+ # 1 fail (block)
19
+ # 2 error (respects error_policy)
18
20
 
19
21
  _COMPASS_EVAL_PROMPT_NO_PRIOR='You are evaluating whether a pending write operation has sufficient intent clarity.
20
22
 
@@ -78,87 +80,65 @@ path: FILE_PATH_PLACEHOLDER
78
80
  operation: OPERATION_PLACEHOLDER
79
81
  </tool_input>'
80
82
 
81
- # Run a single evaluator call. Writes JSON to a temp file at $output_file.
83
+ # Strip leading/trailing markdown fences a model occasionally emits.
84
+ _compass_strip_fences() {
85
+ printf '%s' "$1" | sed -e 's/^```json//' -e 's/^```//' -e 's/```$//'
86
+ }
87
+
88
+ # Run a single evaluator call via `claude -p`. Writes JSON to $output_file.
82
89
  # $1 — prompt text
83
90
  # $2 — model
84
- # $3 — temperature (as string, e.g. "0.3")
85
- # $4 — max_output_tokens
86
- # $5 — output file path
87
- # $6 — API key env var name (default: ANTHROPIC_API_KEY)
91
+ # $3 — timeout (seconds)
92
+ # $4 — output file path
88
93
  _compass_run_single_eval() {
89
94
  local prompt="$1"
90
95
  local model="$2"
91
- local temperature="$3"
92
- local max_tokens="$4"
93
- local output_file="$5"
94
- local api_key_var="${6:-ANTHROPIC_API_KEY}"
95
- local api_key="${!api_key_var:-}"
96
-
97
- [[ -z "$api_key" ]] && { printf '{"error":"no_api_key"}' > "$output_file"; return 1; }
98
-
99
- local request_body
100
- request_body=$(jq -n \
101
- --arg model "$model" \
102
- --argjson temp "$temperature" \
103
- --argjson max_tokens "$max_tokens" \
104
- --arg prompt "$prompt" \
105
- '{
106
- model: $model,
107
- max_tokens: $max_tokens,
108
- temperature: $temp,
109
- messages: [{"role": "user", "content": $prompt}]
110
- }' 2>/dev/null) || { printf '{"error":"request_build_failed"}' > "$output_file"; return 1; }
111
-
112
- local http_response http_code response_body
113
- http_response=$(curl -s -w '\n%{http_code}' \
114
- -X POST "https://api.anthropic.com/v1/messages" \
115
- -H "x-api-key: ${api_key}" \
116
- -H "anthropic-version: 2023-06-01" \
117
- -H "content-type: application/json" \
118
- -d "$request_body" \
119
- --max-time 15 \
120
- 2>/dev/null) || { printf '{"error":"curl_failed"}' > "$output_file"; return 1; }
121
-
122
- http_code=$(printf '%s' "$http_response" | tail -n1)
123
- response_body=$(printf '%s' "$http_response" | head -n -1)
124
-
125
- if [[ "$http_code" == "429" ]]; then
126
- sleep 2
127
- http_response=$(curl -s -w '\n%{http_code}' \
128
- -X POST "https://api.anthropic.com/v1/messages" \
129
- -H "x-api-key: ${api_key}" \
130
- -H "anthropic-version: 2023-06-01" \
131
- -H "content-type: application/json" \
132
- -d "$request_body" \
133
- --max-time 15 \
134
- 2>/dev/null) || { printf '{"error":"curl_failed_retry"}' > "$output_file"; return 1; }
135
- http_code=$(printf '%s' "$http_response" | tail -n1)
136
- response_body=$(printf '%s' "$http_response" | head -n -1)
137
- fi
96
+ local timeout_secs="$3"
97
+ local output_file="$4"
138
98
 
139
- if [[ "$http_code" != "200" ]]; then
140
- printf '{"error":"http_%s"}' "$http_code" > "$output_file"
99
+ if ! command -v claude >/dev/null 2>&1; then
100
+ printf '{"error":"claude_cli_missing"}' > "$output_file"
141
101
  return 1
142
102
  fi
143
103
 
144
- local content
145
- content=$(printf '%s' "$response_body" | jq -r '.content[0].text // empty' 2>/dev/null) || {
146
- printf '{"error":"parse_failed"}' > "$output_file"
104
+ local prompt_file
105
+ prompt_file=$(mktemp -t compass-prompt.XXXXXX 2>/dev/null) || prompt_file="/tmp/compass-prompt.$$.${RANDOM}"
106
+ printf '%s' "$prompt" > "$prompt_file"
107
+
108
+ local args=(-p --max-turns 1)
109
+ [[ -n "$model" ]] && args+=(--model "$model")
110
+
111
+ local response=""
112
+ if command -v timeout >/dev/null 2>&1; then
113
+ response=$(COMPASS_NESTED=1 timeout "$timeout_secs" claude "${args[@]}" <"$prompt_file" 2>/dev/null) || response=""
114
+ elif command -v gtimeout >/dev/null 2>&1; then
115
+ response=$(COMPASS_NESTED=1 gtimeout "$timeout_secs" claude "${args[@]}" <"$prompt_file" 2>/dev/null) || response=""
116
+ else
117
+ response=$(COMPASS_NESTED=1 claude "${args[@]}" <"$prompt_file" 2>/dev/null) || response=""
118
+ fi
119
+
120
+ rm -f "$prompt_file" 2>/dev/null || true
121
+
122
+ if [[ -z "$response" ]]; then
123
+ printf '{"error":"empty_response"}' > "$output_file"
147
124
  return 1
148
- }
125
+ fi
149
126
 
150
- # Validate the model returned parseable JSON with a score field.
127
+ local clean
128
+ clean=$(_compass_strip_fences "$response")
129
+
130
+ # Confirm the model returned a JSON object with a numeric score.
151
131
  local score
152
- score=$(printf '%s' "$content" | jq -r '.score // empty' 2>/dev/null) || score=""
132
+ score=$(printf '%s' "$clean" | jq -r '.score // empty' 2>/dev/null) || score=""
153
133
  if [[ -z "$score" ]]; then
154
134
  printf '{"error":"invalid_json_response"}' > "$output_file"
155
135
  return 1
156
136
  fi
157
137
 
158
- printf '%s' "$content" > "$output_file"
138
+ printf '%s' "$clean" > "$output_file"
159
139
  }
160
140
 
161
- # Build the evaluator prompt.
141
+ # Build the evaluator prompt by interpolating the data slots.
162
142
  _compass_build_prompt() {
163
143
  local prior_turn="$1"
164
144
  local context_excerpt="$2"
@@ -182,7 +162,7 @@ _compass_build_prompt() {
182
162
  printf '%s' "$template"
183
163
  }
184
164
 
185
- # Compute mean of space-separated floats.
165
+ # Mean of space-separated floats.
186
166
  _compass_mean() {
187
167
  local scores=("$@")
188
168
  local n="${#scores[@]}"
@@ -195,7 +175,7 @@ _compass_mean() {
195
175
  awk "BEGIN {printf \"%.4f\", $sum / $n}" 2>/dev/null || printf '0'
196
176
  }
197
177
 
198
- # Compute population stddev of space-separated floats.
178
+ # Population stddev of space-separated floats.
199
179
  _compass_stddev() {
200
180
  local scores=("$@")
201
181
  local n="${#scores[@]}"
@@ -213,7 +193,7 @@ _compass_stddev() {
213
193
  # Main evaluator entry point.
214
194
  # $1 — tool_name
215
195
  # $2 — file_path
216
- # $3 — operation (write|edit|multi_edit|bash)
196
+ # $3 — operation (write|edit|multi_edit|bash_write)
217
197
  # $4 — prior_turn (may be empty)
218
198
  # $5 — context_excerpt
219
199
  # $6 — session_id
@@ -225,17 +205,11 @@ compass_evaluate() {
225
205
  local context_excerpt="$5"
226
206
  local session_id="${6:-unknown}"
227
207
 
228
- local model
208
+ local model n_samples timeout_secs min_valid
229
209
  model=$(compass_config_get '.compass.evaluator.model')
230
210
  model="${model:-claude-haiku-4-5-20251001}"
231
-
232
- local n_samples temperature max_tokens timeout_secs min_valid
233
211
  n_samples=$(compass_config_get '.compass.evaluator.n')
234
212
  n_samples="${n_samples:-5}"
235
- temperature=$(compass_config_get '.compass.evaluator.temperature')
236
- temperature="${temperature:-0.3}"
237
- max_tokens=$(compass_config_get '.compass.evaluator.max_output_tokens')
238
- max_tokens="${max_tokens:-128}"
239
213
  timeout_secs=$(compass_config_get '.compass.evaluator.sample_timeout_seconds')
240
214
  timeout_secs="${timeout_secs:-8}"
241
215
  min_valid=$(compass_config_get '.compass.evaluator.min_valid_samples')
@@ -250,34 +224,22 @@ compass_evaluate() {
250
224
  local prompt
251
225
  prompt=$(_compass_build_prompt "$prior_turn" "$context_excerpt" "$tool_name" "$file_path" "$operation")
252
226
 
253
- # Launch N parallel eval calls.
254
227
  local tmp_dir
255
- tmp_dir=$(mktemp -d -t compass-eval.XXXXXX 2>/dev/null) || tmp_dir="/tmp/compass-eval.$$"
228
+ tmp_dir=$(mktemp -d -t compass-eval.XXXXXX 2>/dev/null) || tmp_dir="/tmp/compass-eval.$$.${RANDOM}"
256
229
  mkdir -p "$tmp_dir"
257
230
 
258
231
  local pids=()
259
232
  local i
260
233
  for (( i=0; i<n_samples; i++ )); do
261
234
  local out_file="${tmp_dir}/sample_${i}.json"
262
- (
263
- _compass_run_single_eval \
264
- "$prompt" "$model" "$temperature" "$max_tokens" "$out_file"
265
- ) &
235
+ _compass_run_single_eval \
236
+ "$prompt" "$model" "$timeout_secs" "$out_file" &
266
237
  pids+=($!)
267
238
  done
268
239
 
269
- # Collect with timeout watchdog.
270
- local deadline=$(( $(date +%s) + timeout_secs ))
271
240
  local pid
272
241
  for pid in "${pids[@]}"; do
273
- local now
274
- now=$(date +%s)
275
- local remaining=$(( deadline - now ))
276
- if [[ "$remaining" -gt 0 ]]; then
277
- wait "$pid" 2>/dev/null || true
278
- else
279
- kill "$pid" 2>/dev/null || true
280
- fi
242
+ wait "$pid" 2>/dev/null || true
281
243
  done
282
244
 
283
245
  # Aggregate valid scores.
@@ -307,9 +269,7 @@ compass_evaluate() {
307
269
  error_policy="${error_policy:-closed}"
308
270
 
309
271
  local decision="error"
310
- if [[ "$error_policy" == "open" ]]; then
311
- decision="pass"
312
- fi
272
+ [[ "$error_policy" == "open" ]] && decision="pass"
313
273
 
314
274
  jq -n \
315
275
  --arg decision "$decision" \