npm - @onlooker-community/ecosystem - Versions diffs - 0.28.1 → 0.29.0 - Mend

@onlooker-community/ecosystem 0.28.1 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/.claude-plugin/marketplace.json +13 -0
package/.claude-plugin/plugin.json +1 -1
package/.release-please-manifest.json +2 -2
package/CHANGELOG.md +7 -0
package/CLAUDE.md +2 -0
package/docs/plugin-catalog.md +125 -0
package/package.json +3 -3
package/plugins/compass/.claude-plugin/plugin.json +1 -1
package/plugins/compass/CHANGELOG.md +7 -0
package/plugins/compass/README.md +1 -3
package/plugins/compass/config.json +1 -2
package/plugins/compass/docs/design.md +1 -2
package/plugins/compass/scripts/hooks/compass-bash-gate.sh +8 -1
package/plugins/compass/scripts/hooks/compass-pre-tool-use.sh +8 -1
package/plugins/compass/scripts/hooks/compass-record-write.sh +5 -0
package/plugins/compass/scripts/hooks/compass-session-start.sh +0 -8
package/plugins/compass/scripts/lib/compass-evaluator.sh +58 -98
package/plugins/compass/scripts/lib/compass-gate.sh +15 -18
package/plugins/compass/scripts/lib/compass-sanitizer.sh +4 -4
package/plugins/compass/scripts/lib/compass-transcript.sh +79 -112
package/plugins/inspector/.claude-plugin/plugin.json +14 -0
package/plugins/inspector/README.md +155 -0
package/plugins/inspector/config.json +25 -0
package/plugins/inspector/docs/design.md +286 -0
package/plugins/inspector/hooks/hooks.json +33 -0
package/plugins/inspector/scripts/hooks/inspector-post-write.sh +124 -0
package/plugins/inspector/scripts/lib/inspector-config.sh +108 -0
package/plugins/inspector/scripts/lib/inspector-events.sh +82 -0
package/plugins/inspector/scripts/lib/inspector-project-key.sh +55 -0
package/plugins/inspector/scripts/lib/inspector-run.sh +305 -0
package/plugins/inspector/scripts/lib/inspector-ulid.sh +45 -0
package/test/bats/archivist-project-key.bats +79 -0
package/test/bats/archivist-storage.bats +79 -0
package/test/bats/compact-tracker.bats +125 -0
package/test/bats/compass-config.bats +65 -0
package/test/bats/compass-gate.bats +129 -0
package/test/bats/compass-sanitizer.bats +69 -0
package/test/bats/compass-symbolic-skip.bats +88 -0
package/test/bats/compass-transcript.bats +80 -0
package/test/bats/inspector-config.bats +118 -0
package/test/bats/inspector-events.bats +156 -0
package/test/bats/inspector-post-write-hook.bats +164 -0
package/test/bats/inspector-project-key.bats +68 -0
package/test/bats/inspector-ulid.bats +34 -0
package/test/bats/onlooker-schema.bats +111 -0
package/test/bats/prompt-rules.bats +98 -0
package/test/bats/session-tracker.bats +260 -0
package/test/bats/skill-usage-tracker.bats +63 -0
package/test/bats/task-tracker.bats +102 -0
package/test/bats/turn-tracker.bats +180 -0
package/test/bats/validate-path.bats +125 -0
package/test/bats/worktree-tracker.bats +167 -0

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -215,6 +215,19 @@
       "license": "MIT",
       "keywords": ["provenance", "blame", "history", "tool-use", "transcript", "audit"],
       "tags": ["observability", "provenance"]
+    },
+    {
+      "name": "inspector",
+      "source": "./plugins/inspector",
+      "description": "Per-edit lint and typecheck gate. Runs the project's configured checks on just the touched file after every Write/Edit/MultiEdit, so the agent sees its own lint and type errors before claiming success. Cheaper than running the full project verify; complements assayer (which catches the agent lying about claims). Emits inspector.* events for downstream analysis. Requires the ecosystem plugin.",
+      "author": {
+        "name": "Onlooker Community"
+      },
+      "homepage": "https://onlooker.dev",
+      "repository": "https://github.com/onlooker-community/ecosystem",
+      "license": "MIT",
+      "keywords": ["verification", "lint", "typecheck", "post-tool-use", "audit", "feedback"],
+      "tags": ["verification", "observability"]
     }
   ]
 }

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ecosystem",
-  "version": "0.28.1",
+  "version": "0.29.0",
   "description": "Observability substrate for Claude Code. Provides the shared $ONLOOKER_DIR storage root (default $HOME/.onlooker), canonical schema-validated event emission, session and tool tracking hooks, and prompt rules. Required by all other Onlooker plugins.",
   "author": {
     "name": "Onlooker Community",

package/.release-please-manifest.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
-  ".": "0.28.1",
+  ".": "0.29.0",
   "plugins/archivist": "0.1.0",
   "plugins/tribunal": "1.0.1",
   "plugins/echo": "0.2.0",
   "plugins/cartographer": "0.2.1",
   "plugins/governor": "0.2.1",
-  "plugins/compass": "0.2.1",
+  "plugins/compass": "0.3.0",
   "plugins/scribe": "0.2.1",
   "plugins/counsel": "0.3.1",
   "plugins/warden": "0.2.0",

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,12 @@
 # Changelog
+## [0.29.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.28.1...ecosystem-v0.29.0) (2026-06-15)
+### Features
+* **inspector:** ship the per-edit lint/typecheck plugin ([#88](https://github.com/onlooker-community/ecosystem/issues/88)) ([2018243](https://github.com/onlooker-community/ecosystem/commit/201824384abd6a4fc5f4395266924aa413a2ffd1))
 ## [0.28.1](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.28.0...ecosystem-v0.28.1) (2026-06-12)

package/CLAUDE.md CHANGED Viewed

@@ -16,6 +16,7 @@ plugins/
   compass/                    ← pre-write alignment gate (design phase)
   echo/                       ← prompt-change regression detection
   governor/                   ← resource governance and budget enforcement
+  inspector/                  ← per-edit lint and typecheck gate
   lineage/                    ← per-change provenance ("why does this line exist?")
   tribunal/                   ← multi-agent quality gate (Actor → Jury → Meta-Judge → Gate)
@@ -42,6 +43,7 @@ scripts/lib/onlooker-event.mjs  ← canonical event builder; all plugins route t
 | assayer | Stop | Verifies the agent's final-message claims against actual command results in the transcript; advisory |
 | bursar | SessionStart, SessionEnd | Rolls each session's spend into a per-project ledger on SessionEnd; surfaces "this project burned $X this week" at SessionStart. Governor is per-session; bursar is the cross-session rollup |
 | lineage | PostToolUse (Edit, Write, MultiEdit) + skill invocation | Records per-change provenance (session_id/turn + redacted, size-capped snippets) into a per-project ledger; `/lineage <file>:<line>` answers "why does this line exist?" by joining records to historian transcripts to recover prompt context |
+| inspector | PostToolUse (Write, Edit, MultiEdit) | Per-edit verification: runs the project's configured lint + typecheck commands on just the touched file and emits `inspector.check.*` / `inspector.run.completed`. Surfaces issues to the agent for the next turn. Cheaper than the planned proctor (which runs the full project verify at Stop); complements assayer (which catches claims the agent makes without running anything) |
 Plugins communicate by emitting events to the JSONL log — they do not call each other directly. All plugins depend on the ecosystem substrate; no plugin depends on another plugin directly.

package/docs/plugin-catalog.md ADDED Viewed

@@ -0,0 +1,125 @@
+# Plugin Catalog
+The full set of Onlooker plugins — shipped and planned — grouped by the layer of agent behavior they address. Each entry is a sketch: name, status, hook surface, one-line purpose. Full design docs land in each plugin's own directory as the plugin is built.
+**Status legend**
+- **shipped** — code lives under `plugins/<name>/` and is exercised by the test suite
+- **design** — design doc exists, no implementation
+- **planned** — named only; this catalog is the first reference
+**Layer map**
+| Layer | What it does |
+|---|---|
+| quality | Judges output after the fact |
+| governance | Enforces resource and policy limits |
+| testing | Detects regressions in agents and prompts |
+| safety | Blocks harmful or ambiguous actions before they land |
+| analysis | Produces structured artifacts about the session and the repo |
+| memory | Persists context across compaction and across sessions |
+| discovery / routing | Surfaces the right ecosystem affordance for the moment |
+| verification / execution | Runs the agent's output and reports whether it actually worked |
+| feedback / adaptation | Detects user signals (corrections, reverts) and feeds them back |
+| provenance | Links artifacts (files, decisions, commits) back to the prompts and agents that produced them |
+---
+## Quality
+Post-hoc judgment of agent output.
+- **tribunal** — shipped — Stop + skill. Multi-agent quality gate: Actor → typed Judges → Meta-Judge → gate decides accept / retry / exhaust.
+- **muse** — planned — UserPromptSubmit. Optional prompt-clarification pass that rewrites a vague prompt into a sharper one before the agent acts. Distinct from compass (which blocks) — muse reshapes.
+- **rubric** — planned — skill only. Manages and versions the scoring rubrics that tribunal and echo consume; `/rubric` diffs or rolls back rubric revisions.
+## Governance
+Resource and policy enforcement.
+- **governor** — shipped — SessionStart, PreToolUse(Task), PostToolUse(Task), Stop. Per-session token and cost spend tracking; gates Task spawns against a configurable budget ceiling.
+- **bursar** — planned — SessionStart, SessionEnd. Per-project, multi-session budget accounting; rolls each session's spend into the project ledger at SessionEnd and surfaces "this project burned $X this week" at SessionStart. Governor is per-session; bursar is the rollup.
+- **arbiter** — planned — PreToolUse. Resolves cross-plugin conflicts (e.g., warden gate is closed but tribunal wants to spawn an Actor) using a declared precedence policy.
+## Testing
+Regression detection for agents and prompts.
+- **echo** — shipped — Stop. Single-judge quality pass when a watched agent file changes; compares the score against a stored baseline to report improved / degraded / neutral.
+- **canary** — planned — cron / scheduled. Synthetic prompts run against watched agents on a schedule; detects drift without waiting for a file edit.
+- **gauntlet** — planned — skill only. Adversarial fixture suite (jailbreaks, ambiguous prompts, edge cases) run on demand against a chosen agent.
+## Safety
+Block harmful or ambiguous actions before they land.
+- **compass** — shipped — PreToolUse(Write, Edit, MultiEdit, Bash). Pre-write intent clarity gate. N=5 parallel Haiku evaluators score whether two independent readers would converge on the same interpretation; blocks below threshold.
+- **warden** — shipped — PostToolUse(WebFetch, Read), PreToolUse(Write, Edit, MultiEdit, Bash), SessionStart. Scans ingested content for prompt-injection patterns; closes a session-scoped gate that blocks write-class tools until cleared.
+## Analysis
+Structured artifacts describing the session and the repo.
+- **cartographer** — shipped — SessionStart, PostToolUse(Write, Edit, MultiEdit). Audits the persistent instruction layer (CLAUDE.md, AGENTS.md, .claude/rules/) for contradictions, shadowing, and drift.
+- **counsel** — shipped — SessionStart. Weekly synthesis brief across all plugin event logs; injected when the last brief is stale.
+- **scribe** — shipped — SessionEnd. Distills the session's "why" — problem context, decisions, tradeoffs — into a readable artifact.
+## Memory
+Context that survives compaction and sessions.
+- **archivist** — shipped — PreCompact, SessionStart. Extracts decisions, dead-ends, and open questions on compaction; reinjects the most important ones at the next SessionStart.
+- **historian** — shipped — SessionEnd. Chunks and sanitizes the session transcript and stores chunks locally for future retrieval. Indexing pipeline only; retrieval lands in a follow-up.
+- **librarian** — shipped — SessionEnd, skill. Consolidates archivist's per-session artifacts into the user's durable typed memory store; queues classified proposals for explicit confirmation.
+- **curator** — shipped — SessionStart, skill. Maintenance pass over the typed memory store: four cheap heuristic checks (date decay, broken paths, broken index, orphaned memory) inside a wall-clock budget; surfaces findings, never edits the store directly.
+## Discovery / Routing
+Help the agent and the user find the right affordance for the moment.
+- **wayfinder** — planned — UserPromptSubmit. Ranks ecosystem plugins, skills, and agents against the current prompt; surfaces the top 1–2 as a `wayfinder.suggestion` event.
+- **herald** — planned — SessionStart. Announces plugins, skills, or agents added since the user's last session in this project. One-time per item, dismissable.
+- **dispatcher** — planned — UserPromptSubmit. Narrow intent classifier ("commit", "ship a PR", "review changes") that maps directly to the canonical skill. Narrower than wayfinder; fewer false positives.
+## Verification / Execution
+Run the agent's output. Report what actually happened.
+- **proctor** — planned — Stop, PostToolUse(Edit, Write). Runs the project's verification command (configurable: `npm test`, `mise run check`, `cargo test`, …) after writes or at Stop; emits `proctor.verify.passed` or `.failed`.
+- **assayer** — planned — Stop. Parses the agent's final message for testable claims ("I ran the tests", "the build passes") and verifies them against actual exit codes in the session log. Catches lying-without-malice.
+- **inspector** — planned — PostToolUse(Edit, Write). Runs lint and typecheck on just the touched files. Cheaper than proctor; fires far more often.
+## Feedback / Adaptation
+Detect user signals and feed them back into the system.
+- **attendant** — planned — UserPromptSubmit. Detects course-corrections in the user's prompt ("no", "stop", "don't", revert patterns); emits `attendant.pushback.detected` for other plugins to consume.
+- **interpreter** — planned — consumes attendant events. Classifies pushback tone (frustrated / clarifying / neutral) so downstream plugins don't overreact to clarifying questions.
+- **adept** — planned — SessionStart. Accumulates pushback patterns over sessions; injects "you've corrected this pattern N times" hints. Closes the loop that echo opens for prompt files.
+## Provenance
+Link artifacts back to the prompts and agents that produced them.
+- **lineage** — planned — PostToolUse(Edit, Write, MultiEdit). Records the prompt + agent + session that produced each file change; builds a queryable graph by joining historian transcripts with tool-use events. Answers "why does this line exist?"
+- **ledger** — planned — PostToolUse(*) write-class. Append-only audit record of every write-class tool call with the prompt and agent context attached. `/ledger` queries by file, prompt substring, or time range.
+- **witness** — planned — Stop. Captures the deciding assistant turn — the moment the agent committed to a course of action — and stores it as a discrete artifact. Distinct from scribe (which writes a narrative) — witness preserves the pivot itself.
+---
+## Coverage check
+| Layer | Plugins |
+|---|---|
+| quality | tribunal, muse, rubric |
+| governance | governor, bursar, arbiter |
+| testing | echo, canary, gauntlet |
+| safety | compass, warden |
+| analysis | cartographer, counsel, scribe |
+| memory | archivist, historian, librarian, curator |
+| discovery / routing | wayfinder, herald, dispatcher |
+| verification / execution | proctor, assayer, inspector |
+| feedback / adaptation | attendant, interpreter, adept |
+| provenance | lineage, ledger, witness |
+Every layer holds at least two plugins; most hold three. Total: 12 shipped, 0 design, 18 planned.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@onlooker-community/ecosystem",
-  "version": "0.28.1",
+  "version": "0.29.0",
   "description": "Agents, skills, hooks, commands, rules, and MCP configurations that power [Onlooker](https://onlooker.dev)",
   "author": {
     "name": "Onlooker Community",
@@ -19,14 +19,14 @@
     "onlooker-install": "install.sh"
   },
   "dependencies": {
-    "@onlooker-community/schema": "^2.8.0"
+    "@onlooker-community/schema": "^2.9.0"
   },
   "scripts": {
     "postinstall": "echo '\\n  onlooker-ecosystem installed!\\n  Run: npx onlooker-install typescript\\n  Docs: https://github.com/onlooker-community/ecosystem\\n'",
     "test": "npm run test:bats && npm run test:schema",
     "test:bats": "bats test/bats",
     "test:schema": "node --test test/node/*.test.mjs",
-    "test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh plugins/historian/scripts/hooks/*.sh plugins/historian/scripts/lib/*.sh plugins/assayer/scripts/hooks/*.sh plugins/assayer/scripts/lib/*.sh plugins/cartographer/scripts/hooks/*.sh plugins/cartographer/scripts/lib/*.sh plugins/bursar/scripts/hooks/*.sh plugins/bursar/scripts/lib/*.sh plugins/lineage/scripts/hooks/*.sh plugins/lineage/scripts/lib/*.sh",
+    "test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh plugins/historian/scripts/hooks/*.sh plugins/historian/scripts/lib/*.sh plugins/assayer/scripts/hooks/*.sh plugins/assayer/scripts/lib/*.sh plugins/cartographer/scripts/hooks/*.sh plugins/cartographer/scripts/lib/*.sh plugins/bursar/scripts/hooks/*.sh plugins/bursar/scripts/lib/*.sh plugins/lineage/scripts/hooks/*.sh plugins/lineage/scripts/lib/*.sh plugins/inspector/scripts/hooks/*.sh plugins/inspector/scripts/lib/*.sh",
     "lint:references": "node scripts/lint/check-references.mjs",
     "lint:manifests": "node scripts/lint/check-manifests.mjs",
     "coverage:node": "node scripts/coverage/run-coverage.mjs",

package/plugins/compass/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "compass",
-  "version": "0.2.1",
+  "version": "0.3.0",
   "description": "Pre-write intent clarity gate. Intercepts write-class tool calls and requires a confidence threshold before allowing them to proceed. Evaluates the pending write against the prior assistant turn as context to avoid false positives on question-answer turns. Builds on the Onlooker ecosystem plugin.",
   "author": {
     "name": "Onlooker Community",

package/plugins/compass/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,12 @@
 # Changelog
+## [0.3.0](https://github.com/onlooker-community/ecosystem/compare/compass-v0.2.1...compass-v0.3.0) (2026-06-15)
+### Features
+* **inspector:** ship the per-edit lint/typecheck plugin ([#88](https://github.com/onlooker-community/ecosystem/issues/88)) ([2018243](https://github.com/onlooker-community/ecosystem/commit/201824384abd6a4fc5f4395266924aa413a2ffd1))
 ## [0.2.1](https://github.com/onlooker-community/ecosystem/compare/compass-v0.2.0...compass-v0.2.1) (2026-06-12)

package/plugins/compass/README.md CHANGED Viewed

@@ -74,8 +74,7 @@ All keys are optional. Unset keys fall back to the plugin's `config.json` defaul
       "identity_match": "dir_plus_stem"
     },
     "transcript": {
-      "prior_turn_chars_max": 800,
-      "transcript_max_age_seconds": 300
+      "prior_turn_chars_max": 800
     },
     "skip_patterns": {
       "reply_to_question": {
@@ -115,7 +114,6 @@ All keys are optional. Unset keys fall back to the plugin's `config.json` defaul
 | `cooldown.seconds` | `120` | A write whose path shares a parent directory and filename stem with a recent successful write is skipped within this window. |
 | `cooldown.identity_match` | `dir_plus_stem` | Cooldown identity strategy. Stem comparison strips only the final extension; the cooldown does not carry across a rename. |
 | `transcript.prior_turn_chars_max` | `800` | Maximum characters of the prior assistant turn fed into the evaluator. Set to `0` to omit the prior turn for near-zero egress. |
-| `transcript.transcript_max_age_seconds` | `300` | Maximum age of the transcript file Compass will read the prior turn from. |
 | `skip_patterns.reply_to_question.enabled` | `true` | Enables the symbolic skip layer. When disabled, every write that passes the trigger gate reaches the LLM evaluator. |
 | `max_checks_per_turn` | `3` | Per-turn evaluation budget. Writes beyond this skip with reason `turn_budget_exhausted`. |
 | `min_context_chars` | `80` | Minimum sanitized context length. Shorter context skips with reason `insufficient_context`. |

package/plugins/compass/config.json CHANGED Viewed

@@ -20,8 +20,7 @@
       "identity_match": "dir_plus_stem"
     },
     "transcript": {
-      "prior_turn_chars_max": 800,
-      "transcript_max_age_seconds": 300
+      "prior_turn_chars_max": 800
     },
     "skip_patterns": {
       "reply_to_question": {

package/plugins/compass/docs/design.md CHANGED Viewed

@@ -223,8 +223,7 @@ The re-check is capped at one per intervention. After one re-check, the three pa
       "identity_match": "dir_plus_stem"
     },
     "transcript": {
-      "prior_turn_chars_max": 800,
-      "transcript_max_age_seconds": 300
+      "prior_turn_chars_max": 800
     },
     "skip_patterns": {
       "reply_to_question": {

package/plugins/compass/scripts/hooks/compass-bash-gate.sh CHANGED Viewed

@@ -13,6 +13,12 @@
 set -uo pipefail
+# Recursion guard — must be first.
+# When the evaluator shells out to `claude -p`, that subprocess can
+# trigger its own Bash hooks, which would re-enter Compass.
+[[ "${COMPASS_NESTED:-}" == "1" ]] && exit 0
+export COMPASS_NESTED=1
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
@@ -35,6 +41,7 @@ INPUT=$(cat)
 SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
 CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
 COMMAND=$(printf '%s' "$INPUT" | jq -r '.tool_input.command // ""' 2>/dev/null) || COMMAND=""
+TRANSCRIPT_PATH=$(printf '%s' "$INPUT" | jq -r '.transcript_path // ""' 2>/dev/null) || TRANSCRIPT_PATH=""
 export _HOOK_SESSION_ID="$SESSION_ID"
@@ -91,5 +98,5 @@ if ! _is_write_command "$COMMAND"; then
 	exit 0
 fi
-compass_run_gate "Bash" "" "bash_write" "$COMMAND" "$SESSION_ID" "$CWD"
+compass_run_gate "Bash" "" "bash_write" "$COMMAND" "$SESSION_ID" "$CWD" "$TRANSCRIPT_PATH"
 exit $?

package/plugins/compass/scripts/hooks/compass-pre-tool-use.sh CHANGED Viewed

@@ -12,6 +12,12 @@
 set -uo pipefail
+# Recursion guard — must be first.
+# When the evaluator shells out to `claude -p`, that subprocess can
+# trigger its own Write/Edit hooks, which would re-enter Compass.
+[[ "${COMPASS_NESTED:-}" == "1" ]] && exit 0
+export COMPASS_NESTED=1
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
@@ -34,6 +40,7 @@ INPUT=$(cat)
 SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
 CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
 TOOL_NAME=$(printf '%s' "$INPUT" | jq -r '.tool_name // ""' 2>/dev/null) || TOOL_NAME=""
+TRANSCRIPT_PATH=$(printf '%s' "$INPUT" | jq -r '.transcript_path // ""' 2>/dev/null) || TRANSCRIPT_PATH=""
 export _HOOK_SESSION_ID="$SESSION_ID"
@@ -87,5 +94,5 @@ case "$TOOL_NAME" in
 		;;
 esac
-compass_run_gate "$TOOL_NAME" "$FILE_PATH" "$OPERATION" "$CONTEXT" "$SESSION_ID" "$CWD"
+compass_run_gate "$TOOL_NAME" "$FILE_PATH" "$OPERATION" "$CONTEXT" "$SESSION_ID" "$CWD" "$TRANSCRIPT_PATH"
 exit $?

package/plugins/compass/scripts/hooks/compass-record-write.sh CHANGED Viewed

@@ -11,6 +11,11 @@
 set -uo pipefail
+# Recursion guard — must be first.
+# A nested `claude -p` Write would otherwise re-enter the cooldown writer.
+[[ "${COMPASS_NESTED:-}" == "1" ]] && exit 0
+export COMPASS_NESTED=1
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

package/plugins/compass/scripts/hooks/compass-session-start.sh CHANGED Viewed

@@ -18,14 +18,6 @@ set -uo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
-_ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
-if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
-	_candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
-	if [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]]; then
-		_ECOSYSTEM_ROOT="$_candidate"
-	fi
-fi
 export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
 # shellcheck source=../lib/compass-config.sh

package/plugins/compass/scripts/lib/compass-evaluator.sh CHANGED Viewed

@@ -1,20 +1,22 @@
 #!/usr/bin/env bash
-# N=5 parallel Haiku evaluator for Compass.
+# N=5 parallel `claude -p` evaluator for Compass.
 #
-# Launches N independent evaluator calls, aggregates scores, and returns
-# a decision (pass/fail) with confidence and stddev.
+# Launches N independent evaluator calls via `claude -p --max-turns 1`,
+# aggregates scores, and returns a decision (pass / fail / error) with
+# confidence and stddev.
 #
 # Exposes:
 #   compass_evaluate <tool_name> <file_path> <operation> \
 #                    <prior_turn> <context_excerpt> <session_id>
 #
-# Exits 0 if confidence >= threshold AND stddev <= stddev_threshold.
-# Exits 1 if confidence < threshold OR stddev > stddev_threshold (block).
-# Exits 2 on evaluator error (respects error_policy).
-#
 # Writes a JSON result object to stdout:
 #   {"decision":"pass|fail|error","confidence":<f>,"stddev":<f>,
 #    "primary_concern":"<str>","rationale":"<str>","sample_count":<n>}
+#
+# Exit codes:
+#   0  pass (confidence >= threshold AND stddev <= stddev_threshold)
+#   1  fail (block)
+#   2  error (respects error_policy)
 _COMPASS_EVAL_PROMPT_NO_PRIOR='You are evaluating whether a pending write operation has sufficient intent clarity.
@@ -78,87 +80,65 @@ path: FILE_PATH_PLACEHOLDER
 operation: OPERATION_PLACEHOLDER
 </tool_input>'
-# Run a single evaluator call. Writes JSON to a temp file at $output_file.
+# Strip leading/trailing markdown fences a model occasionally emits.
+_compass_strip_fences() {
+	printf '%s' "$1" | sed -e 's/^```json//' -e 's/^```//' -e 's/```$//'
+}
+# Run a single evaluator call via `claude -p`. Writes JSON to $output_file.
 # $1 — prompt text
 # $2 — model
-# $3 — temperature (as string, e.g. "0.3")
-# $4 — max_output_tokens
-# $5 — output file path
-# $6 — API key env var name (default: ANTHROPIC_API_KEY)
+# $3 — timeout (seconds)
+# $4 — output file path
 _compass_run_single_eval() {
 	local prompt="$1"
 	local model="$2"
-	local temperature="$3"
-	local max_tokens="$4"
-	local output_file="$5"
-	local api_key_var="${6:-ANTHROPIC_API_KEY}"
-	local api_key="${!api_key_var:-}"
-	[[ -z "$api_key" ]] && { printf '{"error":"no_api_key"}' > "$output_file"; return 1; }
-	local request_body
-	request_body=$(jq -n \
-		--arg model "$model" \
-		--argjson temp "$temperature" \
-		--argjson max_tokens "$max_tokens" \
-		--arg prompt "$prompt" \
-		'{
-			model: $model,
-			max_tokens: $max_tokens,
-			temperature: $temp,
-			messages: [{"role": "user", "content": $prompt}]
-		}' 2>/dev/null) || { printf '{"error":"request_build_failed"}' > "$output_file"; return 1; }
-	local http_response http_code response_body
-	http_response=$(curl -s -w '\n%{http_code}' \
-		-X POST "https://api.anthropic.com/v1/messages" \
-		-H "x-api-key: ${api_key}" \
-		-H "anthropic-version: 2023-06-01" \
-		-H "content-type: application/json" \
-		-d "$request_body" \
-		--max-time 15 \
-		2>/dev/null) || { printf '{"error":"curl_failed"}' > "$output_file"; return 1; }
-	http_code=$(printf '%s' "$http_response" | tail -n1)
-	response_body=$(printf '%s' "$http_response" | head -n -1)
-	if [[ "$http_code" == "429" ]]; then
-		sleep 2
-		http_response=$(curl -s -w '\n%{http_code}' \
-			-X POST "https://api.anthropic.com/v1/messages" \
-			-H "x-api-key: ${api_key}" \
-			-H "anthropic-version: 2023-06-01" \
-			-H "content-type: application/json" \
-			-d "$request_body" \
-			--max-time 15 \
-			2>/dev/null) || { printf '{"error":"curl_failed_retry"}' > "$output_file"; return 1; }
-		http_code=$(printf '%s' "$http_response" | tail -n1)
-		response_body=$(printf '%s' "$http_response" | head -n -1)
-	fi
+	local timeout_secs="$3"
+	local output_file="$4"
-	if [[ "$http_code" != "200" ]]; then
-		printf '{"error":"http_%s"}' "$http_code" > "$output_file"
+	if ! command -v claude >/dev/null 2>&1; then
+		printf '{"error":"claude_cli_missing"}' > "$output_file"
 		return 1
 	fi
-	local content
-	content=$(printf '%s' "$response_body" | jq -r '.content[0].text // empty' 2>/dev/null) || {
-		printf '{"error":"parse_failed"}' > "$output_file"
+	local prompt_file
+	prompt_file=$(mktemp -t compass-prompt.XXXXXX 2>/dev/null) || prompt_file="/tmp/compass-prompt.$$.${RANDOM}"
+	printf '%s' "$prompt" > "$prompt_file"
+	local args=(-p --max-turns 1)
+	[[ -n "$model" ]] && args+=(--model "$model")
+	local response=""
+	if command -v timeout >/dev/null 2>&1; then
+		response=$(COMPASS_NESTED=1 timeout "$timeout_secs" claude "${args[@]}" <"$prompt_file" 2>/dev/null) || response=""
+	elif command -v gtimeout >/dev/null 2>&1; then
+		response=$(COMPASS_NESTED=1 gtimeout "$timeout_secs" claude "${args[@]}" <"$prompt_file" 2>/dev/null) || response=""
+	else
+		response=$(COMPASS_NESTED=1 claude "${args[@]}" <"$prompt_file" 2>/dev/null) || response=""
+	fi
+	rm -f "$prompt_file" 2>/dev/null || true
+	if [[ -z "$response" ]]; then
+		printf '{"error":"empty_response"}' > "$output_file"
 		return 1
-	}
+	fi
-	# Validate the model returned parseable JSON with a score field.
+	local clean
+	clean=$(_compass_strip_fences "$response")
+	# Confirm the model returned a JSON object with a numeric score.
 	local score
-	score=$(printf '%s' "$content" | jq -r '.score // empty' 2>/dev/null) || score=""
+	score=$(printf '%s' "$clean" | jq -r '.score // empty' 2>/dev/null) || score=""
 	if [[ -z "$score" ]]; then
 		printf '{"error":"invalid_json_response"}' > "$output_file"
 		return 1
 	fi
-	printf '%s' "$content" > "$output_file"
+	printf '%s' "$clean" > "$output_file"
 }
-# Build the evaluator prompt.
+# Build the evaluator prompt by interpolating the data slots.
 _compass_build_prompt() {
 	local prior_turn="$1"
 	local context_excerpt="$2"
@@ -182,7 +162,7 @@ _compass_build_prompt() {
 	printf '%s' "$template"
 }
-# Compute mean of space-separated floats.
+# Mean of space-separated floats.
 _compass_mean() {
 	local scores=("$@")
 	local n="${#scores[@]}"
@@ -195,7 +175,7 @@ _compass_mean() {
 	awk "BEGIN {printf \"%.4f\", $sum / $n}" 2>/dev/null || printf '0'
 }
-# Compute population stddev of space-separated floats.
+# Population stddev of space-separated floats.
 _compass_stddev() {
 	local scores=("$@")
 	local n="${#scores[@]}"
@@ -213,7 +193,7 @@ _compass_stddev() {
 # Main evaluator entry point.
 # $1 — tool_name
 # $2 — file_path
-# $3 — operation  (write|edit|multi_edit|bash)
+# $3 — operation  (write|edit|multi_edit|bash_write)
 # $4 — prior_turn (may be empty)
 # $5 — context_excerpt
 # $6 — session_id
@@ -225,17 +205,11 @@ compass_evaluate() {
 	local context_excerpt="$5"
 	local session_id="${6:-unknown}"
-	local model
+	local model n_samples timeout_secs min_valid
 	model=$(compass_config_get '.compass.evaluator.model')
 	model="${model:-claude-haiku-4-5-20251001}"
-	local n_samples temperature max_tokens timeout_secs min_valid
 	n_samples=$(compass_config_get '.compass.evaluator.n')
 	n_samples="${n_samples:-5}"
-	temperature=$(compass_config_get '.compass.evaluator.temperature')
-	temperature="${temperature:-0.3}"
-	max_tokens=$(compass_config_get '.compass.evaluator.max_output_tokens')
-	max_tokens="${max_tokens:-128}"
 	timeout_secs=$(compass_config_get '.compass.evaluator.sample_timeout_seconds')
 	timeout_secs="${timeout_secs:-8}"
 	min_valid=$(compass_config_get '.compass.evaluator.min_valid_samples')
@@ -250,34 +224,22 @@ compass_evaluate() {
 	local prompt
 	prompt=$(_compass_build_prompt "$prior_turn" "$context_excerpt" "$tool_name" "$file_path" "$operation")
-	# Launch N parallel eval calls.
 	local tmp_dir
-	tmp_dir=$(mktemp -d -t compass-eval.XXXXXX 2>/dev/null) || tmp_dir="/tmp/compass-eval.$$"
+	tmp_dir=$(mktemp -d -t compass-eval.XXXXXX 2>/dev/null) || tmp_dir="/tmp/compass-eval.$$.${RANDOM}"
 	mkdir -p "$tmp_dir"
 	local pids=()
 	local i
 	for (( i=0; i<n_samples; i++ )); do
 		local out_file="${tmp_dir}/sample_${i}.json"
-		(
-			_compass_run_single_eval \
-				"$prompt" "$model" "$temperature" "$max_tokens" "$out_file"
-		) &
+		_compass_run_single_eval \
+			"$prompt" "$model" "$timeout_secs" "$out_file" &
 		pids+=($!)
 	done
-	# Collect with timeout watchdog.
-	local deadline=$(( $(date +%s) + timeout_secs ))
 	local pid
 	for pid in "${pids[@]}"; do
-		local now
-		now=$(date +%s)
-		local remaining=$(( deadline - now ))
-		if [[ "$remaining" -gt 0 ]]; then
-			wait "$pid" 2>/dev/null || true
-		else
-			kill "$pid" 2>/dev/null || true
-		fi
+		wait "$pid" 2>/dev/null || true
 	done
 	# Aggregate valid scores.
@@ -307,9 +269,7 @@ compass_evaluate() {
 		error_policy="${error_policy:-closed}"
 		local decision="error"
-		if [[ "$error_policy" == "open" ]]; then
-			decision="pass"
-		fi
+		[[ "$error_policy" == "open" ]] && decision="pass"
 		jq -n \
 			--arg decision "$decision" \