npm - @onlooker-community/ecosystem - Versions diffs - 0.17.0 → 0.19.0 - Mend

@onlooker-community/ecosystem 0.17.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/.claude-plugin/marketplace.json +26 -0
package/.claude-plugin/plugin.json +1 -1
package/.release-please-manifest.json +4 -2
package/CHANGELOG.md +14 -0
package/CLAUDE.md +1 -0
package/package.json +2 -2
package/plugins/counsel/.claude-plugin/plugin.json +14 -0
package/plugins/counsel/CHANGELOG.md +8 -0
package/plugins/counsel/config.json +20 -0
package/plugins/counsel/hooks/hooks.json +15 -0
package/plugins/counsel/scripts/hooks/counsel-session-start.sh +106 -0
package/plugins/counsel/scripts/lib/counsel-brief.sh +247 -0
package/plugins/counsel/scripts/lib/counsel-config.sh +72 -0
package/plugins/counsel/scripts/lib/counsel-events.sh +80 -0
package/plugins/counsel/scripts/lib/counsel-project-key.sh +79 -0
package/plugins/counsel/scripts/lib/counsel-reader.sh +114 -0
package/plugins/counsel/scripts/lib/counsel-synthesize.sh +103 -0
package/plugins/counsel/scripts/lib/counsel-ulid.sh +45 -0
package/plugins/warden/.claude-plugin/plugin.json +14 -0
package/plugins/warden/CHANGELOG.md +10 -0
package/plugins/warden/config.json +51 -0
package/plugins/warden/docs/adr/001-detect-after-ingest-gate-before-action.md +62 -0
package/plugins/warden/docs/design.md +123 -0
package/plugins/warden/hooks/hooks.json +73 -0
package/plugins/warden/scripts/hooks/warden-post-tool-use.sh +201 -0
package/plugins/warden/scripts/hooks/warden-pre-tool-use.sh +94 -0
package/plugins/warden/scripts/hooks/warden-session-start.sh +52 -0
package/plugins/warden/scripts/lib/warden-cli.sh +124 -0
package/plugins/warden/scripts/lib/warden-config.sh +79 -0
package/plugins/warden/scripts/lib/warden-evaluator.sh +246 -0
package/plugins/warden/scripts/lib/warden-events.sh +85 -0
package/plugins/warden/scripts/lib/warden-gate-state.sh +105 -0
package/plugins/warden/scripts/lib/warden-patterns.sh +132 -0
package/plugins/warden/scripts/lib/warden-sanitizer.sh +80 -0
package/plugins/warden/scripts/lib/warden-scanner.sh +119 -0
package/plugins/warden/scripts/lib/warden-ulid.sh +50 -0
package/plugins/warden/skills/warden/SKILL.md +49 -0
package/release-please-config.json +32 -0
package/test/bats/counsel-project-key.bats +82 -0
package/test/bats/counsel-reader.bats +132 -0
package/test/bats/warden-config.bats +54 -0
package/test/bats/warden-events.bats +85 -0
package/test/bats/warden-gate-state.bats +67 -0
package/test/bats/warden-patterns.bats +58 -0
package/test/bats/warden-sanitizer.bats +53 -0
package/test/bats/warden-scanner.bats +56 -0
package/test/bats/warden-ulid.bats +30 -0

package/plugins/counsel/scripts/lib/counsel-events.sh ADDED Viewed

@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# Canonical counsel.* event emission.
+#
+# Thin wrapper around the ecosystem plugin's onlooker-event.mjs `emit` mode.
+# Every emission is validated against @onlooker-community/schema before being
+# appended to $ONLOOKER_EVENTS_LOG (defaults to $ONLOOKER_DIR/logs/onlooker-events.jsonl).
+#
+# Usage:
+#   counsel_emit_event "counsel.brief.generated" '{"period_start":"...","period_end":"...","recommendation_count":3}'
+_COUNSEL_PLUGIN_NAME="counsel"
+_counsel_event_js_path() {
+	if [[ -n "${_ONLOOKER_EVENT_JS:-}" && -f "$_ONLOOKER_EVENT_JS" ]]; then
+		printf '%s' "$_ONLOOKER_EVENT_JS"
+		return 0
+	fi
+	local plugin_root="${CLAUDE_PLUGIN_ROOT:-}"
+	local candidates=(
+		"${plugin_root}/scripts/lib/onlooker-event.mjs"
+		"${plugin_root}/../../scripts/lib/onlooker-event.mjs"
+	)
+	local c
+	for c in "${candidates[@]}"; do
+		[[ -f "$c" ]] && { printf '%s' "$c"; return 0; }
+	done
+	return 1
+}
+_counsel_session_id() {
+	if [[ -n "${_HOOK_SESSION_ID:-}" ]]; then
+		printf '%s' "$_HOOK_SESSION_ID"
+		return 0
+	fi
+	if [[ -n "${CLAUDE_SESSION_ID:-}" ]]; then
+		printf '%s' "$CLAUDE_SESSION_ID"
+		return 0
+	fi
+	printf 'unknown'
+}
+counsel_emit_event() {
+	local event_type="${1:-}"
+	local payload="${2:-}"
+	[[ -z "$event_type" || -z "$payload" ]] && return 1
+	local event_js
+	event_js=$(_counsel_event_js_path) || return 1
+	local session_id
+	session_id=$(_counsel_session_id)
+	local params
+	params=$(jq -n \
+		--arg plugin "$_COUNSEL_PLUGIN_NAME" \
+		--arg sid "$session_id" \
+		--arg type "$event_type" \
+		--argjson payload "$payload" \
+		'{plugin: $plugin, session_id: $sid, event_type: $type, payload: $payload}' \
+		2>/dev/null) || return 1
+	local event
+	local stderr_file
+	stderr_file=$(mktemp -t counsel-event-err.XXXXXX 2>/dev/null) || stderr_file="/tmp/counsel-event-err.$$"
+	event=$(printf '%s' "$params" \
+		| ONLOOKER_DIR="${ONLOOKER_DIR:-$HOME/.onlooker}" \
+		  ONLOOKER_PLUGIN_NAME="$_COUNSEL_PLUGIN_NAME" \
+		  node "$event_js" emit 2>"$stderr_file") || {
+		printf 'counsel_emit_event: schema validation failed for %s\n' "$event_type" >&2
+		[[ -s "$stderr_file" ]] && cat "$stderr_file" >&2
+		rm -f "$stderr_file"
+		return 1
+	}
+	rm -f "$stderr_file"
+	local log_path="${ONLOOKER_EVENTS_LOG:-${ONLOOKER_DIR:-$HOME/.onlooker}/logs/onlooker-events.jsonl}"
+	mkdir -p "$(dirname "$log_path")" 2>/dev/null || return 1
+	printf '%s\n' "$event" >> "$log_path"
+}

package/plugins/counsel/scripts/lib/counsel-project-key.sh ADDED Viewed

@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# Project key derivation for Counsel.
+#
+# Mirrors the tribunal/scribe project-key scheme so plugins partition storage
+# identically. A project key is a stable 12-char hex identifier derived from
+# the git remote URL (preferred) or repo root path (fallback).
+# Print the first 12 hex characters of the SHA-256 digest of $1.
+# Uses `shasum -a 256` when available, otherwise `sha256sum`; returns 1
+# when neither tool is installed.
+_counsel_sha256_first12() {
+	local text="$1"
+	local -a hasher
+	if command -v shasum >/dev/null 2>&1; then
+		hasher=(shasum -a 256)
+	elif command -v sha256sum >/dev/null 2>&1; then
+		hasher=(sha256sum)
+	else
+		return 1
+	fi
+	printf '%s' "$text" | "${hasher[@]}" 2>/dev/null | cut -c1-12
+}
+counsel_project_remote_url() {
+	local cwd="${1:-}"
+	[[ -z "$cwd" || ! -d "$cwd" ]] && return 0
+	git -C "$cwd" remote get-url origin 2>/dev/null || true
+}
+# Worktree-aware: uses common-dir so worktrees share a key with the main repo.
+counsel_project_repo_root() {
+	local cwd="${1:-}"
+	[[ -z "$cwd" || ! -d "$cwd" ]] && return 0
+	if ! git -C "$cwd" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+		return 0
+	fi
+	local common_dir toplevel
+	common_dir=$(git -C "$cwd" rev-parse --git-common-dir 2>/dev/null) || return 0
+	if [[ -n "$common_dir" && "$common_dir" != /* ]]; then
+		common_dir="$(cd "$cwd" && cd "$common_dir" 2>/dev/null && pwd -P)" || common_dir=""
+	fi
+	if [[ -n "$common_dir" && -d "$common_dir" ]]; then
+		toplevel="$(cd "$common_dir/.." 2>/dev/null && pwd -P)" || toplevel=""
+	fi
+	if [[ -z "$toplevel" ]]; then
+		toplevel=$(git -C "$cwd" rev-parse --show-toplevel 2>/dev/null || true)
+		[[ -n "$toplevel" ]] && toplevel="$(cd "$toplevel" 2>/dev/null && pwd -P)"
+	fi
+	printf '%s' "$toplevel"
+}
+counsel_project_key() {
+	local cwd="${1:-}"
+	[[ -z "$cwd" ]] && cwd="$(pwd)"
+	local remote
+	remote=$(counsel_project_remote_url "$cwd")
+	if [[ -n "$remote" ]]; then
+		_counsel_sha256_first12 "remote:$remote"
+		return 0
+	fi
+	local root
+	root=$(counsel_project_repo_root "$cwd")
+	if [[ -n "$root" ]]; then
+		_counsel_sha256_first12 "root:$root"
+		return 0
+	fi
+	return 0
+}
+counsel_project_dir() {
+	local project_key="${1:-}"
+	[[ -z "$project_key" ]] && return 1
+	local onlooker_dir="${ONLOOKER_DIR:-${HOME}/.onlooker}"
+	printf '%s' "${onlooker_dir}/counsel/${project_key}/briefs"
+}

package/plugins/counsel/scripts/lib/counsel-reader.sh ADDED Viewed

@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Event log reader for Counsel.
+#
+# Reads $ONLOOKER_EVENTS_LOG and returns a filtered, summarized view of
+# the last N days suitable for passing to the synthesis prompt.
+#
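+# Exposes:
+#   counsel_read_events <lookback_days> <chars_max>
+#     Echoes the matching events as compact JSON, one object per line
+#     (JSONL), capped at <chars_max>; echoes an empty string on failure.
+#
+#   counsel_sources_from_events <events_text>
+#     Takes the JSONL text produced by counsel_read_events and echoes a JSON
+#     array of the CounselSource strings present in the event batch.
+#
+#   counsel_count_events <events_text>
+#     Echoes the number of non-empty lines (events) in the JSONL text.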
+# Maps event_type prefixes to CounselSource values.
+_counsel_source_for_type() {
+	local event_type="${1:-}"
+	case "$event_type" in
+		tribunal.*)     printf 'tribunal_verdicts' ;;
+		echo.*)         printf 'echo_regressions' ;;
+		sentinel.*)     printf 'sentinel_audit' ;;
+		warden.*)       printf 'warden_audit' ;;
+		oracle.*)       printf 'oracle_calibrations' ;;
+		meridian.*)     printf 'meridian_reliance' ;;
+		*)              printf 'onlooker_events' ;;
+	esac
+}
+counsel_read_events() {
+	local lookback_days="${1:-30}"
+	local chars_max="${2:-60000}"
+	local log_path="${ONLOOKER_EVENTS_LOG:-${ONLOOKER_DIR:-$HOME/.onlooker}/logs/onlooker-events.jsonl}"
+	[[ -f "$log_path" ]] || { printf ''; return 0; }
+	# Compute cutoff as an ISO 8601 date string. ISO 8601 strings are
+	# lexicographically sortable, so string comparison is safe for filtering.
+	local cutoff_ts
+	if [[ "$(uname)" == "Darwin" ]]; then
+		cutoff_ts=$(date -v "-${lookback_days}d" -u '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null) \
+			|| cutoff_ts=""
+	else
+		cutoff_ts=$(date -d "-${lookback_days} days" -u '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null) \
+			|| cutoff_ts=""
+	fi
+	# Filter to events within the lookback window. If cutoff_ts is empty (date
+	# command unavailable) fall through and include all events.
+	local summary
+	# -rc: compact output keeps each object on one line (JSONL-shaped), which
+	# downstream counsel_count_events and counsel_sources_from_events require.
+	summary=$(jq -rc --arg cutoff "$cutoff_ts" '
+		select(.timestamp != null) |
+		select($cutoff == "" or .timestamp >= $cutoff) |
+		{
+			type:      .event_type,
+			plugin:    (.plugin // "unknown"),
+			ts:        .timestamp,
+			session:   (.session_id // ""),
+			payload:   (.payload // {})
+		}
+	' "$log_path" 2>/dev/null | head -c "$chars_max") || summary=""
+	printf '%s' "$summary"
+}
+counsel_sources_from_events() {
+	local events_text="${1:-}"
+	[[ -z "$events_text" ]] && { printf '["onlooker_events"]'; return 0; }
+	local sources=()
+	local seen_tribunal=0 seen_echo=0 seen_sentinel=0 seen_warden=0 seen_oracle=0 seen_meridian=0 seen_other=0
+	while IFS= read -r line; do
+		[[ -z "$line" ]] && continue
+		local etype
+		etype=$(printf '%s' "$line" | jq -r '.type // ""' 2>/dev/null) || continue
+		case "$etype" in
+			tribunal.*)  seen_tribunal=1 ;;
+			echo.*)      seen_echo=1 ;;
+			sentinel.*)  seen_sentinel=1 ;;
+			warden.*)    seen_warden=1 ;;
+			oracle.*)    seen_oracle=1 ;;
+			meridian.*)  seen_meridian=1 ;;
+			*)           seen_other=1 ;;
+		esac
+	done <<< "$events_text"
+	[[ "$seen_other" -eq 1 ]]    && sources+=("\"onlooker_events\"")
+	[[ "$seen_tribunal" -eq 1 ]] && sources+=("\"tribunal_verdicts\"")
+	[[ "$seen_echo" -eq 1 ]]     && sources+=("\"echo_regressions\"")
+	[[ "$seen_sentinel" -eq 1 ]] && sources+=("\"sentinel_audit\"")
+	[[ "$seen_warden" -eq 1 ]]   && sources+=("\"warden_audit\"")
+	[[ "$seen_oracle" -eq 1 ]]   && sources+=("\"oracle_calibrations\"")
+	[[ "$seen_meridian" -eq 1 ]] && sources+=("\"meridian_reliance\"")
+	if [[ "${#sources[@]}" -eq 0 ]]; then
+		printf '["onlooker_events"]'
+		return 0
+	fi
+	local joined
+	joined=$(IFS=,; printf '%s' "${sources[*]}")
+	printf '[%s]' "$joined"
+}
+counsel_count_events() {
+	local events_text="${1:-}"
+	[[ -z "$events_text" ]] && { printf '0'; return 0; }
+	local count=0
+	while IFS= read -r line; do
+		[[ -n "$line" ]] && count=$((count + 1))
+	done <<< "$events_text"
+	printf '%s' "$count"
+}

package/plugins/counsel/scripts/lib/counsel-synthesize.sh ADDED Viewed

@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+# Synthesis pass for Counsel.
+#
+# Runs a single Haiku call over the event summary to produce a structured
+# improvement brief. The brief is returned as a JSON object.
+#
+# Exposes:
+#   counsel_synthesize <events_text> <model> <timeout_s> <max_tokens> <temperature>
+#     Echoes a JSON object on success, empty string on failure.
+#     JSON shape:
+#       {
+#         "summary":         string,
+#         "patterns":        [string],
+#         "recommendations": [{title, rationale, priority:"high"|"medium"|"low"}],
+#         "wins":            [string],
+#         "watch":           [string]
+#       }
+_COUNSEL_SYNTHESIS_PROMPT='You are an engineering coach analyzing an AI agent observability log. You have been given a structured dump of plugin events from the onlooker ecosystem over the past several weeks. Your job is to synthesize patterns, surface improvement opportunities, and highlight what is working well.
+Focus on:
+- Recurring failure modes or blocked gates (tribunal, sentinel, warden)
+- Prompt regression trends (echo plugin)
+- Budget or resource pressure patterns (governor plugin)
+- Quality trends over time
+- What the team is consistently doing well
+Return a JSON object with exactly these keys:
+{
+  "summary": "2-3 sentence executive summary of the period",
+  "patterns": ["observed pattern — what is happening and how often"],
+  "recommendations": [
+    {
+      "title": "short action title",
+      "rationale": "1-2 sentences explaining why this matters",
+      "priority": "high"
+    }
+  ],
+  "wins": ["thing that is working well — be specific"],
+  "watch": ["trend to monitor — not urgent but worth watching"]
+}
+Rules:
+- All fields are required; use empty arrays [] if no items found
+- recommendations must have priority: "high", "medium", or "low"
+- Keep each item to 1-2 sentences
+- Return ONLY the JSON object — no prose, no markdown fences, no explanation
+- If there is insufficient data to draw conclusions, say so in summary and return empty arrays
+'
+counsel_synthesize() {
+	local events_text="${1:-}"
+	local model="${2:-claude-haiku-4-5-20251001}"
+	local timeout_s="${3:-90}"
+	local max_tokens="${4:-4096}"
+	local temperature="${5:-0.4}"
+	[[ -z "$events_text" ]] && return 1
+	if ! command -v claude >/dev/null 2>&1; then
+		printf 'counsel_synthesize: claude CLI not found\n' >&2
+		return 1
+	fi
+	local prompt_file
+	prompt_file=$(mktemp -t counsel-synth.XXXXXX 2>/dev/null) || prompt_file="/tmp/counsel-synth.$$"
+	trap 'rm -f "$prompt_file"' RETURN
+	{
+		printf '%s' "$_COUNSEL_SYNTHESIS_PROMPT"
+		printf '<event_log>\n'
+		printf '%s\n' "$events_text"
+		printf '</event_log>\n'
+	} > "$prompt_file"
+	local claude_args=(-p --max-turns 1 --model "$model" --max-tokens "$max_tokens")
+	local response=""
+	if command -v timeout >/dev/null 2>&1; then
+		response=$(timeout "$timeout_s" claude "${claude_args[@]}" < "$prompt_file" 2>/dev/null) || response=""
+	elif command -v gtimeout >/dev/null 2>&1; then
+		response=$(gtimeout "$timeout_s" claude "${claude_args[@]}" < "$prompt_file" 2>/dev/null) || response=""
+	else
+		response=$(claude "${claude_args[@]}" < "$prompt_file" 2>/dev/null) || response=""
+	fi
+	[[ -z "$response" ]] && return 1
+	# Strip markdown fences if present.
+	local clean
+	clean=$(printf '%s' "$response" \
+		| sed -e 's/^```json[[:space:]]*//' -e 's/^```[[:space:]]*//' -e 's/[[:space:]]*```$//')
+	if ! printf '%s' "$clean" | jq -e \
+		'.summary and (.patterns | type == "array") and (.recommendations | type == "array") and (.wins | type == "array") and (.watch | type == "array")' \
+		>/dev/null 2>&1; then
+		printf 'counsel_synthesize: response missing required keys\n' >&2
+		return 1
+	fi
+	printf '%s' "$clean"
+}

package/plugins/counsel/scripts/lib/counsel-ulid.sh ADDED Viewed

@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Minimal ULID generator for Counsel brief IDs.
+_COUNSEL_ULID_ALPHABET="0123456789ABCDEFGHJKMNPQRSTVWXYZ"
+# Encode integer $1 as exactly $2 characters of the 32-symbol alphabet
+# above, most significant digit first. Values that need more than $2 digits
+# keep only their low-order digits; smaller values are left-padded with "0".
+_counsel_ulid_encode() {
+	local value="$1"
+	local width="$2"
+	local encoded=""
+	local remaining="$width"
+	while ((remaining > 0)); do
+		# Prepend the next base-32 digit, least significant first.
+		encoded="${_COUNSEL_ULID_ALPHABET:$((value % 32)):1}${encoded}"
+		value=$((value / 32))
+		remaining=$((remaining - 1))
+	done
+	printf '%s' "$encoded"
+}
+# Print a 26-character ULID-style identifier: a 10-character encoding of the
+# current time in milliseconds followed by two 8-character encodings of
+# 40-bit random values.
+#
+# Time source: python3 on Darwin, `date +%s%3N` elsewhere; whole seconds
+# times 1000 when that yields anything other than a plain integer.
+# Randomness: `openssl rand -hex 10`, or $RANDOM when openssl is unusable.
+counsel_ulid() {
+	local now_ms=""
+	if [[ "$(uname)" == "Darwin" ]]; then
+		now_ms=$(python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null) \
+			|| now_ms=""
+	else
+		now_ms=$(date +%s%3N 2>/dev/null) || now_ms=""
+	fi
+	# A date without %N support can exit 0 while leaving part of the format
+	# unexpanded, so the exit status alone is not a usable signal. Accept
+	# only a plain decimal integer; otherwise use second resolution.
+	if [[ ! "$now_ms" =~ ^[0-9]+$ ]]; then
+		now_ms=$(($(date +%s) * 1000))
+	fi
+	local rand_hex rand_hi rand_lo
+	rand_hex=$(openssl rand -hex 10 2>/dev/null)
+	# Require exactly 20 hex digits so the base-16 conversions cannot fail.
+	if [[ "$rand_hex" =~ ^[0-9a-fA-F]{20}$ ]]; then
+		rand_hi=$((16#${rand_hex:0:10}))
+		rand_lo=$((16#${rand_hex:10:10}))
+	else
+		# Build each 40-bit half from several $RANDOM draws.
+		rand_hi=$((RANDOM * 32768 + RANDOM))
+		rand_lo=$((RANDOM * 32768 + RANDOM))
+		rand_hi=$(((rand_hi * 256 + RANDOM % 256) & ((1 << 40) - 1)))
+		rand_lo=$(((rand_lo * 256 + RANDOM % 256) & ((1 << 40) - 1)))
+	fi
+	local ts_part hi_part lo_part
+	ts_part=$(_counsel_ulid_encode "$now_ms" 10)
+	hi_part=$(_counsel_ulid_encode "$rand_hi" 8)
+	lo_part=$(_counsel_ulid_encode "$rand_lo" 8)
+	printf '%s%s%s' "$ts_part" "$hi_part" "$lo_part"
+}

package/plugins/warden/.claude-plugin/plugin.json ADDED Viewed

@@ -0,0 +1,14 @@
+{
+  "name": "warden",
+  "version": "0.2.0",
+  "description": "Untrusted-content gate. Scans content flowing in through WebFetch and Read for prompt-injection patterns, and when a threat is detected closes a session-scoped gate that blocks Write, Edit, and Bash until the user explicitly clears it. Grounded in Meta's Agents Rule of Two: an agent should hold no more than two of {private data, external actions, untrusted content} at once — warden removes the external-actions property while untrusted content is in play. Builds on the Onlooker ecosystem plugin.",
+  "author": {
+    "name": "Onlooker Community",
+    "url": "https://onlooker.dev"
+  },
+  "homepage": "https://onlooker.dev",
+  "repository": "https://github.com/onlooker-community/ecosystem",
+  "license": "MIT",
+  "skills": ["./skills/warden"],
+  "agents": []
+}

package/plugins/warden/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,10 @@
+# Changelog
+## [0.2.0](https://github.com/onlooker-community/ecosystem/compare/warden-v0.1.0...warden-v0.2.0) (2026-06-02)
+### Features
+* **warden:** untrusted-content gate enforcing the Agents Rule of Two :shield: ([#53](https://github.com/onlooker-community/ecosystem/issues/53)) ([210aa51](https://github.com/onlooker-community/ecosystem/commit/210aa51bff66226a0eec1f17292a2af4ea4ef56a))
+## Changelog

package/plugins/warden/config.json ADDED Viewed

@@ -0,0 +1,51 @@
+{
+  "plugin_name": "warden",
+  "storage_path": "~/.onlooker",
+  "warden": {
+    "enabled": false,
+    "scan": {
+      "sources": ["web_fetch", "file_read"],
+      "max_content_chars": 20000,
+      "skip_globs": ["**/*.lock", "**/*.sum", "**/node_modules/**", "**/.git/**", "**/dist/**", "**/build/**"],
+      "store_snippet": true,
+      "snippet_max_chars": 240
+    },
+    "detection": {
+      "close_threshold": 0.65,
+      "strong_pattern_confidence": 0.9,
+      "weak_pattern_confidence": 0.5,
+      "threshold_calibration_note": "Strong pattern hits (explicit override/exfil phrasing) score 0.9 and close the gate without an LLM call. Weak hits (suspicion markers near imperative verbs, delimiter tags, long base64 blobs) score 0.5 — below close_threshold — and escalate to the evaluator when escalation.enabled is true. Clean content never calls the model."
+    },
+    "escalation": {
+      "enabled": true,
+      "borderline_only": true,
+      "model": "claude-haiku-4-5-20251001",
+      "n": 3,
+      "temperature": 0.0,
+      "max_output_tokens": 192,
+      "sample_timeout_seconds": 12,
+      "min_valid_samples": 2
+    },
+    "gate": {
+      "blocked_tools": ["Write", "Edit", "MultiEdit", "Bash"],
+      "clear_policy": "user_override_only"
+    },
+    "sanitization": {
+      "strip_sequences": [
+        "<source_content>",
+        "</source_content>",
+        "<instructions>",
+        "</instructions>",
+        "<|",
+        "[INST]",
+        "[/INST]",
+        "<<SYS>>",
+        "<</SYS>>"
+      ],
+      "strip_null_bytes": true
+    },
+    "data_egress": {
+      "note": "On escalation, only a sanitized, length-capped excerpt of the ingested content is sent to the evaluator model. Set escalation.enabled=false to disable all egress — warden then relies on the deterministic pattern floor alone (zero network, zero egress, weaker coverage of novel phrasing)."
+    }
+  }
+}

package/plugins/warden/docs/adr/001-detect-after-ingest-gate-before-action.md ADDED Viewed

@@ -0,0 +1,62 @@
+# ADR-001: Warden Detects After Ingestion and Gates Before Action
+- Status: Accepted
+- Date: 2026-06-02
+- Deciders: Meagan
+- Tags: warden, rule-of-two, hook-architecture, prompt-injection, content-gate
+## Context and Problem Statement
+Warden defends against prompt injection arriving through untrusted content — content the agent ingests via `WebFetch` and `Read`. The naive instinct for a "scan content before the agent processes it" plugin is to scan at `PreToolUse`: inspect the thing before it enters the context, and block it if it's hostile.
+That instinct does not fit the actual data flow:
+1. **The content does not exist before the tool runs.** A `WebFetch` result is only known *after* the fetch. A `Read` result is the file's contents, surfaced in the `tool_response`. At `PreToolUse` there is nothing to scan but a URL or a path — far too little signal to classify an injection, and scanning the URL/path alone would miss the entire payload.
+2. **Blocking the read is the wrong lever.** Reading a hostile page is not itself harmful; reading is how the agent and the user *discover* that the page is hostile. The harm is what the agent does *next* with that content — writing a file, editing code, running a command, exfiltrating a secret. The threat is downstream of ingestion.
+So the question is not "how do we stop the agent from reading bad content" (we can't, and shouldn't), but "once bad content is in the context, how do we prevent it from driving an external action." This is precisely the framing of Meta's **Agents Rule of Two**: untrusted content (property C) is now present alongside private-data access (A) and external-action capability (B); we must drop one of the other two. Dropping B — external actions — is the safe, reversible choice.
+## Decision Drivers
+- **Signal availability**: the injection payload only exists in `tool_response`, which is a `PostToolUse` field. Detection must run where the content is.
+- **No timing skew**: `PostToolUse` fires after the content is committed to the transcript, so the scan sees exactly what the agent sees — no race.
+- **Reversibility**: the response to a detected threat should be a *pause a human can lift*, not a destructive or silent action. Revoking external actions is reversible; un-reading is not.
+- **Rule-of-Two alignment**: the mitigation should map cleanly onto removing exactly one of the three properties. Gating B (Write/Edit/MultiEdit/Bash) is that mapping.
+- **Fail-soft**: a detector that runs on every read must not block reads when it errors, and the enforcement check must be cheap enough to run before every write without latency cost.
+## Considered Options
+1. **Scan at `PreToolUse` on WebFetch/Read and block the read.** Inspect before ingestion.
+2. **Detect at `PostToolUse` on WebFetch/Read; gate at `PreToolUse` on Write/Edit/MultiEdit/Bash.** Split detection from enforcement across two hook surfaces, mediated by a session-scoped lock.
+3. **Single `PreToolUse` hook on the write-class tools that re-scans the whole transcript each time.** No PostToolUse; scan lazily at write time.
+## Decision
+We adopt **Option 2: detect after ingestion, gate before action.**
+- **Detection** runs on `PostToolUse` for `WebFetch` and `Read`. It extracts the ingested content from `tool_response`, runs the hybrid scanner, and on a positive verdict **closes a session-scoped content gate** (`gate.json`) and emits `warden.threat.detected`. PostToolUse cannot block the tool — and deliberately does not need to, because blocking the read is not the goal.
+- **Enforcement** runs on `PreToolUse` for `Write`, `Edit`, `MultiEdit`, and `Bash`. It is a pure lock check: if the gate is closed, it returns `{"decision":"block", …}` and emits `warden.gate.blocked`; otherwise it allows silently. No model call, no command parsing.
+- The two surfaces communicate **only** through the gate lock on disk — never by calling each other — consistent with the ecosystem's event-bus discipline.
+Option 1 is rejected: there is nothing meaningful to scan at `PreToolUse` for these tools, and blocking the read is both ineffective (the threat is downstream) and user-hostile (it prevents discovery). Option 3 is rejected: re-scanning the full transcript on every write is expensive, repeats work, and loses the clean "this specific source was hostile" provenance that the PostToolUse scan captures at ingestion time.
+## Consequences
+### Positive
+- Detection sees the real payload (`tool_response`), so classification is meaningful.
+- The response is reversible and human-gated: external actions pause; the user clears the gate with `/warden clear`.
+- Enforcement is O(1) and fail-closed (a present lock always blocks), so gating every write is cheap.
+- The design maps one-to-one onto the Rule of Two: detection observes property C arriving; enforcement removes property B until a human restores it.
+- Clean separation: detection cost (possibly a model call) is paid once per ingested source; enforcement cost is a file stat.
+### Negative / trade-offs
+- The hostile content **is** in the context by the time the gate closes — warden mitigates the consequence (external action), not the ingestion. This is inherent to the threat model and is exactly why the mitigation targets property B.
+- A gate closed late in a turn can block writes the agent already intended as benign; the user must clear it. This is the intended friction, not a bug.
+- Session-scoped state means a brand-new session starts open even if a prior session saw a threat. Acceptable: the untrusted content lives in a specific session's context, and warden gates that context.
+## Related
+- Plugin design: [`../design.md`](../design.md)
+- Schema: `warden.threat.detected`, `warden.gate.blocked`, `warden.threat.cleared` in `@onlooker-community/schema` (plugins-safety payloads).