npm - @possumtech/rummy - Versions diffs - 0.5.0 → 2.0.1 - Mend

@possumtech/rummy 0.5.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157)

package/.env.example +42 -5
package/PLUGINS.md +389 -194
package/README.md +25 -8
package/SPEC.md +934 -373
package/bin/demo.js +166 -0
package/bin/rummy.js +9 -3
package/biome/no-fallbacks.grit +50 -0
package/lang/en.json +2 -2
package/migrations/001_initial_schema.sql +88 -37
package/package.json +13 -11
package/scriptify/ask_run.js +77 -0
package/service.js +50 -9
package/src/agent/AgentLoop.js +476 -335
package/src/agent/ContextAssembler.js +4 -4
package/src/agent/Entries.js +676 -0
package/src/agent/ProjectAgent.js +30 -18
package/src/agent/TurnExecutor.js +232 -421
package/src/agent/XmlParser.js +99 -33
package/src/agent/budget.js +56 -0
package/src/agent/errors.js +22 -0
package/src/agent/httpStatus.js +39 -0
package/src/agent/known_checks.sql +8 -4
package/src/agent/known_queries.sql +9 -13
package/src/agent/known_store.sql +280 -125
package/src/agent/materializeContext.js +104 -0
package/src/agent/runs.sql +29 -7
package/src/agent/schemes.sql +14 -3
package/src/agent/tokens.js +6 -0
package/src/agent/turns.sql +9 -9
package/src/hooks/HookRegistry.js +6 -5
package/src/hooks/Hooks.js +44 -3
package/src/hooks/PluginContext.js +29 -21
package/src/{server → hooks}/RpcRegistry.js +2 -1
package/src/hooks/RummyContext.js +139 -35
package/src/hooks/ToolRegistry.js +21 -16
package/src/llm/LlmProvider.js +66 -89
package/src/llm/errors.js +21 -0
package/src/llm/retry.js +63 -0
package/src/plugins/ask_user/README.md +1 -1
package/src/plugins/ask_user/ask_user.js +37 -12
package/src/plugins/ask_user/ask_userDoc.js +2 -25
package/src/plugins/ask_user/ask_userDoc.md +10 -0
package/src/plugins/budget/README.md +27 -25
package/src/plugins/budget/budget.js +306 -88
package/src/plugins/cp/README.md +2 -2
package/src/plugins/cp/cp.js +29 -11
package/src/plugins/cp/cpDoc.js +2 -15
package/src/plugins/cp/cpDoc.md +7 -0
package/src/plugins/engine/README.md +2 -2
package/src/plugins/engine/engine.sql +4 -4
package/src/plugins/engine/turn_context.sql +10 -10
package/src/plugins/env/README.md +20 -5
package/src/plugins/env/env.js +45 -6
package/src/plugins/env/envDoc.js +2 -23
package/src/plugins/env/envDoc.md +13 -0
package/src/plugins/error/README.md +16 -0
package/src/plugins/error/error.js +151 -0
package/src/plugins/file/README.md +6 -6
package/src/plugins/file/file.js +15 -2
package/src/plugins/get/README.md +1 -1
package/src/plugins/get/get.js +103 -48
package/src/plugins/get/getDoc.js +2 -32
package/src/plugins/get/getDoc.md +36 -0
package/src/plugins/hedberg/README.md +1 -2
package/src/plugins/hedberg/hedberg.js +8 -4
package/src/plugins/hedberg/matcher.js +16 -17
package/src/plugins/hedberg/normalize.js +0 -48
package/src/plugins/helpers.js +42 -2
package/src/plugins/index.js +146 -123
package/src/plugins/instructions/README.md +35 -9
package/src/plugins/instructions/instructions.js +244 -9
package/src/plugins/instructions/instructions.md +33 -0
package/src/plugins/instructions/instructions_104.md +7 -0
package/src/plugins/instructions/instructions_105.md +38 -0
package/src/plugins/instructions/instructions_106.md +21 -0
package/src/plugins/instructions/instructions_107.md +10 -0
package/src/plugins/instructions/instructions_108.md +0 -0
package/src/plugins/instructions/protocol.js +12 -0
package/src/plugins/known/README.md +2 -2
package/src/plugins/known/known.js +68 -36
package/src/plugins/known/knownDoc.js +2 -17
package/src/plugins/known/knownDoc.md +8 -0
package/src/plugins/log/README.md +48 -0
package/src/plugins/log/log.js +129 -0
package/src/plugins/mv/README.md +2 -2
package/src/plugins/mv/mv.js +55 -22
package/src/plugins/mv/mvDoc.js +2 -18
package/src/plugins/mv/mvDoc.md +10 -0
package/src/plugins/ollama/README.md +15 -0
package/src/{llm/OllamaClient.js → plugins/ollama/ollama.js} +40 -18
package/src/plugins/openai/README.md +17 -0
package/src/plugins/openai/openai.js +120 -0
package/src/plugins/openrouter/README.md +27 -0
package/src/plugins/openrouter/openrouter.js +121 -0
package/src/plugins/persona/README.md +20 -0
package/src/plugins/persona/persona.js +9 -16
package/src/plugins/policy/README.md +21 -0
package/src/plugins/policy/policy.js +29 -14
package/src/plugins/prompt/README.md +1 -1
package/src/plugins/prompt/prompt.js +64 -16
package/src/plugins/rm/README.md +1 -1
package/src/plugins/rm/rm.js +56 -12
package/src/plugins/rm/rmDoc.js +2 -20
package/src/plugins/rm/rmDoc.md +13 -0
package/src/plugins/rpc/README.md +2 -2
package/src/plugins/rpc/rpc.js +525 -296
package/src/plugins/set/README.md +1 -1
package/src/plugins/set/set.js +318 -75
package/src/plugins/set/setDoc.js +2 -35
package/src/plugins/set/setDoc.md +22 -0
package/src/plugins/sh/README.md +28 -5
package/src/plugins/sh/sh.js +50 -6
package/src/plugins/sh/shDoc.js +2 -23
package/src/plugins/sh/shDoc.md +13 -0
package/src/plugins/skill/README.md +23 -0
package/src/plugins/skill/skill.js +14 -18
package/src/plugins/stream/README.md +101 -0
package/src/plugins/stream/stream.js +290 -0
package/src/plugins/telemetry/README.md +1 -1
package/src/plugins/telemetry/telemetry.js +129 -80
package/src/plugins/think/README.md +1 -1
package/src/plugins/think/think.js +12 -0
package/src/plugins/think/thinkDoc.js +2 -15
package/src/plugins/think/thinkDoc.md +7 -0
package/src/plugins/unknown/README.md +3 -3
package/src/plugins/unknown/unknown.js +47 -19
package/src/plugins/unknown/unknownDoc.js +2 -21
package/src/plugins/unknown/unknownDoc.md +11 -0
package/src/plugins/update/README.md +1 -1
package/src/plugins/update/update.js +83 -5
package/src/plugins/update/updateDoc.js +2 -30
package/src/plugins/update/updateDoc.md +8 -0
package/src/plugins/xai/README.md +23 -0
package/src/{llm/XaiClient.js → plugins/xai/xai.js} +58 -37
package/src/plugins/yolo/yolo.js +192 -0
package/src/server/ClientConnection.js +64 -37
package/src/server/SocketServer.js +23 -10
package/src/server/protocol.js +11 -0
package/src/sql/v_model_context.sql +27 -31
package/src/sql/v_run_log.sql +9 -14
package/EXCEPTIONS.md +0 -46
package/FIDELITY_CONTRACT.md +0 -172
package/src/agent/KnownStore.js +0 -337
package/src/agent/ResponseHealer.js +0 -241
package/src/llm/OpenAiClient.js +0 -100
package/src/llm/OpenRouterClient.js +0 -100
package/src/plugins/budget/recovery.js +0 -47
package/src/plugins/instructions/preamble.md +0 -45
package/src/plugins/performed/README.md +0 -15
package/src/plugins/performed/performed.js +0 -45
package/src/plugins/previous/README.md +0 -16
package/src/plugins/previous/previous.js +0 -56
package/src/plugins/progress/README.md +0 -16
package/src/plugins/progress/progress.js +0 -43
package/src/plugins/summarize/README.md +0 -19
package/src/plugins/summarize/summarize.js +0 -32
package/src/plugins/summarize/summarizeDoc.js +0 -27

package/SPEC.md CHANGED Viewed

@@ -1,120 +1,311 @@
 # RUMMY: Architecture Specification
-The authoritative reference for Rummy's design. The sacred prompt
-The instructions plugin (`preamble.md` + tool docs) defines
-model-facing behavior. This document defines everything else.
+The authoritative reference for Rummy's design. The instructions
+plugin (`instructions.md` + phase-specific `instructions_10N.md` +
+tool docs) defines model-facing behavior. This document defines
+everything else.
 ---
-## 0. Design Philosophy: Events & Filters
+## The Contract
-Rummy is a hooks-and-filters system. Every structural seam in the
-pipeline is a hookable checkpoint. Plugins subscribe to events
-(fire-and-forget side effects) and filters (transformation chains
-that thread a value through subscribers in priority order).
+Rummy has one contract. Every actor speaks it.
-**Every `<tag>` the model sees is a plugin.** The `<known>` section
-of the system message is rendered by the known plugin. The `<progress>`
-section is rendered by the progress plugin. The `<prompt>` tag is
-rendered by the prompt plugin. No monolithic assembler decides what goes where.
-Each plugin filters for its own data from the shared row set, renders
-its section, and returns.
+### Entries {#entries}
+An entry is the sole unit of state the contract names. Every entry
+carries:
+| Field | Meaning |
+|-------|---------|
+| **path** | Identity. `scheme://locator` or bare filepath. |
+| **body** | Content (text). |
+| **attributes** | JSON bag of structured metadata. |
+| **visibility** | `visible \| summarized \| archived`. What the model sees of this entry next turn. |
+| **state** | `proposed \| streaming \| resolved \| failed \| cancelled`. Where the entry is in its lifecycle. |
+| **outcome** | Short reason string when state ∈ {failed, cancelled}. Opaque to most callers; a few plugins parse it. |
+| **writer** | Which tier wrote it last. |
+| **scope** | `run:N \| project:N \| global`. Determines namespace and readership. |
+Visibility and state are independent axes. An entry can be `state=resolved,
+visibility=archived` (complete and hidden) or `state=streaming,
+visibility=summarized` (in-flight, shown as summary) or `state=proposed,
+visibility=visible` (visible, awaiting resolution).
+### Six Primitives {#primitives}
+The entire grammar for changing entries:
+| Verb | Effect |
+|------|--------|
+| **set** | Create or update an entry. Writes content, state, visibility, attributes. |
+| **get** | Promote an entry to `visibility=visible`. The read-with-side-effect. |
+| **rm** | Remove an entry from the caller's view (or delete it when scope permits). |
+| **cp** | Copy an entry to a new path. |
+| **mv** | Rename an entry to a new path. |
+| **update** | Record a turn's continuation or terminal signal. |
+Every tool in rummy (`<sh>`, `<ask_user>`, `<search>`, `<env>`, `<think>`,
+`<known>`, `<unknown>`, …) is a **plugin that composes the six
+primitives**. A `<sh>` invocation becomes a `set` that creates a
+proposed entry; on user accept, a stream plugin drives body appends
+via `set` and eventually a state transition to `resolved`. The
+primitives are the atoms; tools are the molecules.
+### Three Surfaces, One Grammar {#surfaces}
+| Actor | Syntax |
+|-------|--------|
+| **Model** | XML tags: `<set path="..." />` |
+| **Plugin** | RummyContext methods: `rummy.set({...})` |
+| **Client** | JSON-RPC: `{"method":"set","params":{...}}` |
+Syntactic skins over the same semantics. A plugin calling
+`rummy.set(...)`, a client sending `{"method":"set",...}`, and a model
+emitting `<set/>` are the same event at the store layer, authorized by
+the respective writer identity against the scheme's permissions.
+### Four Writer Tiers {#writer_tiers}
+A strict hierarchy of writer identities. Each tier is a superset of
+what's below it:
+| Tier | Access |
+|------|--------|
+| **system** | Internal plumbing (TurnExecutor, AgentLoop audit writes — `instructions://`, `reasoning://`, message schemes). |
+| **plugin** | Declares schemes, registers hooks and filters, calls store methods directly. Everything below plus plugin-scope infrastructure. |
+| **client** | RPC surface. Writes to client-writable schemes (`run://`, proposed-entry state transitions, config) and reads via subscribed notifications. |
+| **model** | XML-tag surface. Writes to model-writable schemes (`known://`, `unknown://`, `update://`, tool-result schemes) as restricted by the active run's capability set. |
+Every scheme declares `writable_by` as a subset of `{system, plugin,
+client, model}`. A write from an identity outside that subset rejects
+with state=failed, outcome="permission:403".
+### Runs Are Entries {#runs_are_entries}
+Starting a run is not a separate API — it is a `set` to
+`run://{alias}` with a prompt body and attributes carrying model,
+restrictions, and resolution strategy. A run plugin observes `run://`
+entry writes and starts the turn loop. Cancelling is a state
+transition to `cancelled` on the same path. Resolving a proposed entry
+is a state transition on that entry's path.
+The lifecycle API is the entry grammar. No parallel verb set.
+### Events & Filters {#events_and_filters}
+Between the primitive-write layer and the actual work, rummy is a
+hooks-and-filters system. Plugins subscribe to events (fire-and-forget
+side effects) and filters (transformation chains that thread a value
+through subscribers in priority order).
+**Every `<tag>` the model sees is a plugin.** `<knowns>` → known
+plugin. `<unknowns>` → unknown plugin. `<performed>` → performed
+plugin. `<previous>` → previous plugin. `<prompt>` → prompt plugin.
+No monolithic assembler decides what goes where. Each plugin filters
+for its own data from the shared row set, renders its section, returns.
 **Plugins compose, they don't coordinate.** A plugin subscribes to a
-filter at a priority. It receives the accumulator value, appends its
-contribution, and returns. It doesn't know what other plugins exist.
+filter at a priority, receives the accumulator value, appends its
+contribution, returns. It doesn't know what other plugins exist.
 Priority determines ordering. Lower numbers run first.
-**The core is a filter chain invocation.** The TurnExecutor computes
-`loopStartTurn` (one value from one row), then calls
-`assembly.system.filter(instructions, ctx)` and
+**The core is a filter chain invocation.** `ContextAssembler` computes
+`loopStartTurn` from the latest prompt entry's `source_turn`, then
+calls `assembly.system.filter(systemPrompt, ctx)` and
 `assembly.user.filter("", ctx)`. Everything else is plugins.
+### Physical Layout
+The contract is realized across two tables plus a compat view:
+- **`entries`** — content layer. `(scope, path)` unique. Body,
+  attributes, hash (token counts are computed at assembly time, not stored).
+- **`run_views`** — per-run projection. Visibility, state, outcome,
+  turn, loop. A run sees an entry only if it has a view row.
+- **`known_entries`** — compatibility VIEW joining the two for legacy
+  SELECT queries. Not writable.
+Server-side bookkeeping (runs, loops, turns, projects, models,
+schemes, file_constraints, turn_context, rpc_log) exists to support
+the contract; the contract's actors never address these tables
+directly.
 ---
-## 1. The Known Store
+## The Known Store {#known_store}
-All model-facing state lives in `known_entries`. Files, knowledge, tool
-results, skills, audit — everything is a keyed entry with a URI scheme,
-body, attributes, and state.
+All model-facing state is stored across two tables joined via the
+`known_entries` compatibility VIEW. Files, knowledge, tool results,
+skills, audit — everything is a keyed entry with a URI path, body,
+attributes, per-run status, and per-run visibility.
-### 1.1 Schema
+### Schema {#schema}
+**Content layer** — `entries` (shared, scope-owned):
 ```sql
-known_entries (
-    id, run_id, loop_id, turn, path, body, scheme,
-    status INTEGER, fidelity TEXT, hash,
-    attributes, tokens, refs, write_count,
-    created_at, updated_at
+entries (
+    id, scope, path, scheme, body, attributes,
+    hash, created_at, updated_at,
+    UNIQUE (scope, path)
 )
 ```
 | Column | Purpose |
 |--------|---------|
-| `path` | Entry identity. Bare paths (`src/app.js`) or URIs (`known://auth`). Max 2048 chars. |
-| `body` | Tag body text. File content, tool output, skill docs. |
-| `attributes` | Tag attributes as JSON. Handler-private workspace. `CHECK (json_valid)` |
-| `scheme` | Generated from path via `schemeOf()`. Drives dispatch and view routing |
-| `status` | HTTP status code (200, 202, 400, 413, etc.) |
-| `fidelity` | Visibility level: full, summary, archive |
-| `hash` | SHA-256 for file change detection |
-| `tokens` | Full-body token cost. Never changes on demotion/promotion. |
-| `turn` | Freshness — when was this entry last touched |
-### 1.2 Schemes, Status & Fidelity
+| `scope` | `global`, `project:N`, or `run:N`. Determines who can read; per-scheme `writable_by` determines who can write. |
+| `path` | Entry identity within scope. Bare paths (`src/app.js`) or URIs (`known://auth`). Max 2048 chars. |
+| `scheme` | GENERATED from `schemeOf(path)`. Drives dispatch and view routing. |
+| `body` | Content. File text, tool output, skill docs. |
+| `attributes` | Tag attributes as JSON. `CHECK (json_valid)`. |
+| `hash` | SHA-256 for file change detection. |
-Every entry has two independent dimensions: **status** (HTTP integer)
-and **fidelity** (visibility level). These are separate concerns.
+Tokens are not stored on entries. See [token_accounting](#token_accounting) — token cost is a property of the materialized packet, computed during assembly, never persisted.
-**Status** (lifecycle): 200 (OK), 202 (proposed), 400 (bad request),
-404 (not found), 409 (conflict), 413 (too large), 499 (aborted),
-500 (error).
+**View layer** — `run_views` (per-run projection):
-**Fidelity** (visibility): `full` (body visible), `summary`
-(model-authored summary), `index` (path only), `archive` (invisible,
-retrievable via `<get>`).
+```sql
+run_views (
+    id, run_id, entry_id, loop_id, turn,
+    status INTEGER, visibility TEXT,
+    write_count, refs, created_at, updated_at,
+    UNIQUE (run_id, entry_id)
+)
+```
-Paths use URI scheme syntax. Bare paths (no `://`) are files.
+| Column | Purpose |
+|--------|---------|
+| `run_id`, `entry_id` | (run, entry) unique pair. Absent view = not in context. |
+| `loop_id`, `turn` | Freshness — when this run last touched the entry. |
+| `status` | HTTP status code — outcome of the run's last operation on this entry. |
+| `visibility` | `visible` \| `summarized` \| `archived`. The run's relationship to the entry. |
+| `write_count` | How many times this run has written this entry. |
+**Compatibility view** — `known_entries` joins the two tables so
+legacy SELECT queries keep working. Not writable; new write code must
+target `entries` + `run_views` directly (see [upsert_semantics](#upsert_semantics)).
+**No shadowing.** A run cannot override a global (or project-scoped)
+entry with a run-scoped copy of the same path. Scope is resolved from
+the scheme's declared `default_scope` at write time; if the writer's
+permission doesn't allow the target scope, the write is rejected
+(403 + `error://`). Paths are unique within a scope, but different
+scopes use independent namespaces — `known://plan` is always
+run-scoped; `wiki://...` (hypothetical) would always be global. The
+scheme plugin owns the decision; the model doesn't juggle scopes.
+**Forks copy views, not content.** `store.forkEntries(parent, child)`
+inserts new `run_views` rows referencing the parent's `entries`
+rows — no body copies, O(row-count) rather than O(body-bytes).
+A forked child's subsequent writes diverge by creating new entries
+at the child's scope; the parent's entries stay untouched.
+### Schemes, Status & Visibility {#schemes_status_visibility}
+Every entry has two independent dimensions: **status** (HTTP integer —
+view-side) and **visibility** (what the model sees — view-side). These
+are separate concerns.
+**Status** (operation outcome): 200 (OK), 202 (proposed), 400 (bad
+request), 403 (permission denied), 404 (not found), 409 (conflict),
+413 (too large), 499 (aborted), 500 (error).
+**Visibility** (the model's view in the run's context): `visible` (body
+shown), `summarized` (path + attrs shown, body hidden or condensed;
+promote via `<get>`), `archived` (invisible; retrievable via pattern
+search).
+Lifecycle events (budget Turn Demotion, fork copy) change `visibility`
+but never `status` — status stays truthful about the last body
+operation. See `demote_turn_entries` in `known_store.sql`.
+Paths use URI scheme syntax. Bare paths (no `://`) are files, stored
+with `scheme IS NULL` (JOINs treat NULL as `'file'` via COALESCE).
 Every entry plays one of four roles:
 | Role | Category | Section | Description |
 |------|----------|---------|-------------|
-| **Data** | `data` | `<knowns>` | Entries the model works with — persistent state |
-| **Logging** | `logging` | `<performed>`/`<previous>` | Records of what happened — tool results, lifecycle signals |
+| **Data** | `data` | `<context>` | Entries the model works with — persistent state and captured payload |
+| **Logging** | `logging` | `<log>` | Records of what happened — tool results, lifecycle signals |
 | **Unknowns** | `unknown` | `<unknowns>` | Open questions the model is tracking |
 | **Prompt** | `prompt` | `<prompt>` | The task driving the loop |
 `logging` is the default category. Plugins opt into `data` explicitly.
-| Scheme | Category | Description |
-|--------|----------|-------------|
-| `NULL` (bare path) | data | File content. JOINs via `COALESCE(scheme, 'file')`. `file://` prefix stripped by hedberg. |
-| `known://` | data | Model-registered knowledge. One fact per entry. |
-| `skill://` | data | Skill docs. Rendered in system message. |
-| `http://`, `https://` | data | Web content. |
-| `unknown://` | unknown | Unresolved questions. |
-| `prompt://` | prompt | User prompt with `mode` attribute (`ask`/`act`). |
-| `set://`, `get://`, `sh://`, `env://`, `rm://`, `mv://`, `cp://`, `ask_user://`, `search://` | logging | Tool result entries. |
-| `summarize://`, `update://` | logging | Lifecycle signals. |
-| `tool://` | audit | Internal plugin metadata. `model_visible = 0`. |
-| `system://`, `reasoning://`, `model://`, `error://`, `user://`, `assistant://`, `content://` | audit | Audit entries. `model_visible = 0`. |
-### 1.3 Scheme Registry
-The `schemes` table is a bootstrap registry — static rows of
-`(name, model_visible, category)`. Plugins register their scheme
-via `core.registerScheme()` in the constructor. The `model_visible`
-flag controls whether entries appear in `v_model_context`.
-### 1.4 UPSERT Semantics
-INSERT OR REPLACE on `(run_id, path)`. Each write increments `write_count`.
-Blank body is valid. Deletion uses `<rm>`, which removes the row entirely.
+| Scheme | Category | `writable_by` | Description |
+|--------|----------|---------------|-------------|
+| `NULL` (bare path) | data | `model, plugin` | File content. JOINs via `COALESCE(scheme, 'file')`. |
+| `known://` | data | `model, plugin` | Model-registered knowledge. One fact per entry. |
+| `skill://` | data | `model, plugin` | Skill docs. Rendered in system message. |
+| `http://`, `https://` | data | `model, plugin` | Web content. |
+| `sh://`, `env://` | data | `model, plugin` | Streaming-producer payload — stdout/stderr channel entries from shell/env commands. **Channels only**; the action audit record lives in `log://`. See [scheme_category_split](#scheme_category_split). |
+| `unknown://` | unknown | `model, plugin` | Unresolved questions. |
+| `prompt://` | prompt | `plugin` | User prompt with `mode` attribute. Written by prompt plugin, never by model. |
+| `log://` | logging | `system, plugin, model` | Unified audit record namespace for all tool actions. One entry per action at `log://turn_N/{action}/{slug}`. |
+| `update://` | logging | `model, plugin` | Lifecycle signal. Status attr classifies terminal (200/204/422) vs continuation (102). |
+| `error://` | logging | `model, plugin` | Runtime errors — policy rejection, budget overflow (status 413), dispatch crashes, protocol violations. Unified channel via `hooks.error.log.emit`. |
+| `tool://` | audit | `system` | Internal plugin metadata. `model_visible = 0`. |
+| `instructions://`, `system://`, `reasoning://`, `model://`, `user://`, `assistant://`, `content://` | audit | `system` | Audit entries. `model_visible = 0`. Written only by server-level code. |
+### Scheme / Category Split {#scheme_category_split}
+**Scheme determines category.** Every entry's category is looked up
+from its scheme registration; entries of the same scheme always share a
+category. Data and logging never share a scheme.
+Streaming producers (sh, env, and future fetch/search/tail/watch) split
+across two namespaces as a direct consequence:
+- **Action audit record** lives in `log://turn_N/{action}/{slug}` —
+  scheme=`log`, category=`logging`. Renders in `<log>`.
+- **Payload channels** live in `{action}://turn_N/{slug}_N` —
+  scheme=`{action}` (registered as `category: "data"`). Render in
+  `<context>`.
+This keeps `<log>` a terse audit trail (what happened, exit code,
+paths) while `<context>` carries the actual streamed bytes the model
+reads. Conflating the two — e.g., writing channels under `log://...` —
+mislabels payload as audit and pollutes the logging section with
+multi-line command output. See [streaming_entries](#streaming_entries).
+### Scheme Registry {#scheme_registry}
+The `schemes` table is a bootstrap registry — rows of
+`(name, model_visible, category, default_scope, writable_by)`.
+Plugins register their scheme via `core.registerScheme({name, category,
+scope, writableBy})` in the constructor. Defaults:
+`scope = "run"`, `writableBy = ["model", "plugin"]`.
+- `model_visible` — whether entries appear in `v_model_context` (`0`
+  hides audit schemes from the model).
+- `default_scope` — `run` \| `project` \| `global`. Resolved to a
+  concrete scope string at write time (`run:N`, `project:N`, `global`).
+  Project-scoped writes require `projectId` on the call; `Entries.set`
+  throws if it's missing.
+- `writable_by` — JSON array of allowed writer types
+  (`model` \| `plugin` \| `system` \| `client`). `Entries.set` throws
+  `PermissionError` when the caller's writer isn't in the list.
+### UPSERT Semantics {#upsert_semantics}
+Writes go through `Entries.set({runId, path, body, state?, visibility?,
+attributes?, outcome?, turn?, loopId?, writer?, projectId?, ...})`
+— two-prep flow:
+1. `upsert_entry` — INSERT OR UPDATE on `(scope, path)`. Scope comes
+   from scheme's `default_scope`. Returns the `entry_id`.
+2. `upsert_run_view` — INSERT OR UPDATE on `(run_id, entry_id)`.
+   Increments `write_count` on conflict.
+Blank body is valid. Deletion uses `<rm>`, which removes the
+`run_views` row; the shared `entries` row is left for now (GC is a
+future concern).
 ---
-## 2. Relational Tables
+## Relational Tables
 The K/V store is the memory. Relational tables are the skeleton.
@@ -132,9 +323,9 @@ turns    (id, run_id, loop_id, sequence, context_tokens,
           created_at)
 file_constraints (id, project_id, pattern, visibility, created_at)
-  -- Project-level config. NOT tool dispatch. See §2.3.
+  -- Project-level config. NOT tool dispatch. See [file_constraints](#file_constraints).
 turn_context     (id, run_id, loop_id, turn, ordinal, path, scheme,
-                  status, fidelity, body, tokens, attributes,
+                  status, visibility, body, tokens, attributes,
                   category, source_turn)
 rpc_log          (id, project_id, method, rpc_id, params, result, error)
 ```
@@ -146,37 +337,44 @@ name can access any run. Temperature, persona, and context_limit are per-run.
 Clients can add/remove models at runtime via RPC. No default model — the
 client picks for every run.
-### 2.1 Run State Machine
+### Run State Machine {#run_state_machine}
-All status fields are HTTP integer codes:
+All status fields are HTTP integer codes. `runs.status` transitions
+are enforced by `trg_run_state_transition` (see initial migration):
 ```
-100 (queued) → 200 (running) → 202 (proposed) → 200 (running) → 200 (completed)
-                              → 200 (completed)
-                              → 500 (failed) → 200 (running)
-                              → 499 (aborted) → 200 (running)
+100 queued    → 102 running, 499 aborted
+102 running   → 200 completed, 202 proposed, 500 failed, 499 aborted
+202 proposed  → 102 running, 200 completed, 499 aborted
+200 completed → 102 running, 499 aborted
+500 failed    → 102 running, 499 aborted
+499 aborted   → 102 running
 ```
-All terminal states allow transition back to `running`. Runs are long-lived.
+All terminal states (200/500/499) allow transition back to running.
+Runs are long-lived.
-### 2.2 Loops Table
+### Loops Table {#loops_table}
 The loops table IS the prompt queue. Each `ask`/`act` creates a loop.
 FIFO per run (ordered by sequence). One active at a time. Abort stops
 the current loop; pending loops survive. Projects > runs > loops > turns.
-### 2.3 File Constraints
+### File Constraints {#file_constraints}
 The `file_constraints` table is project-level configuration — it
 defines which files a project cares about. This is backbone, not tool
-dispatch. Constraints have three visibilities: `active` (promoted to
-full), `readonly` (promoted but not editable), `ignore` (demoted).
+dispatch. Constraints have three visibilities:
+- `active` — matching files are promoted into the run's context
+- `readonly` — promoted but not editable by the model
+- `ignore` — demoted (excluded from context)
 **Boundary:** Setting a constraint (`File.setConstraint`) is a
 project-config write. Promoting/demoting the matching entries is tool
 dispatch that goes through the handler chain with budget enforcement.
 These are separate operations: constraint persists across runs, entry
-promotion is scoped to a run and subject to the same budget rules as
+visibility is scoped to a run and subject to the same budget rules as
 a model `<get>`.
 `store` RPC manages constraints directly — it is not a model tool.
@@ -184,25 +382,23 @@ a model `<get>`.
 ---
-## 3. Entry-Driven Dispatch
+## Entry-Driven Dispatch
-### 3.1 Unified API
+### Unified API {#unified_api}
-Three callers, one interface. Each tier is a superset of the one below.
+Three callers share a tool vocabulary. The invocation shape is
+per-tier; params shape is not uniform across tiers.
-| Tier | Transport | Invocation shape |
-|------|-----------|-----------------|
-| Model | XML tags | `{ name: "rm", path: "file.txt" }` |
+| Tier | Transport | Invocation |
+|------|-----------|-----------|
+| Model | XML tags | `<rm path="file.txt"/>` |
 | Client | JSON-RPC | `{ method: "rm", params: { path: "file.txt" } }` |
-| Plugin | PluginContext | `rummy.rm({ path: "file.txt" })` |
-`name` (model) = `method` (client) = method name (plugin). The params
-object is the same shape at every tier.
+| Plugin | RummyContext verbs | `rummy.rm("file.txt")` (each verb takes what's natural — see `src/hooks/RummyContext.js`) |
 | Method | Model | Client | Plugin |
 |--------|-------|--------|--------|
-| `get`, `set`, `rm`, `mv`, `cp`, `sh`, `env`, `search` | ✓ | ✓ | ✓ |
-| `known`, `unknown`, `ask_user`, `summarize`, `update` | ✓ | ✓ | ✓ |
+| `think`, `get`, `set`, `rm`, `mv`, `cp`, `sh`, `env`, `search` | ✓ | ✓ | ✓ |
+| `ask_user`, `update` | ✓ | ✓ | ✓ |
 | `ask`, `act`, `resolve`, `abort`, `startRun` | — | ✓ | ✓ |
 | `getRuns`, `getModels`, `getEntries` | — | ✓ | ✓ |
 | `on()`, `filter()`, db/store access | — | — | ✓ |
@@ -210,44 +406,60 @@ object is the same shape at every tier.
 Model tier restrictions enforced by unified `resolveForLoop(mode, flags)`.
 Ask mode excludes `sh`. Flags: `noInteraction` excludes `ask_user`,
 `noWeb` excludes `search`, `noProposals` excludes `ask_user`/`env`/`sh`.
-14 model tools: think, unknown, known, get, set, env, sh, rm, cp, mv,
-ask_user, update, summarize, search.
+11 model tools: think, get, set, env, sh, rm, cp, mv, ask_user, update,
+search. The model writes `known` and `unknown` entries via
+`<set path="known://...">` and `<set path="unknown://...">`; those
+plugins don't advertise their own tag name — they render and filter.
 Client tier requires project init. Plugin tier has no restrictions.
-### 3.2 Dispatch Path
+### Dispatch Path {#dispatch_path}
-All three tiers feed the same handler chain:
+Each tier feeds into the shared tool handler chain, but through a
+different entry point:
 ```
-Model:  XmlParser → { name, path, ... } → #record() → dispatch(scheme, entry, rummy)
-Client: JSON-RPC  → { method, params }   → #record() → dispatch(scheme, entry, rummy)
-Plugin: rummy.rm({ path })               → #record() → dispatch(scheme, entry, rummy)
+Model:  XmlParser → { name, path, ... } → TurnExecutor.#record()
+                  → hooks.tools.dispatch(scheme, entry, rummy)
+Client: JSON-RPC  → rpc.js dispatchTool(hooks, rummy, scheme, ...)
+                  → hooks.tools.dispatch(scheme, entry, rummy)
+Plugin: rummy.set({path, body, ...}) / rummy.rm(path) / etc.
+                  → direct entries.* store calls (bypasses the handler chain)
 ```
+Model and client tiers both land in `hooks.tools.dispatch`, which
+invokes the scheme's registered handler. Model-tier additionally
+passes through `TurnExecutor.#record()` (adds turn-scoped recording,
+policy filtering, abort cascade). Plugin-tier convenience verbs
+(`rummy.rm`, `rummy.set`, ...) are thin wrappers over the store — they
+don't invoke the handler chain. Plugin code that wants full handler
+semantics calls `hooks.tools.dispatch` directly.
 **Tool dispatch:** Commands are dispatched sequentially in the order
 the model emitted them. Each tool either succeeds (200), fails (400+),
 or proposes (202). On failure, all remaining tools are aborted. On
 proposal, dispatch pauses, a notification is pushed to the client
-(same WebSocket push pattern as `run/progress`), the client resolves
+(same WebSocket push pattern as `run/state`), the client resolves
 (accept/reject), and dispatch resumes — the proposal becomes 200 or
 400+ like any other tool. The `ask`/`act` RPC response is only sent
 when all tools have completed. Proposals are NOT batched — each is
 sent and resolved inline during dispatch. The model controls tool
 ordering; the system respects it.
-If the model sends `<summarize>` but a preceding action in the same
-turn failed, the summarize is overridden to an update (the model's
-assertion that it's done is false). Both `<summarize>` and `<update>`
-present → last signal wins.
+If the model sends `<update status="200">` (terminal) but a preceding
+action in the same turn failed, the terminal assertion is overridden
+to a continuation (the model's claim of doneness is false); the update
+plugin resolves the update entry to 409 and surfaces it to the next
+turn as a continuation. Multiple `<update>` tags → last signal wins.
-**Post-dispatch budget check:** After all tools dispatch, the system
-materializes context and checks the budget ceiling. If context exceeds
-the ceiling, Turn Demotion fires — all entries from this turn are
-demoted to summary and a `budget://` entry is written. This is a
-system housekeeping step independent of tool success/failure. The
-tools already ran; their outcomes are settled.
+**Post-dispatch budget check:** After all tools dispatch, the budget
+plugin re-materializes context and checks the ceiling
+(`hooks.budget.postDispatch`). If context exceeds the ceiling, Turn
+Demotion fires — all `visible` `run_views` rows for the current turn
+have their `visibility` flipped to `summarized`, and an `error://` entry at status 413 is
+written. Status is NOT touched (see [schemes_status_visibility](#schemes_status_visibility)). The tools already ran;
+their outcomes are settled.
-### 3.3 Plugin Convention
+### Plugin Convention {#plugin_convention}
 A plugin is an instantiated class. The class name matches the file name.
 The constructor receives `core` (a PluginContext) — the plugin's
@@ -259,85 +471,258 @@ export default class Rm {
     constructor(core) {
         this.#core = core;
+        core.ensureTool();
+        core.registerScheme({ category: "logging" });
         core.on("handler", this.handler.bind(this));
-        core.on("full", this.full.bind(this));
+        core.on("visible", this.full.bind(this));
+        core.on("summarized", this.summary.bind(this));
     }
     async handler(entry, rummy) {
         // rummy here is per-turn RummyContext (not the startup PluginContext)
     }
-    full(entry) {
-        return `# rm ${entry.attributes.path}`;
-    }
+    full(entry)    { return `# rm ${entry.attributes.path}`; }
+    summary(entry) { return ""; }
 }
 ```
+**Registration verbs on PluginContext:**
+- `"handler"` — tool handler (dispatches when a matching entry is recorded).
+- `"visible"` / `"summarized"` — visibility view projections. Return the
+  projected body string for the given visibility level.
+- Any hook name (e.g. `"turn.started"`, `"entry.created"`) — subscribes
+  to that event.
+- `core.filter(name, callback, priority)` — subscribes to a filter chain.
 **Two objects:**
 - `this.#core` — PluginContext (startup). For registration: `on()`, `filter()`.
 - `rummy` argument — RummyContext (per-turn). For runtime: tool verbs, queries.
 **Plugin types:**
-- **Tool plugins**: register `handler` + `full`/`summary`. Model-invokable.
-- **Assembly plugins**: register `core.filter("assembly.system", ...)`. Own a packet tag.
-- **Infrastructure plugins**: register `core.on("turn", ...)`. Background work.
+- **Tool plugins**: register `handler` + `visible`/`summarized`. Model-invokable.
+- **Assembly plugins**: register `core.filter("assembly.system"|"assembly.user", ...)`. Own a packet tag.
+- **Infrastructure plugins**: subscribe to lifecycle events
+  (`turn.started`, `turn.response`, `turn.completed`, `entry.created`,
+  `loop.started`, etc.). Background work.
 A plugin can be multiple types. Known is a tool AND an assembly plugin.
-### 3.4 Mode Enforcement
+### Mode Enforcement {#mode_enforcement}
+Two mechanisms, operating at different layers:
+1. **Tool-list exclusion** — `hooks.tools.resolveForLoop(mode, flags)`
+   computes the active tool set at loop start. Ask mode excludes `sh`.
+   Flag-driven exclusions: `noInteraction` removes `ask_user`; `noWeb`
+   removes `search`; `noProposals` removes `ask_user`/`env`/`sh`. The
+   excluded tools don't appear in the system prompt's tool list.
+2. **Per-invocation filtering** — the `policy` plugin subscribes to
+   `entry.recording` and inspects individual emissions for ask-mode
+   violations that the tool-list alone can't catch (file-scheme `<set>`
+   edits, file `<rm>`, file-destination `<mv>`/`<cp>`). Rejects with
+   status 403 and emits `error://`. The tool remains advertised; the
+   specific invocation is blocked.
+### YOLO Mode {#yolo_mode}
+When a run is started with the `yolo: true` attribute (parallel to
+`noRepo`/`noWeb`/`noInteraction`/`noProposals`), the server fully
+emulates a connected headless client: every proposal auto-accepts and
+every sh/env command spawns server-side, streaming output to the
+existing data-channel entries. No client involvement; no human
+approval required.
+**Plumbing.** The `yolo` attribute flows through the same path as
+`noProposals`: `set run://` → `attributes.yolo` → AgentLoop loop config
+JSON → RummyContext.yolo getter. The yolo plugin reads `rummy.yolo`
+off the proposal-pending event payload and engages only when set.
+**Behavior on yolo runs:**
+1. **Auto-accept every proposal.** The yolo plugin listens to
+   `proposal.pending` and replicates `AgentLoop.resolve()`'s accept path
+   inline (`proposal.accepting` filter for veto, `proposal.content`
+   filter for body, `entries.set state="resolved"`,
+   `proposal.accepted` event for plugin side effects). The
+   `entries.waitForResolution` blocking call wakes immediately; the
+   loop continues without an RPC roundtrip.
+2. **Server-side sh/env execution.** For proposals on
+   `log://turn_N/sh/...` or `log://turn_N/env/...`, the yolo plugin
+   spawns the command in `projectRoot`, streams stdout/stderr to
+   `{dataBase}_1`/`{dataBase}_2` via `entries.set append=true`, and
+   transitions channels to terminal state on exit (200 / 500 mirror
+   of the existing `stream/completed` RPC contract). Done in-process,
+   no RPC roundtrip.
+3. **Non-yolo runs unaffected.** Without `yolo: true`, the plugin's
+   `proposal.pending` listener returns early. Existing client-driven
+   resolution (rummy.nvim, AuditClient's file-edit auto-accept) works
+   exactly as before.
+**Use cases.** E2E tests, benchmarks, CI, headless usage. The pattern
+is opt-in per run; rummy.nvim does not set `yolo: true` because
+human-in-the-loop control is the user-facing flow.
+**Architectural placement.** The yolo plugin owns its flag handling
+end-to-end — backbone files (TurnExecutor, AgentLoop) carry only the
+plumbing for the attribute and the rummy-context payload enrichment
+on `proposal.pending`. Feature logic stays in
+`src/plugins/yolo/yolo.js`.
+### Repo Overview {#repo_overview}
+The `rummy.repo` plugin maintains a single `repo://overview` entry per
+run, regenerated on every scan, that gives the model a navigable map
+of the project. It is the entry-point for code-aware runs — files
+themselves default to `archived` so a 5000-file repo doesn't dump
+hundreds of thousands of tokens into context before any work happens.
+**Entry contract.**
+- Path: `repo://overview` (scheme `repo`, category `data`,
+  `model_visible: 1`)
+- Visibility: `visible` (the navigation map is always in context)
+- Body: a markdown structure containing the project root, file count,
+  root-level files, top-level directories with file counts,
+  active/readonly constraints, and a navigation legend showing the
+  promote/demote idioms.
+- Visible projection: full body.
+- Summarized projection: first ~12 lines + a truncation marker, so a
+  model can demote it once it has the layout memorized.
+**File default visibility flip.**
+`FileScanner` registers each tracked file at `archived` by default
+(was `summarized`). Files with `constraint=active` still register at
+`visible`. The model uses `repo://overview` to discover paths, then
+promotes individual files via `<get path=...>` (visible, full body)
+or whole subtrees via `<set path=".../**" visibility="summarized"/>`
+(skim mode, symbols only).
+**Bounded cost.** The overview body is constant-ish in size regardless
+of repo size: root files capped, directory counts aggregated, no
+per-file symbol enumeration. The token cost in context stays roughly
+flat from a 30-file project to a 50,000-file monorepo.
+**Disabled when noRepo.** Setting `noRepo: true` on a run skips the
+scan entirely; no `repo://overview` is created and no file entries
+are registered. Behaviour identical to pre-plugin runs.
+### Streaming Entries {#streaming_entries}
+Producers that generate output over time (shell commands, web fetches,
+log tails, file watches) use the streaming-entry pattern. Entry
+lifecycle extends beyond the synchronous 202→200/400+ flow.
+**Lifecycle:**
+```
+202 Proposal (user decision pending)
+  → accept → 200 (log entry: action complete) + 102 data entries
+  → reject → 403
+```
+**Entry shape for a streaming producer** — two namespaces per
+invocation, one for the audit record, one for the payload (see
+[scheme_category_split](#scheme_category_split)):
+```
+log://turn_N/{action}/{slug}    scheme=log       category=logging   status=202→200
+                                body: "ran 'command', exit=0, Output: {paths}"
+                                (renders in <log>)
+{action}://turn_N/{slug}_1      scheme={action}  category=data      status=102 → 200/500
+                                body: primary stream (stdout for shell)
+                                summary="{command}" visibility=summarized
+                                (renders in <context>)
+{action}://turn_N/{slug}_2      scheme={action}  category=data      status=102 → 200/500
+                                body: alt stream (stderr for shell)
+                                (renders in <context>, often empty)
+```
+`{action}` is the producer plugin's name (`sh`, `env`, future: `search`,
+`fetch`, ...). The stream RPC accepts the **log-entry path** and derives
+the data base internally via `logPathToDataBase` — see
+[stream_plugin](#stream_plugin).
+**Channel numbering follows Unix file descriptor convention.** Channel
+1 is primary output (stdout for shell); channel 2 is alternate/error
+output (stderr); higher numbers for additional producer-specific
+channels. Non-process producers (search, fetch) map their streams onto
+the same numeric space: `_1` for the primary data stream, `_2` for
+anomalies/errors, `_3`+ for auxiliary streams.
+**Status 102 ("Processing") marks an entry in mid-stream:** body is
+partial, will change; tokens grow as chunks arrive. Agents reading a
+102 entry use `<get>` with `line`/`limit` (including negative `line`
+for tail) to sample without promoting full body.
+**Status transition on completion** is terminal: 200 (exit_code=0 or
+N/A for non-process producers), 500 (non-zero exit), or 499 (client
+aborted via `stream/aborted`). The log entry is rewritten with final
+stats (exit code, duration, channel sizes, or abort reason).
+**Budget demotion preserves status.** A 102 entry demoted by Turn
+Demotion stays at 102 — status reflects operation outcome, visibility
+reflects how much of the entry is rendered in context. See [schemes_status_visibility](#schemes_status_visibility) for the status-vs-visibility separation.
-All tools are available by default. In ask mode, the core removes
-act-only tools (`sh`, file-scheme `set`) from the tool list. This is
-a core concern — plugins do not declare their modes.
+**Stream plugin ([plugin_system](#plugin_system)) owns the append and completion RPCs.** Producer
+plugins (sh, env) create the proposal and data entries; the stream
+plugin handles the subsequent growth and terminal transitions.
 ---
-## 4. Message Structure
+## Message Structure {#message_structure}
 Two messages per turn. System = stable truth. User = active task.
-### 4.1 Packet Structure
+### Packet Structure {#packet_structure}
 ```
-[system]
-    [instructions]
-        [sacred_prompt/]
-        [toolDescriptions/]
-        [persona/]
-        [skills/]
-    [/instructions]
-    <knowns>
-        ...entries sorted by fidelity (summary, full), then by scheme
-    </knowns>
-    <previous>
-        (pre-loop entries, each with turn, status, summary, fidelity, tokens)
-    </previous>
+[system message]
+    instructions text
+        (instructions.md base template + tool docs injected via
+         instructions.toolDocs filter; optional persona appended)
+    <context>
+        all category=data entries (knowns, files, http/https),
+        wrapped by known.js on assembly.system at priority 100
+    </context>
+[user message]
+    <log>
+        action history — log:// entries + pre-latest prompts
+        (log.js, assembly.user priority 100)
+    </log>
     <unknowns>
-        (open questions, each with path, turn, fidelity, tokens)
+        (open questions at category=unknown, unknown.js priority 200)
     </unknowns>
-[/system]
-[user]
-    <performed>
-        (current loop entries, each with turn, status, summary, fidelity, tokens)
-    </performed>
-    <progress turn="N">token budget, fidelity stats, causal bridge</progress>
-    <prompt mode="ask|act" tools="...">user prompt</prompt>
-[/user]
+    <instructions>
+        current phase directive — one of instructions_104.md …
+        instructions_108.md, selected by the latest <update status="1XY">
+        emission (instructions.js, assembly.user priority 250)
+    </instructions>
+    <prompt mode="ask|act" tokenUsage="N" tokensFree="M">user prompt</prompt>
 ```
-**System** contains everything the model needs to know.
-**User** contains everything the model needs to do.
+**System** = stable world state the model operates within (identity,
+tools, tool docs, reference context). Stable across turns within a
+run, which keeps prompt caching intact. **User** = active work (what
+the model is doing right now): history, open questions, current
+phase, and current prompt. The phase-specific `<instructions>` block
+lives in the user message precisely *because* it changes between
+turns — putting it in system would invalidate the cache on every
+phase transition.
 The `<prompt>` tag is present on every turn — first turn and
 continuations alike. The model always sees its task. The active prompt
 is extracted from its chronological position and placed last for maximum
-recency. `<progress>` bridges the gap, narrating the causal relationship
-between `<performed>` (the work) and the prompt (the cause).
+recency. The `<prompt>` element carries `tokenUsage` / `tokensFree`
+attributes so the model can do budget arithmetic in-line with the cause.
-### 4.2 Loops, Previous, and Performed
+### Loops, Previous, and Performed {#loops_previous_performed}
 A **loop** is one `ask` or `act` invocation and all its continuation
-turns until summarize, fail, or abort.
+turns until `<update status="200">`, fail, or abort.
 **Previous** = all completed loops on this run. The user prompt, model
 responses, tool results, agent warnings — the full chronicle in order.
@@ -353,11 +738,11 @@ When a new prompt arrives on an existing run, the prior loop's
 `<performed>` content plus its prompt move to `<previous>`. When a loop
 continues (next turn), new results append to `<performed>`.
-### 4.3 Key Entries
+### Key Entries {#key_entries}
 | Path | Lifetime | Body | Attributes |
 |------|----------|------|-----------|
-| `instructions://system` | One per run (mutable) | Empty (projection builds from preamble + plugins) | `{ persona }` |
+| `instructions://system` | One per run (mutable) | Empty (projection builds from `instructions.md` + tool docs + optional persona) | `{ persona, toolSet }` |
 | `system://N` | Audit, one per turn | Full assembled system message | — |
 | `user://N` | Audit, one per turn | Full assembled user message | — |
 | `assistant://N` | Audit, one per turn | Model's raw response | — |
@@ -367,97 +752,176 @@ framework auto-populates `toolDescriptions` from tool registrations
 that include `docs`. The instructions projection assembles the final
 text from body + attributes.
-### 4.4 Materialization
+### Materialization {#materialization}
 Each turn:
-1. Write `instructions://system` (empty body, attributes = { persona })
+1. Write `instructions://system` (empty body, attributes = { persona, toolSet })
 2. Emit `turn.started` — plugins write prompt/instructions entries
-3. Project `instructions://system` → instructions text
-4. Query `v_model_context` VIEW → visible entries
-5. Project each entry through its tool's `full`/`summary` projection
+3. Resolve the instructions system prompt (`hooks.instructions.resolveSystemPrompt`)
+4. Query `v_model_context` VIEW → visible entries (joined from
+   `run_views` + `entries` + `schemes`)
+5. Project each entry through its scheme's `visible`/`summarized` projection
 6. Insert projected rows into `turn_context`
 7. Invoke `assembly.system` filter chain (instructions text as base):
-   - Known plugin (priority 100) → `<known>` section
+   - Known plugin (priority 100) → `<knowns>` section
    - Previous plugin (priority 200) → `<previous>` section
-   - Unknown plugin (priority 300) → `<unknowns>` section
 8. Invoke `assembly.user` filter chain (empty string as base):
    - Performed plugin (priority 100) → `<performed>` section
-   - Progress plugin (priority 200) → `<progress>` section
-   - Prompt plugin (priority 300) → `<prompt>` section
-9. Store as `system://N` and `user://N` audit entries
-The VIEW determines visibility from `fidelity` and `status`:
-- `full` → body visible
-- `summary` → summary visible (model-authored `summary` attribute if set)
-- `index` → path listed, no content
-- `archive` → invisible (retrievable via `<get>`)
-**Partial read:** `<get path="..." line="N" limit="M"/>` returns lines N through
-N+M−1 of the entry body as the log item without changing fidelity or promoting
-the entry to context. Use after reading `summary` fidelity (which gives line
-numbers via repomap) to target a specific symbol. Single-path only — glob or
-body filter with `line`/`limit` is a 400 error.
-- `status = 202` → invisible (proposed, pending client)
-- `model_visible = 0` → invisible (audit, tool, instructions)
-Model controls fidelity via `<set>` attributes: `archive`, `summary`,
-`index`, `full`. The `summary="..."` attribute attaches a description
-(<= 80 chars) that persists across fidelity changes.
-### 4.5 Budget Enforcement
+   - Unknown plugin (priority 200) → `<unknowns>` section
+   - Prompt plugin (priority 300) → `<prompt>` element (carries
+     `tokenUsage` / `tokensFree` attrs when `contextSize` is set)
+9. Store as `system://N` and `user://N` audit entries (telemetry plugin)
+The VIEW determines visibility from `visibility` and `status`:
+- `visibility = 'visible'` → full body visible in `<knowns>` / `<performed>`.
+- `visibility = 'summarized'` → summarized projection visible (typically path +
+  summary attr). Promote with `<get>` to expand.
+- `visibility = 'archived'` → invisible. Discoverable via pattern search
+  (`<get path="known://*">keyword</get>`); promote to bring back into view.
+- `status = 202` → invisible (proposed, pending client resolution).
+- `model_visible = 0` → invisible (audit schemes: instructions, system,
+  reasoning, model, user, assistant, content, tool).
+**Partial read:** `<get path="..." line="N" limit="M"/>` returns lines N
+through N+M−1 of the entry body as the log item without changing
+visibility or promoting the entry to context. Use after reading a
+demoted entry (which shows path + summary) to target a specific slice.
+Single-path only — glob or body filter with `line`/`limit` is a 400 error.
+Model controls visibility via `<set>` attributes:
+`visibility="archived|summarized|visible"`. The `summary="..."` attribute
+attaches a description (≤ 80 chars) that persists across visibility
+changes.
+### Token Accounting {#token_accounting}
+Tokens are a property of the materialized packet, not of stored entries.
+They are computed during assembly, exposed on the materialization records,
+and consumed by the budget plugin for the model-facing `<budget>` table.
+Nothing else in the system has its own opinion of "what an entry costs."
+**Per-entry materialization records** carry three token measures:
+| Field | Meaning |
+|---|---|
+| `vTokens` | Wire cost when the entry is fully visible. The body rendered through the scheme's `visible` view, wrapped in its envelope tag, tokenized. |
+| `sTokens` | Wire cost when the entry is summarized. The body rendered through the scheme's `summarized` view (typically a projection or 500-char preview), wrapped in its envelope tag, tokenized. |
+| `aTokens` | `vTokens − sTokens`. The promotion premium — the marginal cost of the entry being visible rather than summarized. The only token measure exposed to the model on per-entry tags. |
+The model sees `tokens="N"` on each entry tag. That `N` is `aTokens`. It
+means: *demoting this entry frees `N` tokens; promoting this entry from
+summarized to visible costs `N` tokens.* The number is a pure lever — no
+body-vs-wire ambiguity, no envelope overhead surprise.
+**Floor and premium.** A run's packet decomposes into:
+- **Summarized floor** = sum of `sTokens` for all non-archived entries.
+  Paid regardless of any visibility decision the model can make. Covers
+  every entry that is either `summarized` or `visible`: a visible entry's
+  `vTokens` already contains its `sTokens` share, because
+  `aTokens = vTokens − sTokens`.
+- **Visibility premium** = sum of `aTokens` for currently-visible entries.
+  The active cost of visibility decisions. The model's lever.
+- **System overhead** = system prompt + tool definition tokens. Constant
+  per turn, not addressable by the model.
+`tokenUsage = floor + premium + system`. `tokensFree = ceiling − tokenUsage`.
+**`<budget>` rendered shape** (between `<instructions>` and `<prompt>`,
+priority 275):
-The model owns its context. The system enforces a hard ceiling and
-provides advisory warnings — it does not automatically manage entries.
-**Pre-LLM check:** The budget plugin measures `countTokens()` on the
-assembled messages. If assembled tokens exceed `contextSize`, the turn
-returns 413 without calling the LLM. This triggers panic mode (see
-§4.6).
-**Write-layer gate:** BudgetGuard on KnownStore gates every write
-during dispatch. `upsert()`, `promoteByPattern()`, and
-`updateBodyByPattern()` check token delta against remaining headroom.
-Exceeding the budget throws `BudgetExceeded` — the tool 413s, the
-guard trips, and all subsequent tools in the turn fail.
-BudgetGuard ceiling = `floor(contextSize × 0.9) − 500`. The 500-token
-buffer below the enforce ceiling absorbs two sources of overhead that
-BudgetGuard cannot see: (a) `#record()`-phase writes that bypass the
-guard (~15 tokens per command), and (b) loop transition overhead —
-when a loop completes and a new one starts, entries shift from
-`<performed>` to `<previous>` format, adding ~200–300 tokens to the
-next assembly. Without this buffer, the base context can accumulate
-to exactly the enforce ceiling, making it impossible for the panic
-loop to start (panic prompt + loop overhead > ceiling).
-**Exemptions:** `status >= 400` entries (error results), `model_visible
-= 0` entries (audit), `fidelity = "archive"` entries (not in context).
-**Size gate:** Known entries exceeding 500 tokens are rejected with
-413, forcing atomic entries.
-**Advisory warnings** (progress plugin):
-- 50%: "You may free space by lowering the fidelity of entries"
-- 75%: "YOU MUST free space... or the run will fail"
+```
+<budget tokenUsage="N" tokensFree="M">
+| scheme | visible | tokens | % |
+|---|---|---|---|
+| <scheme> | <count> | <sum-of-aTokens> | <%-of-ceiling> |
+... rows for visible-scheme breakdown, sorted desc by tokens ...
-**Token math:** `Math.ceil(text.length / RUMMY_TOKEN_DIVISOR)`. One
-formula, one file (`src/agent/tokens.js`), env-configurable. No
-external dependencies. `contextSize` is the ceiling. Over = 413.
-Under = 200. No margins.
+Summarized: <count> entries, <sum-of-sTokens> tokens (<%>% of budget).
+System: <token-count> tokens (<%>% of budget).
+Total: <visible-count> visible + <summarized-count> summarized entries; tokenUsage <N> / ceiling <C>. <M> tokens free.
+</budget>
+```
-**Three token measures — never conflate them:**
+**Why the table only contains visible scheme rows.** The `tokens` column
+in the table is `aTokens` — the action lever. Per-entry visibility of
+summarized entries is intentionally not surfaced; surgical pruning of
+individual high-signal summaries is the wrong action shape. The
+summarized aggregate line below the table is the only signal for that
+class — actionable via glob (`<set path="known://oldsession/*"
+visibility="archived"/>`), not per-entry.
+**Where the math is computed.** Materialization (the assembly path
+through `materializeContext.js` and `ContextAssembler.js` plus per-scheme
+view handlers) renders each entry's visible and summarized projections,
+wraps them in their envelope, and tokenizes both. The resulting per-entry
+record carries `vTokens`/`sTokens`/`aTokens` alongside the projected
+text. The budget plugin's `assembleBudget` filter consumes this; no other
+caller measures tokens.
+**Body-size gates** (e.g. `known.js` MAX_ENTRY_TOKENS) compute
+`countTokens(body)` inline at write time. They check intrinsic body
+size, not wire cost — the materialization record doesn't yet exist when
+an entry is being written.
+### Budget Enforcement {#budget_enforcement}
-| Measure | Source | Scope | Use |
-|---|---|---|---|
-| SQL entry tokens | `known_entries.tokens` = `ceil(chars / DIVISOR)` | Per entry | Model decision-making: "this entry costs N tokens" |
-| Assembled estimate | `measureMessages(messages)` = sum of entry projections | Full packet | First-turn budget fallback only |
-| Actual API tokens | `turns.context_tokens` = `usage.input_tokens` back-filled from LLM | Per turn | Budget enforcement on turns 2+; ground truth |
+The model owns its context. The system enforces a hard ceiling and
+surfaces the numbers — it does not automatically manage entries.
+**Ceiling.** `ceiling = floor(contextSize × RUMMY_BUDGET_CEILING)`
+(default `RUMMY_BUDGET_CEILING = 0.9`, i.e. 10% headroom). All budget
+decisions compare `assembledTokens` against `ceiling`, never against
+`contextSize` directly.
+**Pre-LLM enforce** (`hooks.budget.enforce`, in TurnExecutor before
+the LLM call). Measures the assembled messages (using
+`turns.context_tokens` from the prior turn when available,
+`countTokens(messages)` as a first-turn estimate).
+- `assembledTokens ≤ ceiling` → return 200, proceed to LLM.
+- `assembledTokens > ceiling` on the first turn of a loop → **Prompt
+  Demotion**: demote the incoming `prompt://N` entry to `visibility =
+  demoted`, re-materialize, re-check. If the retry fits, proceed.
+- `assembledTokens > ceiling` on a non-first turn, or still over after
+  Prompt Demotion → return 413. AgentLoop exits the loop with 413.
+**Post-dispatch Turn Demotion** (`hooks.budget.postDispatch`, after
+all tool dispatches complete). Re-materializes end-of-turn context
+and re-checks. If still over the ceiling, flips every `run_views` row
+for this turn from `visibility = visible` to `visibility = summarized`
+(status preserved — see [schemes_status_visibility](#schemes_status_visibility))
+and emits a 413 error via `hooks.error.log.emit` with the descriptive
+body (what was demoted, the 50% rule for the next turn). The model
+sees the `error://` entry next turn and adjusts.
+**LLM-reported context exceeded.** If the LLM rejects the request
+with a "context too long" error (detected via the regex in
+`src/llm/errors.js`), the LlmProvider raises `ContextExceededError`
+which TurnExecutor catches and emits a 413 error through the same
+channel.
+**Known-scheme size gate** (in the `known` plugin). Writes to
+`known://` entries exceeding `RUMMY_MAX_ENTRY_TOKENS` (default 512)
+are rejected at the handler with an instructive error message. Forces
+atomic entries instead of dumping transcripts into a single `known://`.
+**Advisory feedback.** The model reads `tokensFree` / `tokenUsage`
+attributes on `<budget>` every turn and self-regulates. The full
+breakdown (per-scheme visible cost, summarized aggregate, system
+overhead) lives in the same tag — see [token_accounting](#token_accounting)
+for the rendered shape and the contract for what each number means.
+No threshold-based warnings. When the ceiling is actually breached the
+413 `error://` entry is the feedback.
-`budget.enforce` uses the **actual API tokens** (`get_last_context_tokens`) when
-available (turn 2+) and falls back to the assembled estimate on turn 1. The
-estimate can be 3–7× off for XML/JSON-heavy content — do not rely on it for
-anything that matters.
+**Token math:** `Math.ceil(text.length / RUMMY_TOKEN_DIVISOR)`. One
+formula, one file (`src/agent/tokens.js`), env-configurable. No
+external dependencies. All costs surfaced to the model and the budget
+guard come through materialization (see [token_accounting](#token_accounting));
+the budget guard's pre-LLM check uses the actual API tokens
+(`turns.context_tokens` from the prior turn) when available, falling
+back to the materialized packet estimate on turn 1.
 **`context_tokens` vs `prompt_tokens` in step telemetry:**
 - `context_tokens` in the step JSON = `turns.context_tokens` for that turn =
@@ -469,92 +933,14 @@ These two will diverge rapidly on any multi-turn run. A run at turn 50 might sho
 `context_tokens: 8000` (context under control) and `prompt_tokens: 400000`
 (total input tokens billed across the whole run). They are measuring orthogonal things.
-### 4.6 Panic Mode
-**The invariant.** A panic is only ever triggered because the
-assembled context was under the ceiling — and the new prompt pushed
-it over. The existing context fit; the incoming prompt did not.
-Panic mode replaces that too-large incoming prompt with a small
-panic prompt on the same context. Therefore: the first turn of a
-panic loop cannot 413. If it does, it is a bug.
-**Trigger.** `TurnExecutor.execute()` assembles the full packet
-(context + incoming prompt) before calling the LLM. If
-`assembledTokens > contextSize`, it returns 413 without calling
-the LLM. `#drainQueue` intercepts this and enters panic mode.
-**Flow.**
-1. Complete the failed loop with status 413 (audit trail).
-2. Enqueue a panic loop (`mode = "panic"`, `noRepo = true`,
-   `prompt = panicPrompt`, `panicTarget` in config).
-3. Re-enqueue the original loop with `panicAttempted: true` in
-   its config JSON. This flag persists across drain cycles.
-4. `continue` — the drain loop claims the panic loop next.
-After panic completes (model freed enough space), the retry loop
-runs. If the retry also 413s, hard-fail to client. One panic
-attempt per drain cycle — `panicAttempted` is checked both as a
-local variable and on the re-enqueued loop's config.
-**Panic target.** The model must compress context to below:
-```
-panicTarget = MIN(contextSize × 0.75, contextSize − incomingTokens) − cushion
-```
-`incomingTokens` is the raw token count of the original prompt.
-`cushion` is a small safety margin (500 tokens) to absorb
-materialization overhead. The target is expressed in materialized
-token units — the same unit the system uses to measure completion
-(see Token Math below).
-**Two token contexts.**
-The model reasons in *per-entry SQL tokens* — the token counts
-visible in `<knowns>` entries. These are the granular unit the model
-uses to decide which entries to target: "this entry is 200 tokens;
-if I archive it, I save 200 tokens."
-The system makes decisions using *actual API tokens* —
-`turns.context_tokens` back-filled from `usage.input_tokens` after
-each LLM call. SQL token sums do not equal actual API counts because
-projections, assembly overhead, and fidelity transforms alter the
-output; and the SQL estimate (`ceil(chars / DIVISOR)`) can be 3–7×
-off for structured content. **Never use SQL token sums for ceiling or
-budget decisions.** See §4.5 Token Measures for the full breakdown.
-**Strike system.** After each panic turn, compare
-`result.assembledTokens` (materialized) with `_lastPanicTokens`
-(previous turn's materialized total):
-- Decreased → reset strike counter to 0.
-- Same or increased → increment strikes.
-- 3 consecutive strikes → return 413 to `#drainQueue` → hard-fail.
-Progress (any reduction) resets the counter. The model has
-unlimited turns as long as it makes progress.
-**Panic success.** After each turn, if `result.assembledTokens
-<= panicTarget`, the panic loop exits with 200. The retry loop
-then runs with the original prompt on the now-compressed context.
-**Tool set.** `resolveForLoop("panic")` includes: get, set, known,
-unknown, rm, mv, cp, summarize, update. Excludes: sh, env, search,
-ask_user. `noRepo: true` — no file scanning during panic.
-**What the model sees.** Turn 1 receives the panic prompt from
-`budget.panicPrompt()`: the assembled token count, the target, and
-the exact number of tokens to free. Turn 2+ receives a continuation
-prompt. The model uses `<set fidelity="archive">`, `<mv
-fidelity="summary">`, and similar fidelity operations to free space,
-concluding with `<summarize>` when done or `<update>` while working.
 ---
-## 5. RPC Protocol
+## RPC Protocol
 JSON-RPC 2.0 over WebSocket. `discover` returns the live catalog.
-### 5.1 Methods
+### Methods {#rpc_methods}
 #### Protocol
@@ -607,6 +993,25 @@ be added explicitly by the client).
 `noInteraction` removes `ask_user` from the tool list.
 `noWeb` removes `search` from the tool list.
+#### Streaming (see [streaming_entries](#streaming_entries))
+| Method | Params |
+|--------|--------|
+| `stream` | `{ run, path, channel, chunk }` |
+| `stream/completed` | `{ run, path, exit_code?, duration? }` |
+| `stream/aborted` | `{ run, path, reason?, duration? }` |
+| `stream/cancel` | `{ run, path, reason? }` |
+Producer-agnostic RPC for streaming output into data entries created by
+any plugin (sh/env today; search/fetch/watch as future consumers). The
+`stream` method appends `chunk` to `{path}_{channel}`; `stream/completed`
+transitions all `{path}_*` channels to terminal status (200/500) and
+finalizes the log entry body; `stream/aborted` is the client-initiated
+cancellation counterpart, transitioning channels to **499** (Client
+Closed Request); `stream/cancel` is the server-initiated counterpart
+(transitions to 499 and pushes `stream/cancelled` notification to
+connected clients). `stream/cancel` also handles stale 102 cleanup.
 #### Queries
 | Method | Params |
@@ -628,28 +1033,65 @@ be added explicitly by the client).
 Skills loaded from `RUMMY_HOME/skills/{name}.md`. Personas from
 `RUMMY_HOME/personas/{name}.md`.
-### 5.2 Notifications
+### Notifications {#notifications}
+| Notification | Scoped by | Purpose |
+|-------------|-----------|---------|
+| `rummy/hello` | connection | Server greeting on client connect. Carries `rummyVersion` (semver). Clients check MAJOR and refuse on mismatch. |
+| `run/state` | projectId | Turn state snapshot (status, history, unknowns, telemetry). Fires per command dispatch (incremental 102), at turn conclusion (verdict status), and at terminal run close. |
+| `run/progress` | projectId | Transient turn activity (`thinking` / `processing` / `retrying`). |
+| `run/proposal` | projectId | A 202 entry is awaiting resolution. |
+| `stream/cancelled` | projectId | Server-initiated streaming cancellation. |
+| `ui/render` | projectId | Streaming UI output (e.g. tool progress). |
+| `ui/notify` | projectId | Toast notification. |
+**`run/state` payload shape** — the unified contract for both the
+notification and `getRun` RPC:
+```jsonc
+{
+  "run": "gemma_1234567890",
+  "turn": 4,
+  "status": 102,              // numeric HTTP status
+  "summary": "…",             // latest <update status="200"> body, or ""
+  "history": [                // chronological per-entry log
+    {
+      "tool": "set",
+      "path": "known://president/current",
+      "status": 200,
+      "body": "Donald Trump is the 47th president…",
+      "turn": 4,
+      "attributes": "{\"summary\":\"president,current,trump\",\"visibility\":\"visible\"}"
+    }
+  ],
+  "unknowns": [{ "path": "unknown://…", "body": "…" }],
+  "telemetry": null | { /* final end-of-turn usage; null on mid-turn emissions */ }
+}
+```
+`history` includes every entry the model has touched this run in
+timeline order — prompt entries, unknowns, tool results. `attributes`
+is raw JSON; parse client-side. Mid-turn emissions have `telemetry:
+null`; the final emission of each turn includes the full telemetry
+block (token usage, context distribution, cost).
-| Notification | Scoped by |
-|-------------|-----------|
-| `run/state` | projectId |
-| `run/progress` | projectId |
-| `ui/render` | projectId |
-| `ui/notify` | projectId |
+`stream/cancelled` payload: `{ run, path, reason }`. Server has
+already transitioned the entries to 499 (`Client Closed Request`);
+client should stop sending `stream` chunks for that path.
-### 5.3 Resolution
+### Resolution {#resolution}
 | Resolution | Model signal | Outcome |
 |-----------|-------------|---------|
 | reject | any | `completed` — rejection stops the bus |
-| accept | `<update>` | `running` — model has more work |
-| accept | `<summarize>` | `completed` |
+| accept | `<update status="102">` | `running` — model has more work |
+| accept | `<update status="200\|204\|422">` | `completed` — terminal |
 | accept | neither | `running` — healer decides |
 | error | any | `running` — error state, model retries |
 ---
-## 6. Plugin System
+## Plugin System {#plugin_system}
 See [PLUGINS.md](PLUGINS.md) for the full plugin development guide,
 including the RummyContext API, tool registration, handler chains,
@@ -659,7 +1101,7 @@ Each plugin has its own README at `src/plugins/{name}/README.md`.
 ---
-## 7. Tool Documentation Design
+## Tool Documentation Design {#tool_documentation}
 Tool docs are the most carefully designed text in rummy. Every line
 simultaneously teaches syntax, implies workflow priority, demonstrates
@@ -674,12 +1116,13 @@ simple to powerful — weak models learn from examples 1-2, strong models
 pick up the pattern from example 3.
 **Lifecycle continuity.** Examples weave stories across tools. The get
-docs end with `<set path="..." fidelity="summary"/>`. The known docs
-reference `<get path="known://*">keyword</get>` for recall and
-`<set path="known://..." archive/>` for archiving. The unknown docs
-reference `<get/>` for investigation and `<rm/>` for cleanup. A model
-reading the full tool docs encounters a coherent workflow:
-discover → load → reason → edit → archive → recall.
+docs demonstrate `<get path="known://*">keyword</get>` for pattern recall
+and `<get path="..." line="N" limit="M"/>` for partial reads that don't
+promote. The known docs reference `<get path="known://*">keyword</get>`
+for recall. The unknown docs reference `<set path="unknown://..."
+visibility="archived"/>` for retiring resolved questions and `<get/>` for
+investigation. A model reading the full tool docs encounters a coherent
+workflow: discover → load → reason → edit → archive → recall.
 **RFC 2119 semantics.** Constraint bullets use YOU MUST, YOU MUST NOT,
 YOU SHOULD, YOU MAY from RFC 2119. Every LLM has extensive pretraining
@@ -732,7 +1175,7 @@ are universal — not a feature of any single tool.
 ---
-## 8. Hedberg Editing Syntax
+## Hedberg Editing Syntax {#hedberg}
 The model picks its preferred edit format. The parser understands all of them:
@@ -747,7 +1190,7 @@ The model picks its preferred edit format. The parser understands all of them:
 ---
-## 9. Response Healing
+## Response Healing {#response_healing}
 The server never throws on model output. "Model behavior" is never an
 acceptable explanation. Recovery order:
@@ -757,15 +1200,16 @@ acceptable explanation. Recovery order:
 3. Did our structure cause this? Check formatting, prompts.
 Termination protocol:
-- `<summarize>` → run terminates
-- `<summarize>` + failed actions → overridden to `<update>` (continue)
-- `<update>` → run continues
-- Both → last signal wins (respects the model's final intent)
-- Neither + investigation tools → stall counter (RUMMY_MAX_STALLS)
-- Neither + action-only tools → healed to summarize
-- Neither + plain text → healed to summarize
+- `<update status="200|204|422">` → run terminates
+- `<update status="200">` + failed actions → overridden to continuation
+  (the claim of doneness is refuted by the failures)
+- `<update status="102">` → run continues
+- Multiple `<update>` → last one wins
+- No `<update>` + investigation tools → stall counter (RUMMY_MAX_STALLS)
+- No `<update>` + action-only tools → healer infers terminal from body
+- No `<update>` + plain text → healer infers terminal from body
 - Repeated commands → cycle detection (RUMMY_MIN_CYCLES, RUMMY_MAX_CYCLE_PERIOD)
-- Repeated update text → stall (RUMMY_MAX_UPDATE_REPEATS)
+- Repeated update text without non-update work → stall (RUMMY_MAX_UPDATE_REPEATS)
 Format normalization:
 - Gemma `\`\`\`tool_code` fences → stripped before parsing
@@ -776,7 +1220,7 @@ Format normalization:
 ---
-## 10. Testing
+## Testing
 | Tier | Location | LLM? |
 |------|----------|------|
@@ -788,9 +1232,66 @@ Format normalization:
 E2E tests must NEVER mock the LLM. Environment cascade:
 `.env.example` → `.env` → `.env.test`. Always use `npm run test:*`.
+### Spec-Anchored Testing
+Integration and e2e tests MUST be anchored to SPEC.md's snake_case
+anchor system. The rule is bidirectional:
+1. **Every SPEC.md heading with a `{#snake_case_id}` anchor has at
+   least one integration or e2e test that references it.** The
+   reference is literal: an `@snake_case_id` token appearing in the
+   test file (suite name, test name, or comment). A heading without
+   a test reference is a spec with no verified guarantee.
+2. **Every integration or e2e test is attributed to at least one
+   `@`-reference.** A test describing behavior that isn't in SPEC
+   means either the behavior gets added to SPEC or the test moves
+   out of the integration / e2e tiers.
+Enforcement: `npm run test:spec` parses SPEC.md's `{#id}` anchors
+and greps `test/integration/` + `test/e2e/` for `@id` references.
+Missing references fail the script. The check runs in CI and blocks
+merges.
+Unit tests (`src/**/*.test.js`) are exempt — they verify
+implementation details, not spec guarantees.
+**Why snake_case, not numeric `§X.Y`:** slugs are stable identifiers
+independent of section ordering. Numbering required a rewrite of
+every test reference whenever SPEC.md was reorganized. Slugs never
+churn — rename a section's text, leave the anchor, no tests break.
+**Anchor naming rules:**
+- Lowercase `[a-z0-9_]`, underscores for word separation.
+- Unique across the whole document.
+- Stable once published: treat as a permanent identifier; renames
+  are a breaking change requiring a test sweep.
+- Short and semantic (`entries`, not `section_0_1_the_entry_contract`).
+**When a section doesn't get an anchor:** umbrella sections (parents
+of testable subsections, like "The Contract" or "RPC Protocol") and
+pure-documentation sections (env var listings, debugging procedures,
+this section itself) stay as plain headings. The anchor *implies
+testability* — if there's nothing observable to verify, adding an
+anchor creates a permanent false obligation.
+**PLUGINS.md and `src/plugins/*/README.md`** participate in the
+same coverage gate as SPEC.md. `npm run test:spec` scans all three
+sources for `{#snake_case_id}` anchors and requires each one to
+have an integration or e2e test that references it. Anchors must
+be unique across the whole doc set — the script errors on
+collision. Conventional prefixes keep namespaces clean: SPEC uses
+bare slugs (`entries`, `primitives`), PLUGINS uses `plugins_*`,
+plugin READMEs use `<plugin>_plugin`.
+**Untestable plugin docs (LLM providers, quickstart tutorials,
+loader-level behavior verified only in `test/live/`)** stay as
+plain headings without anchors. Anchors are a commitment to
+verification; skipping the anchor is the honest declaration that
+no integration test exists or is feasible.
 ---
-## 11. SQL Functions
+## SQL Functions {#sql_functions}
 | Function | Purpose |
 |----------|---------|
@@ -805,7 +1306,7 @@ See [PLUGINS.md](PLUGINS.md) for the hedberg pattern type reference.
 ---
-## 13. Debugging: E2E and Benchmark Results
+## Debugging: E2E and Benchmark Results
 ### E2E test failures
@@ -826,10 +1327,11 @@ The dump format is: `scheme:state path {attributes}\n  body (120 chars)` grouped
 Key things to look for in a dump:
 - **202**: unresolved proposals — model issued `<sh>`, `<rm>`, or `<mv>` that needs approval
-- **413**: budget overflow — assembled context exceeded ceiling before LLM call
-- **BudgetGuard errors**: per-tool rejections mid-turn (`Budget exceeded: N tokens requested`)
-- **`<sh>` in act/panic mode**: model fell back to shell when blocked (doc/prompt gap)
-- Loop sequence: look for `mode` in `instructions://system` attrs to see which loop type ran
+- **413**: budget overflow — assembled context exceeded ceiling (see [budget_enforcement](#budget_enforcement))
+- **403**: policy rejection (ask-mode file writes) or permission denial (writer ∉ `writable_by`)
+- **`error://` entries at status 413**: Turn Demotion fired — model received a directive to demote promotions next turn
+- **`error://` entries at other statuses**: runtime errors (422 parser warnings, 429 cycle detection, 403 policy rejections, 500 dispatch crashes)
+- **`<sh>` in ask mode**: the policy plugin rejected it; check for the corresponding `error://` entry
 ### MAB benchmark
@@ -853,20 +1355,79 @@ Run with: `npm run test:lme`
 ---
-## 12. Configuration
-```env
-RUMMY_HOME=~/.rummy
-RUMMY_TOKEN_DIVISOR=2
-RUMMY_MAX_TURNS=99
-RUMMY_MAX_STALLS=3
-RUMMY_MIN_CYCLES=3
-RUMMY_MAX_CYCLE_PERIOD=4
-RUMMY_MAX_UPDATE_REPEATS=3
-RUMMY_RETENTION_DAYS=31
-RUMMY_TEMPERATURE=0.5
-RUMMY_DEBUG=false
-```
-Model aliases: `RUMMY_MODEL_{alias}={provider/model}`. Seeded into
-`models` table at startup.
+## Configuration
+Full reference is `.env.example` — these are the load-bearing vars.
+**Runtime:**
+| Var | Default | Purpose |
+|-----|---------|---------|
+| `PORT` | 3044 | WebSocket port |
+| `RUMMY_HOME` | `~/.rummy` | Skills, personas, local config |
+| `RUMMY_DB_PATH` | `rummy.db` | SQLite path |
+| `RUMMY_MMAP_MB` | 0 | SQLite mmap hint (MB; 0 disables) |
+| `RUMMY_DEBUG` | false | Verbose logging |
+**Budget & token math:**
+| Var | Default | Purpose |
+|-----|---------|---------|
+| `RUMMY_BUDGET_CEILING` | 0.9 | Fraction of `contextSize` used as ceiling |
+| `RUMMY_MAX_ENTRY_TOKENS` | 512 | `known://` write rejection threshold |
+| `RUMMY_TOKEN_DIVISOR` | 2 | `ceil(chars/N)` token estimate divisor |
+**Loop controls:**
+| Var | Default | Purpose |
+|-----|---------|---------|
+| `RUMMY_MAX_TURNS` | 15 | Hard loop iteration cap |
+| `RUMMY_MAX_COMMANDS` | 99 | Max parsed tool calls per turn |
+| `RUMMY_MAX_STALLS` | 3 | Turns without `<update>` before force-complete |
+| `RUMMY_MAX_UPDATE_REPEATS` | 3 | Same-text repeat threshold without progress |
+| `RUMMY_MIN_CYCLES` | 3 | Consecutive repetitions to trigger cycle detection |
+| `RUMMY_MAX_CYCLE_PERIOD` | 4 | Max cycle period checked by healer |
+| `RUMMY_RETENTION_DAYS` | 31 | Days of completed/aborted runs kept |
+| `RUMMY_THINK` | 1 | Enable `<think>` tag reasoning |
+| `RUMMY_TEMPERATURE` | 0.5 | Default LLM temperature |
+| `RUMMY_RPC_TIMEOUT` | 30000 | RPC timeout (ms) |
+| `RUMMY_FETCH_TIMEOUT` | 300000 | LLM HTTP timeout (ms) |
+**LLM providers** (plugin-scoped; a provider with no config is inert):
+| Var | Purpose |
+|-----|---------|
+| `OPENROUTER_BASE_URL` / `OPENROUTER_API_KEY` | OpenRouter |
+| `OPENAI_BASE_URL` / `OPENAI_API_KEY` | OpenAI-compatible (llama.cpp, OpenAI, etc.) |
+| `OLLAMA_BASE_URL` | Ollama |
+| `XAI_BASE_URL` / `XAI_API_KEY` | xAI |
+| `RUMMY_HTTP_REFERER` / `RUMMY_X_TITLE` | OpenRouter attribution headers |
+**Model aliases:**
+`RUMMY_MODEL_{alias}={provider/model}` or `{provider/publisher/model}` —
+seeded into `models` table at startup. First path segment picks the
+provider plugin; the rest is the provider's own model identifier. E.g.
+`RUMMY_MODEL_gpt4=openai/gpt-4`, `RUMMY_MODEL_claude=openrouter/anthropic/claude-3-opus`.
+Optional companion: `RUMMY_CONTEXT_{alias}={tokens}` overrides the
+auto-discovered context length.
+**External plugins:**
+`RUMMY_PLUGIN_{name}={path or npm package}` loads an external plugin
+at startup. Absolute path or published package name (resolved via
+local `node_modules` then global).
+**Search:**
+| Var | Purpose |
+|-----|---------|
+| `RUMMY_SEARCH` | `brave` \| `searxng` |
+| `BRAVE_API_KEY` | Brave Search API key |
+| `RUMMY_SEARXNG_URL` | SearXNG instance URL |
+**Testing:**
+| Var | Purpose |
+|-----|---------|
+| `RUMMY_TEST_MODEL` | Model alias used by test/live/e2e runners |