npm - @possumtech/rummy - Versions diffs - 2.1.0 → 2.2.1 - Mend

@possumtech/rummy 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

package/.env.example +40 -15
package/.xai.key +1 -0
package/PLUGINS.md +169 -53
package/README.md +38 -32
package/SPEC.md +366 -179
package/bin/digest.js +1097 -0
package/biome/no-fallbacks.grit +2 -2
package/gemini.key +1 -0
package/lang/en.json +10 -1
package/migrations/001_initial_schema.sql +9 -2
package/package.json +19 -8
package/service.js +1 -0
package/src/agent/AgentLoop.js +76 -26
package/src/agent/ContextAssembler.js +2 -0
package/src/agent/Entries.js +238 -60
package/src/agent/ProjectAgent.js +44 -0
package/src/agent/TurnExecutor.js +99 -30
package/src/agent/XmlParser.js +206 -111
package/src/agent/errors.js +35 -0
package/src/agent/known_queries.sql +1 -1
package/src/agent/known_store.sql +3 -42
package/src/agent/materializeContext.js +30 -1
package/src/agent/runs.sql +8 -18
package/src/agent/tokens.js +0 -1
package/src/agent/turns.sql +1 -0
package/src/hooks/Hooks.js +26 -0
package/src/hooks/RummyContext.js +12 -1
package/src/lib/hedberg/README.md +60 -0
package/src/lib/hedberg/hedberg.js +60 -0
package/src/lib/hedberg/marker.js +158 -0
package/src/{plugins → lib}/hedberg/matcher.js +1 -2
package/src/llm/LlmProvider.js +41 -3
package/src/llm/openaiStream.js +17 -0
package/src/plugins/ask_user/ask_user.js +12 -2
package/src/plugins/ask_user/ask_userDoc.md +1 -5
package/src/plugins/budget/README.md +29 -24
package/src/plugins/budget/budget.js +166 -110
package/src/plugins/cli/README.md +3 -4
package/src/plugins/cli/cli.js +31 -5
package/src/plugins/cloudflare/cloudflare.js +136 -0
package/src/plugins/cp/cp.js +41 -4
package/src/plugins/cp/cpDoc.md +5 -6
package/src/plugins/engine/engine.sql +1 -1
package/src/plugins/env/README.md +5 -4
package/src/plugins/env/env.js +7 -4
package/src/plugins/env/envDoc.md +7 -8
package/src/plugins/error/error.js +56 -15
package/src/plugins/file/README.md +12 -3
package/src/plugins/file/file.js +2 -2
package/src/plugins/get/get.js +59 -36
package/src/plugins/get/getDoc.md +10 -34
package/src/plugins/google/google.js +115 -0
package/src/plugins/hedberg/hedberg.js +13 -56
package/src/plugins/helpers.js +66 -12
package/src/plugins/index.js +1 -2
package/src/plugins/instructions/README.md +44 -47
package/src/plugins/instructions/instructions-system.md +44 -0
package/src/plugins/instructions/instructions-user.md +53 -0
package/src/plugins/instructions/instructions.js +58 -189
package/src/plugins/known/README.md +6 -7
package/src/plugins/known/known.js +24 -30
package/src/plugins/log/log.js +41 -32
package/src/plugins/mv/mv.js +40 -1
package/src/plugins/mv/mvDoc.md +1 -8
package/src/plugins/ollama/ollama.js +4 -3
package/src/plugins/openai/openai.js +4 -3
package/src/plugins/openrouter/openrouter.js +14 -4
package/src/plugins/persona/README.md +11 -13
package/src/plugins/persona/default.md +29 -0
package/src/plugins/persona/persona.js +10 -66
package/src/plugins/policy/policy.js +23 -22
package/src/plugins/prompt/README.md +37 -27
package/src/plugins/prompt/prompt.js +13 -19
package/src/plugins/rm/rm.js +18 -0
package/src/plugins/rm/rmDoc.md +5 -6
package/src/plugins/rpc/rpc.js +3 -3
package/src/plugins/set/set.js +205 -323
package/src/plugins/set/setDoc.md +47 -17
package/src/plugins/sh/README.md +6 -5
package/src/plugins/sh/sh.js +8 -5
package/src/plugins/sh/shDoc.md +7 -8
package/src/plugins/skill/README.md +37 -14
package/src/plugins/skill/skill.js +200 -101
package/src/plugins/skill/skillDoc.js +3 -0
package/src/plugins/skill/skillDoc.md +9 -0
package/src/plugins/stream/README.md +7 -6
package/src/plugins/stream/finalize.js +100 -0
package/src/plugins/stream/stream.js +13 -45
package/src/plugins/telemetry/telemetry.js +27 -4
package/src/plugins/think/think.js +2 -3
package/src/plugins/think/thinkDoc.md +2 -4
package/src/plugins/unknown/README.md +1 -1
package/src/plugins/unknown/unknown.js +17 -19
package/src/plugins/update/update.js +4 -51
package/src/plugins/update/updateDoc.md +21 -6
package/src/plugins/xai/xai.js +68 -102
package/src/plugins/yolo/yolo.js +102 -75
package/src/sql/functions/hedmatch.js +1 -1
package/src/sql/functions/hedreplace.js +1 -1
package/src/sql/functions/hedsearch.js +1 -1
package/src/sql/functions/slugify.js +16 -2
package/BENCH_ENVIRONMENT.md +0 -230
package/CLIENT_INTERFACE.md +0 -396
package/last_run.txt +0 -5617
package/scriptify/ask_run.js +0 -77
package/scriptify/cache_probe.js +0 -66
package/scriptify/cache_probe_grok.js +0 -74
package/src/agent/budget.js +0 -33
package/src/agent/config.js +0 -38
package/src/plugins/hedberg/README.md +0 -71
package/src/plugins/hedberg/docs.md +0 -0
package/src/plugins/hedberg/edits.js +0 -55
package/src/plugins/hedberg/normalize.js +0 -17
package/src/plugins/hedberg/sed.js +0 -49
package/src/plugins/instructions/instructions.md +0 -34
package/src/plugins/instructions/instructions_104.md +0 -8
package/src/plugins/instructions/instructions_105.md +0 -39
package/src/plugins/instructions/instructions_106.md +0 -22
package/src/plugins/instructions/instructions_107.md +0 -17
package/src/plugins/instructions/instructions_108.md +0 -0
package/src/plugins/known/knownDoc.js +0 -3
package/src/plugins/known/knownDoc.md +0 -8
package/src/plugins/unknown/unknownDoc.js +0 -3
package/src/plugins/unknown/unknownDoc.md +0 -11
package/turns/cli_1777462658211/turn_001.txt +0 -772
package/turns/cli_1777462658211/turn_002.txt +0 -606
package/turns/cli_1777462658211/turn_003.txt +0 -667
package/turns/cli_1777462658211/turn_004.txt +0 -297
package/turns/cli_1777462658211/turn_005.txt +0 -301
package/turns/cli_1777462658211/turn_006.txt +0 -262
package/turns/cli_1777465095132/turn_001.txt +0 -715
package/turns/cli_1777465095132/turn_002.txt +0 -236
package/turns/cli_1777465095132/turn_003.txt +0 -287
package/turns/cli_1777465095132/turn_004.txt +0 -694
package/turns/cli_1777465095132/turn_005.txt +0 -422
package/turns/cli_1777465095132/turn_006.txt +0 -365
package/turns/cli_1777465095132/turn_007.txt +0 -885
package/turns/cli_1777465095132/turn_008.txt +0 -1277
package/turns/cli_1777465095132/turn_009.txt +0 -736
package/src/{plugins → lib}/hedberg/patterns.js +0 -0

package/SPEC.md CHANGED Viewed

@@ -1,9 +1,9 @@
 # RUMMY: Architecture Specification
 The authoritative reference for Rummy's design. The instructions
-plugin (`instructions.md` + phase-specific `instructions_10N.md` +
-tool docs) defines model-facing behavior. This document defines
-everything else.
+plugin (`instructions-system.md` + `instructions-user.md` + tool
+docs) and the persona plugin (`persona/default.md`) define model-
+facing behavior. This document defines everything else.
 ---
@@ -18,10 +18,9 @@ uses one of these words, it should mean exactly what's written here.
 | **loop** | One `ask` or `act` invocation and all its continuation turns until terminal `<update>`, abandonment, or abort. A run can contain multiple loops if a fresh prompt arrives on an existing run. |
 | **turn** | One round-trip with the LLM: one assembled prompt sent, one response parsed. A loop is a sequence of turns. |
 | **mode** | `ask` (read-only — no proposals, no `<sh>`, no edits) or `act` (full tool surface). Per loop, set at the entry point. |
-| **phase** | (Primary, FCRM sense.) One of five FCRM states selected by `<update status="1XY">`: 104=Definition, 105=Discovery, 106=Demotion, 107=Deployment, 108=Verification. Maps to `instructions_10N.md` rendered in `<instructions>`. **The model-facing instructions call these "stages"** — same concept, dual vocabulary kept for the model's surface stability. Two non-FCRM uses of "phase" coexist in the codebase and AGENTS.md: (1) "two-phase turn execution" refers to RECORD→DISPATCH within a single turn; (2) AGENTS.md "Phase 1 / Phase 2 / ..." entries refer to project-development milestones (Schema, Primitives, etc.) — neither is the FCRM phase. Context disambiguates; if it doesn't, it's a doc bug. |
-| **stage** | Model-facing synonym for **phase**. Lives in `instructions_*.md` and tooldocs. |
+| **phase** | The RECORD→DISPATCH split within a single turn (see [dispatch_path](#dispatch_path)). AGENTS.md "Phase 1 / Phase 2 / ..." entries refer to project-development milestones; that's a separate use of the word. The model-facing workflow lives in `persona/default.md` as the 7D ladder — Draft → Decompose → Discover → Distill → Define → Determine → Deliver — a persona convention, not a status-keyed engine state. |
 | **proposal** | A tool-call entry at status 202 awaiting client resolution (accept/reject). Side-effecting actions (`<sh>`, `<env>`, file `<set>`, file `<rm>`/`<mv>`/`<cp>`, `<ask_user>`) emit proposals. YOLO mode auto-accepts. |
-| **verdict** | The end-of-turn ruling from `hooks.error.verdict` (owned by the error plugin). Returns `{continue, status, reason}`. Decides whether the loop continues to another turn or terminates. |
+| **verdict** | The end-of-turn ruling from `hooks.turn.verdict.filter` — a generic filter chain. Returns `{continue, status, reason}`. The error plugin is the canonical subscriber today; future plugins (cycle-detection, budget-overflow termination) can join the chain to vote without touching error.js or AgentLoop. Decides whether the loop continues to another turn or terminates. |
 | **strike** | A turn whose verdict counts toward `MAX_STRIKES`. A strike fires when `turnErrors > 0` (any `error.log` entry that turn) or when cycle detection trips silently. The streak counter resets on a clean turn (no errors, no cycle); reaches `MAX_STRIKES` → loop abandons at 499. |
 | **resolution** | Client's accept/reject of a proposal via `run/resolve` RPC. |
 | **dispatch** | The DISPATCH phase of a turn — actually executing recorded action entries. |
@@ -124,11 +123,12 @@ hooks-and-filters system. Plugins subscribe to events (fire-and-forget
 side effects) and filters (transformation chains that thread a value
 through subscribers in priority order).
-**Every `<tag>` the model sees is a plugin.** `<knowns>` → known
-plugin. `<unknowns>` → unknown plugin. `<performed>` → performed
-plugin. `<previous>` → previous plugin. `<prompt>` → prompt plugin.
-No monolithic assembler decides what goes where. Each plugin filters
-for its own data from the shared row set, renders its section, returns.
+**Every `<tag>` the model sees is a plugin.** `<summary>` /
+`<visible>` → known plugin. `<unknowns>` → unknown plugin. `<log>`
+→ log plugin. `<instructions>` → instructions plugin. `<prompt>` →
+prompt plugin. `<budget>` → budget plugin. No monolithic assembler
+decides what goes where. Each plugin filters for its own data from
+the shared row set, renders its section, returns.
 **Plugins compose, they don't coordinate.** A plugin subscribes to a
 filter at a priority, receives the accumulator value, appends its
@@ -252,7 +252,7 @@ Every entry plays one of four roles:
 | Role | Category | Section | Description |
 |------|----------|---------|-------------|
-| **Data** | `data` | `<summarized>` + `<visible>` | Entries the model works with — persistent state and captured payload. Summary line in `<summarized>` for visible+summarized tiers; full body in `<visible>` only when promoted. |
+| **Data** | `data` | `<summary>` + `<visible>` | Entries the model works with — persistent state and captured payload. Summary line in `<summary>` for visible+summarized tiers; full body in `<visible>` only when promoted. |
 | **Logging** | `logging` | `<log>` | Records of what happened — tool results, lifecycle signals |
 | **Unknowns** | `unknown` | `<unknowns>` | Open questions the model is tracking |
 | **Prompt** | `prompt` | `<prompt>` | The task driving the loop |
@@ -287,7 +287,7 @@ across two namespaces as a direct consequence:
   scheme=`log`, category=`logging`. Renders in `<log>`.
 - **Payload channels** live in `{action}://turn_N/{slug}_N` —
   scheme=`{action}` (registered as `category: "data"`). Render in
-  `<summarized>` (always, while tracked) and `<visible>` (when
+  `<summary>` (always, while tracked) and `<visible>` (when
   promoted).
 This keeps `<log>` a terse audit trail (what happened, exit code,
@@ -390,11 +390,18 @@ the current loop; pending loops survive. Projects > runs > loops > turns.
 The `file_constraints` table is project-level configuration — it
 defines which files a project cares about. This is backbone, not tool
-dispatch. Constraints have three visibilities:
-- `active` — matching files are promoted into the run's context
-- `readonly` — promoted but not editable by the model
-- `ignore` — demoted (excluded from context)
+dispatch. Constraint type governs **membership** and **write
+permission**, not in-context visibility. In-context visibility
+(`visible` / `summarized` / `archived`) is per-entry and model-
+controlled — files default to `archived` on ingestion; the model
+promotes via `<get>` / `<set visibility=…>`.
+- `add` — file is part of the project; ingested as an entry; model
+  may write. Default for `setConstraint`.
+- `readonly` — same ingestion; `<set>` is vetoed at the proposal-
+  accept gate.
+- `ignore` — excluded from scans entirely. The file remains on disk
+  for `<sh>` / `<env>` invocation but is not present as an entry.
 **Boundary:** Setting a constraint (`File.setConstraint`) is a
 project-config write. Promoting/demoting the matching entries is tool
@@ -503,9 +510,9 @@ to a continuation (the model's claim of doneness is false); the update
 plugin resolves the update entry to 409 and surfaces it to the next
 turn as a continuation. Multiple `<update>` tags → last signal wins.
-**Post-dispatch budget check:** After all tools dispatch, the budget
-plugin re-materializes context and checks the ceiling
-(`hooks.budget.postDispatch`). If context exceeds the ceiling, Turn
+**Post-dispatch budget check:** After all tools dispatch, TurnExecutor
+emits `turn.dispatched`; the budget plugin subscribes, re-materializes
+context, and checks the ceiling. If context exceeds the ceiling, Turn
 Demotion fires — all `visible` `run_views` rows for the current turn
 have their `visibility` flipped to `summarized`, and an `error://` entry at status 413 is
 written. Status is NOT touched (see [schemes_status_visibility](#schemes_status_visibility)). The tools already ran;
@@ -685,44 +692,43 @@ plumbing for the attribute and the rummy-context payload enrichment
 on `proposal.pending`. Feature logic stays in
 `src/plugins/yolo/yolo.js`.
-### Repo Overview {#repo_overview}
+### Project Manifest {#project_manifest}
-The `rummy.repo` plugin maintains a single `repo://overview` entry per
-run, regenerated on every scan, that gives the model a navigable map
-of the project. It is the entry-point for code-aware runs — files
-themselves default to `archived` so a 5000-file repo doesn't dump
-hundreds of thousands of tokens into context before any work happens.
+The `rummy.repo` plugin writes a single `log://turn_0/repo/manifest` entry
+once per run — a flat snapshot of every project file with its token
+cost. It gives the model orientation at run start without burning
+prefix-cache on a turn-keyed regeneration. Files themselves default
+to `archived` so a 5000-file repo doesn't dump hundreds of thousands
+of tokens into context before any work happens.
 **Entry contract.**
-- Path: `repo://overview` (scheme `repo`, category `data`,
-  `model_visible: 1`)
-- Visibility: `visible` (the navigation map is always in context)
-- Body: a markdown structure containing the project root, file count,
-  root-level files, top-level directories with file counts,
-  active/readonly constraints, and a navigation legend showing the
-  promote/demote idioms.
-- Visible projection: full body.
-- Summarized projection: first ~12 lines + a truncation marker, so a
-  model can demote it once it has the layout memorized.
+- Path: `log://turn_0/repo/manifest` (log scheme; turn-0 marks "before
+  any model turn"). One entry per run, written once.
+- Visibility: `visible` at write; demotable like any log entry.
+- Body: a flat list of `* <relative-path> - <N> tokens` lines, one
+  per file, sorted by path. No headers, no directory aggregation, no
+  constraints, no navigation legend — those are the model's business
+  to derive from the list itself or from tooldocs.
+**Stale by design.** The manifest is a turn-0 snapshot; it does not
+update mid-run. Authoritative current state lives in the per-file
+entries (mtime/hash-driven, change-only writes). The model can
+`<get path="**" preview/>` for a fresh listing if it suspects
+staleness.
 **File default visibility flip.**
 `FileScanner` registers each tracked file at `archived` by default
 (was `summarized`). Files with `constraint=active` still register at
-`visible`. The model uses `repo://overview` to discover paths, then
+`visible`. The model uses the manifest to discover paths, then
 promotes individual files via `<get path=...>` (visible, full body)
 or whole subtrees via `<set path=".../**" visibility="summarized"/>`
 (skim mode, symbols only).
-**Bounded cost.** The overview body is constant-ish in size regardless
-of repo size: root files capped, directory counts aggregated, no per-
-file symbol enumeration. The token cost in context stays roughly
-flat from a 30-file project to a 50,000-file monorepo.
 **Disabled when noRepo.** Setting `noRepo: true` on a run skips the
-scan entirely; no `repo://overview` is created and no file entries
-are registered. Behaviour identical to pre-plugin runs.
+scan entirely; no manifest is created and no file entries are
+registered. Behaviour identical to pre-plugin runs.
 ### Streaming Entries {#streaming_entries}
@@ -749,13 +755,13 @@ log://turn_N/{action}/{slug}    scheme=log       category=logging   status=202
 {action}://turn_N/{slug}_1      scheme={action}  category=data      status=102 → 200/500
                                 body: primary stream (stdout for shell)
-                                summary="{command}" visibility=summarized
-                                (line in <summarized>; full body in
+                                tags="{command}" visibility=summarized
+                                (line in <summary>; full body in
                                  <visible> when promoted)
 {action}://turn_N/{slug}_2      scheme={action}  category=data      status=102 → 200/500
                                 body: alt stream (stderr for shell)
-                                (line in <summarized>; full body in
+                                (line in <summary>; full body in
                                  <visible> when promoted, often empty)
 ```
@@ -809,21 +815,32 @@ Two messages per turn. System = stable truth. User = active task.
 ```
 [system message]
-    instructions text
-        (instructions.md base template + tool docs injected via
-         instructions.toolDocs filter; optional persona appended)
-[user message]
-    <summarized>
+    instructions-system.md text (with [%TOOLS%] / [%TOOLDOCS%]
+    expansions) + persona body. Resolved by the instructions
+    plugin's hooks.instructions.resolveSystemPrompt — single-owner,
+    cache-stable across all turns within a run. The assembly.system
+    filter chain exists but currently has no subscribers; the
+    system message is the resolved system prompt verbatim.
+[user message]                     (sandwich ordering — see below)
+    <prompt tokenUsage="N" tokensFree="M">user prompt</prompt>
+        (prompt.js, assembly.user priority 30 — front, cacheable
+         across turns within a loop)
+    <summary>
         one entry per category=data entry whose visibility is visible
-        or summarized; plus the named carve-out (archived prompts pass
-        through with visibility="archived" so the model can <get> the
-        active prompt back). Each entry renders under its scheme tag
-        with its summarized projection as the tag body — this is the
-        compact-but-informative view produced by the plugin's summary()
-        hook (e.g. truncated knowns, code symbols for files, page
-        abstracts for URLs). Identity-keyed, slow-mutating: only grows
-        when a new entry lands. (known.js, assembly.user priority 50)
-    </summarized>
+        or summarized. Each entry renders under its scheme tag with
+        its summarized projection as the tag body — the compact-but-
+        informative view produced by the plugin's summarized() hook
+        (truncated knowns, code symbols for files, page abstracts
+        for URLs). Identity-keyed, slow-mutating: only grows when a
+        new entry lands. Archived entries — including prompts —
+        are filtered out uniformly. There is no instruction-side
+        guard against archiving the active prompt — if the model
+        archives it, the next turn renders without a <prompt> tag
+        and visibly fails (paradigm purity over silent rescue;
+        action-gate is the principled future fix per
+        src/plugins/prompt/README.md).
+        (known.js, assembly.user priority 50)
+    </summary>
     <visible>
         each category=data entry whose visibility is visible, rendered
         under its scheme tag with its visible projection as the tag
@@ -833,63 +850,86 @@ Two messages per turn. System = stable truth. User = active task.
         (known.js, assembly.user priority 75)
     </visible>
     <log>
-        action history — log:// entries + pre-latest prompts
+        action history — all logging-category entries (log:// audit
+        records, error://, update://) plus pre-latest prompt://
+        entries (the active prompt is extracted to <prompt>).
         (log.js, assembly.user priority 100)
     </log>
     <unknowns>
-        (open questions at category=unknown, unknown.js priority 200)
+        open questions at category=unknown, each rendered as an
+        <unknown> child whose body is the question. (unknown.js,
+        assembly.user priority 150)
     </unknowns>
     <instructions>
-        current phase directive — one of instructions_104.md …
-        instructions_108.md, selected by the latest <update status="1XY">
-        emission (instructions.js, assembly.user priority 250)
+        instructions-user.md text. Per-turn imperative reminders.
+        Same bytes every turn — no phase keying, no status-driven
+        selection. (instructions.js, assembly.user priority 165)
     </instructions>
-    <prompt mode="ask|act" tokenUsage="N" tokensFree="M">user prompt</prompt>
+    <budget tokenUsage="N" tokensFree="M">…breakdown table…</budget>
+        (budget.js, assembly.user priority 175 — last, recency for
+         the live accounting at the action site)
 ```
 **System** = stable world state the model operates within (identity,
-tools, tool docs). Stable across turns within a run, which keeps
-prompt caching intact. **User** = active work (what the model is
-doing right now): the project's data surface, history, open questions,
-current phase, and current prompt. Both phase-specific
-`<instructions>` and the codebase blocks (`<summarized>` / `<visible>`)
-live in the user message because they change turn-to-turn — putting
-mutable state in system would invalidate the cache on every promote
-or phase transition.
+tools, tool docs, persona). Stable across turns within a run, which
+keeps prompt caching intact. **User** = active work: the project's
+data surface, history, open questions, current task, and live
+accounting. The user message changes turn-to-turn so it sits outside
+the prefix-cacheable region; both `<instructions>` and the codebase
+blocks (`<summary>` / `<visible>`) live here because they mutate at
+turn cadence — putting mutable state in system would invalidate the
+cache on every promote.
+**Sandwich ordering.** User-message blocks are arranged
+`<prompt>` (30, front) → `<summary>` (50) → `<visible>` (75) →
+`<log>` (100) → `<unknowns>` (150) → `<instructions>` (165) →
+`<budget>` (175, last). The prompt sits at the front (cacheable
+across turns of a loop, since it doesn't change within a loop); the
+instructions and budget sit at the tail so the rules and live
+accounting have recency at the action site. An earlier front-loaded
+ordering (instructions first for max cache) regressed terminal-
+`<update>` discipline in e2e — the model lost the rule when it sat
+3K tokens upstream of the action. Recency at the action site beats
+cache savings when the action depends on remembering a rule.
 **Why two blocks instead of one `<context>`.** Promote/demote is the
-dominant intra-phase operation. Today's single-block render
-invalidates the entire data surface every time. With the split,
-`<summarized>` mutates only when a new entry lands (slow); `<visible>`
+dominant intra-loop operation. A single-block render would
+invalidate the entire data surface on every promote. With the split,
+`<summary>` mutates only when a new entry lands (slow); `<visible>`
 mutates on every promote/demote (fast). Ordering slow-above-fast
-preserves the prefix cache for `<summarized>` across the common case.
-Cognitively: `<summarized>` is "what I know exists" (identity);
+preserves the prefix cache for `<summary>` across the common case.
+Cognitively: `<summary>` is "what I know exists" (identity);
 `<visible>` is "what I'm reading right now" (working memory).
 The `<prompt>` tag is present on every turn — first turn and
-continuations alike. The model always sees its task. The active prompt
-is extracted from its chronological position and placed last for maximum
-recency. The `<prompt>` element carries `tokenUsage` / `tokensFree`
-attributes so the model can do budget arithmetic in-line with the cause.
+continuations alike. The model always sees its task. The tag's
+`tokenUsage` / `tokensFree` attributes also appear on `<budget>` so
+the model can do budget arithmetic at both ends of the user message.
-### Loops, Previous, and Performed {#loops_previous_performed}
+### Loops and Cross-Loop Continuity {#loops_previous_performed}
 A **loop** is one `ask` or `act` invocation and all its continuation
-turns until `<update status="200">`, fail, or abort.
-**Previous** = all completed loops on this run. The user prompt, model
-responses, tool results, agent warnings — the full chronicle in order.
-Lives in the system message as established history. Omitted on the
-first turn of the first loop.
-**Performed** = the active loop's work so far. Model responses, tool
-results, agent warnings — in order. Does NOT include the user prompt
-(one per loop, extracted to `<prompt>`). Lives in the user
-message as immediate context. Empty on the first turn of a loop.
+turns until `<update status="200">`, fail, or abort. A run may
+contain many loops; pending loops queue FIFO via the loops table.
+Cross-loop continuity is carried by the entry store itself:
+- **Knowns, files, unknowns** persist across loop boundaries with
+  whatever visibility the model left them at. They render in
+  `<summary>` / `<visible>` per visibility, regardless of which
+  loop wrote them.
+- **Log entries** (action audit, errors, updates) accumulate at
+  `log://turn_N/...` for every turn of every loop; `log.js`
+  renders all logging-category entries plus pre-latest prompts in
+  `<log>` in chronological order.
+- **The active prompt** is extracted from its chronological
+  position and rendered as `<prompt>` at priority 30 (front);
+  prior prompts render in `<log>` like any other logging entry.
 When a new prompt arrives on an existing run, the prior loop's
-`<performed>` content plus its prompt move to `<previous>`. When a loop
-continues (next turn), new results append to `<performed>`.
+`prompt://N` entry stays in the store; on the next assembly it
+falls out of `<prompt>` (replaced by the new prompt) and into
+`<log>` — visibility-driven re-rendering of the same entry rows.
 ### Key Entries {#key_entries}
@@ -911,23 +951,33 @@ Each turn:
 1. Write `instructions://system` (empty body, attributes = { persona, toolSet })
 2. Emit `turn.started` — plugins write prompt/instructions entries
-3. Resolve the instructions system prompt (`hooks.instructions.resolveSystemPrompt`)
+3. Resolve the instructions system prompt
+   (`hooks.instructions.resolveSystemPrompt` — single-owner; see
+   AGENTS.md "Architectural exceptions"). Returns
+   `instructions-system.md` with `[%TOOLS%]` / `[%TOOLDOCS%]`
+   expanded, persona body appended.
 4. Query `v_model_context` VIEW → visible entries (joined from
    `run_views` + `entries` + `schemes`)
 5. Project each entry through its scheme's `visible`/`summarized` projection
 6. Insert projected rows into `turn_context`
-7. Invoke `assembly.system` filter chain (instructions text as base):
-   - Known plugin (priority 100) → `<knowns>` section
-   - Previous plugin (priority 200) → `<previous>` section
+7. Invoke `assembly.system` filter chain — currently no
+   subscribers, so the system message is the resolved system
+   prompt verbatim.
 8. Invoke `assembly.user` filter chain (empty string as base):
-   - Performed plugin (priority 100) → `<performed>` section
-   - Unknown plugin (priority 200) → `<unknowns>` section
-   - Prompt plugin (priority 300) → `<prompt>` element (carries
-     `tokenUsage` / `tokensFree` attrs when `contextSize` is set)
+   - Prompt plugin (priority 30) → `<prompt>` element (carries
+     `tokenUsage` / `tokensFree` attrs)
+   - Known plugin (priority 50) → `<summary>` section
+   - Known plugin (priority 75) → `<visible>` section
+   - Log plugin (priority 100) → `<log>` section
+   - Unknown plugin (priority 150) → `<unknowns>` section
+   - Instructions plugin (priority 165) → `<instructions>` section
+     (renders `instructions-user.md`)
+   - Budget plugin (priority 175) → `<budget>` element (carries
+     `tokenUsage` / `tokensFree` and per-scheme breakdown)
 9. Store as `system://N` and `user://N` audit entries (telemetry plugin)
 The VIEW determines visibility from `visibility` and `status`:
-- `visibility = 'visible'` → full body visible in `<knowns>` / `<performed>`.
+- `visibility = 'visible'` → full body visible in `<visible>` (data) or `<log>` (logging).
 - `visibility = 'summarized'` → summarized projection visible (typically path +
   summary attr). Promote with `<get>` to expand.
 - `visibility = 'archived'` → invisible. Discoverable via pattern search
@@ -947,6 +997,29 @@ Model controls visibility via `<set>` attributes:
 attaches a description (≤ 80 chars) that persists across visibility
 changes.
+### Filesystem Freshness {#filesystem_freshness}
+After any mutation of a file or scheme entry, the next turn's
+assembled context reflects the post-mutation body AND visibility,
+without the model needing a fresh `<get>` to recover its own
+changes. The model's view of the entry store is always a faithful
+projection of current state — there is no read-after-write skew.
+The invariant has two parts:
+1. **Body freshness** — a write that changes the entry body shows
+   the new body on the next assembly's `<visible>` (when visible)
+   or under `<get>` (when summarized/archived).
+2. **Visibility freshness** — a write that explicitly sets
+   `visibility=...` honors the requested level on the next
+   assembly. Edit-path side effects (e.g., a SEARCH/REPLACE accept
+   silently downgrading visibility) violate the invariant; the
+   model would answer the next turn from memory of pre-edit state
+   while the new body sits invisible.
+Enforcement: `test/integration/file_freshness.test.js` exercises
+write-through for both file and scheme entries.
 ### Token Accounting {#token_accounting}
 Tokens are a property of the materialized packet, not of stored entries.
@@ -1021,49 +1094,55 @@ an entry is being written.
 ### Budget Enforcement {#budget_enforcement}
 The model owns its context. The system enforces a hard ceiling and
-surfaces the numbers — it does not automatically manage entries.
+surfaces the numbers. Auto-demotion is reserved for the 413 budget
+grinder, which only fires in response to actual overflow — never
+helpfully or speculatively.
 **Ceiling.** `ceiling = floor(contextSize × RUMMY_BUDGET_CEILING)`
 (default `RUMMY_BUDGET_CEILING = 0.9`, i.e. 10% headroom). All budget
 decisions compare `assembledTokens` against `ceiling`, never against
 `contextSize` directly.
-**Pre-LLM enforce** (`hooks.budget.enforce`, in TurnExecutor before
-the LLM call). Measures the assembled messages (using
-`turns.context_tokens` from the prior turn when available,
-`countTokens(messages)` as a first-turn estimate).
-- `assembledTokens ≤ ceiling` → return 200, proceed to LLM.
-- `assembledTokens > ceiling` on the first turn of a loop → **Prompt
-  Demotion**: demote the incoming `prompt://N` entry to `visibility =
-  demoted`, re-materialize, re-check. If the retry fits, proceed.
-- `assembledTokens > ceiling` on a non-first turn, or still over after
-  Prompt Demotion → return 413. AgentLoop exits the loop with 413.
-**Post-dispatch Turn Demotion** (`hooks.budget.postDispatch`, after
-all tool dispatches complete). Re-materializes end-of-turn context
-and re-checks. If still over the ceiling, flips every `run_views` row
-for this turn from `visibility = visible` to `visibility = summarized`
-(status preserved — see [schemes_status_visibility](#schemes_status_visibility))
-and emits a 413 error via `hooks.error.log.emit` with the descriptive
-body (what was demoted, the 50% rule for the next turn). The model
-sees the `error://` entry next turn and adjusts.
-**Delta-from-actual prediction.** Post-dispatch uses
-`predictNextPacket = lastContextTokens + Σ countTokens(body) for rows added this turn`,
-not the conservative measureMessages estimator. Reason: a 60%+
-divergence between the pre-call `<prompt tokenUsage>` (real API
-prompt_tokens) and the post-check estimator made the model dismiss
-the budget as janky and stop following demote rules. The two numbers
-must live on the same scale.
-**Prior-turn-pressure fallback.** If post-dispatch finds nothing to
-demote in the current turn but the packet still overflows, the
-pressure is coming from prior-turn promotions the model never demoted
-itself. Demotion widens to all currently-visible entries in the run
-and the prompt is also demoted. Without this fallback, observed
-behavior was strikes accumulating on runs whose base context had
-drifted over ceiling through no fault of the current turn.
+**Pre-LLM grinder** (`hooks.turn.beforeDispatch.filter`, in
+TurnExecutor before the LLM call; budget is the canonical
+subscriber). A four-step ladder. Step 1 measures; steps 2 and 3
+each demote a progressively narrower scope and recheck. The first
+check that fits the ceiling proceeds to the LLM; if step 4 fires,
+AgentLoop exits the loop with 413.
+1. **Check budget.** Measure `assembledTokens` (using
+   `turns.context_tokens` from the prior turn when available, the
+   materialized packet estimate as a first-turn fallback). If
+   `assembledTokens ≤ ceiling`, proceed to the LLM.
+2. **Soft 413 — previous-turn demotion.** Flip every `run_views`
+   row where `turn = current_turn - 1 AND visibility = visible` to
+   `summarized` (status preserved — see
+   [schemes_status_visibility](#schemes_status_visibility)). All
+   schemes participate; no exemption for knowns / unknowns /
+   files. Re-materialize, re-check.
+3. **Soft 413 — current-prompt demotion.** Flip the incoming
+   `prompt://N` entry to `summarized`. Re-materialize, re-check.
+   Step 3 exists because the prompt is stamped at `current_turn`,
+   not the previous turn — step 2's filter never sees it. Without
+   step 3, an oversized first-turn prompt has no path to fit.
+4. **Hard 413.** Emit a 413 `error://` entry via
+   `hooks.error.log.emit` with the descriptive body (what was
+   demoted across steps 2-3, the ceiling, the residual overflow).
+   AgentLoop exits the loop with 413.
+Steps 2 and 3 also emit 413 `error://` entries when they fire
+(distinct from step 4 in that the run keeps going). The model reads
+those next turn and learns what got auto-demoted. Status of the
+turn that proceeded after a soft 413 is unaffected.
+**Trunks and forks are treated identically.** A forked run inherits
+the parent's `run_views` rows verbatim — each entry keeps its
+original `turn`. There is no fork-event restamping. The grinder's
+`current_turn - 1` rule applies the same way in both cases. For
+the rule to point at meaningful inherited content on a fork's first
+dispatch, the child run inherits the parent's `next_turn` so turn
+numbering is absolute across the lineage; sibling forks share the
+same prior history at lower turn numbers and only diverge at
+fork-time.
 **LLM-reported context exceeded.** If the LLM rejects the request
 with a "context too long" error (detected via the regex in
@@ -1147,9 +1226,9 @@ on `get` also sets a project-level file constraint (operator privilege).
 | Method | Params |
 |--------|--------|
-| `startRun` | `{ model, temperature?, persona?, contextLimit? }` |
-| `ask` | `{ prompt, model, run?, temperature?, persona?, contextLimit?, noRepo?, noInteraction?, noWeb?, fork? }` |
-| `act` | `{ prompt, model, run?, temperature?, persona?, contextLimit?, noRepo?, noInteraction?, noWeb?, fork? }` |
+| `startRun` | `{ model, temperature?, persona?, contextLimit?, yolo? }` |
+| `ask` | `{ prompt, model, run?, temperature?, persona?, contextLimit?, noRepo?, noInteraction?, noWeb?, noProposals?, yolo?, fork? }` |
+| `act` | `{ prompt, model, run?, temperature?, persona?, contextLimit?, noRepo?, noInteraction?, noWeb?, noProposals?, yolo?, fork? }` |
 | `run/resolve` | `{ run, resolution: { path, action, output? } }` |
 | `run/abort` | `{ run }` |
 | `run/rename` | `{ run, name }` |
@@ -1161,6 +1240,10 @@ on `get` also sets a project-level file constraint (operator privilege).
 be added explicitly by the client).
 `noInteraction` removes `ask_user` from the tool list.
 `noWeb` removes `search` from the tool list.
+`noProposals` removes `ask_user` / `env` / `sh` from the tool list
+(no proposals at all).
+`yolo` opts the run into server-side proposal auto-accept and
+in-process sh/env execution — see [yolo_mode](#yolo_mode).
 #### Streaming (see [streaming_entries](#streaming_entries))
@@ -1190,17 +1273,20 @@ connected clients). `stream/cancel` also handles stale 102 cleanup.
 #### Skills & Personas
-| Method | Params |
-|--------|--------|
-| `skill/add` | `{ run, name }` |
-| `skill/remove` | `{ run, name }` |
-| `getSkills` | `{ run }` |
-| `listSkills` | — |
-| `persona/set` | `{ run, name?, text? }` |
-| `listPersonas` | — |
-Skills loaded from `RUMMY_HOME/skills/{name}.md`. Personas from
-`RUMMY_HOME/personas/{name}.md`.
+Both attach to a run via the entry grammar.
+- **Skills** — model emits `<skill path="[path-or-url]"/>`.
+  Handler walks local file/folder/`.zip` (via `yauzl-promise`) or
+  fetches a URL. Single `.md` registers as `skill://<name>`
+  (summarized); folder/zip registers root `index.md` summarized,
+  rest archived; `foo/index.md` collapses to `skill://<name>/foo`.
+  Re-emit overwrites. Authors link with absolute `skill://...` URIs.
+- **Personas** — `ask` / `act` / `startRun` accept `persona` as a
+  run attribute. The persona plugin renders the persona body inside
+  the system prompt (below tooldocs) on first turn; if no `persona`
+  is passed, `AgentLoop.ensureRun` defaults to
+  `src/plugins/persona/default.md`. 1:1 run:persona, immutable for
+  the run's lifetime.
 ### Notifications {#notifications}
@@ -1378,18 +1464,116 @@ are universal — not a feature of any single tool.
 ---
-## Hedberg Editing Syntax {#hedberg}
+## Edit Syntax
+The model expresses entry writes through `<set path="...">BODY</set>`.
+The body shape determines the operation. All shaped operations use a
+bash-heredoc-flavored marker family.
+### Marker Grammar
+    <<IDENT
+    body content
+    IDENT
+Where `IDENT` matches `[A-Z][A-Za-z0-9_]*`. The leading keyword of
+`IDENT` selects the operation; any trailing alphanumeric suffix is
+opaque to operation routing and exists to disambiguate nested markers
+or avoid collisions when the body literally contains the bare keyword
+(same convention as bash heredoc `<<EOF1` vs `<<EOF`).
+The opener `<<IDENT` must be preceded by start-of-body, whitespace,
+or `>` (so `vec<<SEARCH` mid-token does not false-trigger). The
+closer is bare `IDENT` with whitespace boundaries on both sides.
+Newline-tolerant: the multi-line shape above and the single-line
+`<<IDENT body IDENT` form parse identically.
+### Distinct from Packet Rendering
+The engine renders entry bodies in context using a different marker
+shape: `<<:::path...:::path` (see `plugins/helpers.js`). Edit syntax
+is the bare `<<IDENT` form; packet rendering keeps the `:::` sentinel.
+The two grammars are visibly distinct so model emissions and engine
+renderings can never be confused. A `<set>` body echoing the packet
+shape is NOT treated as edit syntax — it falls through to plain-body
+REPLACE with the markers preserved as literal content.
+### Operations
+| IDENT prefix | Effect |
+|---|---|
+| `NEW` | Create the entry. Behaves identically to `REPLACE` on existing entries — named separately to align with model intent. |
+| `PREPEND` | Prepend body content to the existing entry. Creates the entry if it doesn't exist. |
+| `APPEND` | Append body content to the existing entry. Creates the entry if it doesn't exist. |
+| `REPLACE` | Replace the entire entry body with the marker content. Standalone (not preceded by `SEARCH`). |
+| `DELETE` | Remove a literal-matching region from the existing entry body. The marker content is the region to remove. |
+| `SEARCH` | Match a literal region in the existing entry body. Must be immediately followed by a `REPLACE` block; the pair is an in-place edit. |
+### SEARCH / REPLACE Pairs
+Surgical in-place edits. `SEARCH` must be immediately followed by
+`REPLACE` (no intervening operation):
+    <set path="src/main.go"><<SEARCH
+    old line
+    SEARCH
+    <<REPLACE
+    new line
+    REPLACE</set>
+Multiple pairs in one `<set>` body apply in order against the
+progressively-edited body.
+### Suffix for Body Collisions
+When the body content literally contains a marker keyword (`SEARCH`
+in prose, `<<` in code), the model appends a digit or alphanumeric
+suffix to the IDENT so the inner literal does not prematurely close
+the outer marker:
+    <set path="docs/grammar.md"><<DOC1
+    The opener is <<SEARCH and the closer is bare SEARCH alone on
+    a line. Use <<SEARCH1 ... SEARCH1 if your body contains literal
+    SEARCH or <<SEARCH tokens.
+    DOC1</set>
+### Errors
+| Condition | Outcome |
+|---|---|
+| `SEARCH` content not found in current body | conflict (soft) |
+| `DELETE` content not found in current body | conflict (soft) |
+| Lone `SEARCH` (no following `REPLACE`) | parse error |
+| Unclosed marker (opener with no matching `IDENT` closer) | parse error |
+| Non-keyword `IDENT` (e.g. `<<EOF`, `<<DOC`) | routes to REPLACE — inner content becomes the new body |
+| `<set>` body with no `<<IDENT` markers at all | full-body REPLACE (tolerated; not demonstrated to models) |
+### Pattern Matching
+The literal-match semantics used by `SEARCH` and `DELETE` are
+delegated to the Hedberg pattern library — see [hedberg](#hedberg).
+Matching is fuzzy on whitespace and indentation; an exact-byte match
+is not required.
+---
+## Hedberg Pattern Library {#hedberg}
+The pattern library exposed to every plugin through `core.hooks.hedberg`.
+Used internally by the Edit Syntax (above) for `SEARCH` / `DELETE`
+matching and by `<get>` / `<rm>` for path globs.
-The model picks its preferred edit format. The parser understands all of them:
+| Function | Purpose |
+|---|---|
+| `match(pattern, string)` | Full-string match — paths, equality. |
+| `search(pattern, string)` | Substring search — content filtering. |
+| `replace(text, search, replace, options)` | Patch application; fuzzy on whitespace and indentation. |
+| `generatePatch(path, oldBody, newBody)` | Unified-diff rendering for telemetry. |
-1. Git merge conflict: `<<<<<<< SEARCH ... ======= ... >>>>>>> REPLACE`
-2. Replace-only: `======= ... >>>>>>> REPLACE`
-3. Unified diff: `@@ -1,3 +1,3 @@` with `-`/`+` lines
-4. Sed syntax: `s/old/new/flags`
-5. Claude XML: `<old_text>old</old_text><new_text>new</new_text>`
-6. JSON body: `{"search": "old", "replace": "new"}` or `{search="old", replace="new"}`
-7. XML attributes: `<set search="old" replace="new"/>`
-8. Full replacement: anything else becomes the new content
+Pattern types: glob (picomatch-backed, with `**` cross-slash and
+`!()` negation), regex (`/pattern/flags`), xpath, jsonpath. Detection
+is by syntactic shape — see `src/lib/hedberg/patterns.js`.
 ---
@@ -1477,10 +1661,9 @@ htmlparser2 dropped. Close current, open new, emit recovery warning.
 attributes on the open tag *and* body text inside the tag. If the
 canonical attribute is missing, the body silently fills it. The
 shape per tool:
-- `set` — structured edit detection (merge-conflict markers, udiff,
-  Claude `<old_text>` XML, JSON `{search,replace}`, sed `s/.../.../`,
-  attribute-mode `search=`/`replace=`, body-as-search-when-`body=`
-  attr-set, plain write).
+- `set` — body parsed via `parseMarkerBody` (see "Edit Syntax"
+  above): bare `<<IDENT ... IDENT` markers route to `operations`
+  list; bodies without markers are plain-body REPLACE.
 - `update` — body fills `body`, status defaults to 102 if absent.
 - `get` / `rm` — attr `path` or body fills target. Spread `a` so
   `line` / `limit` / `visibility` / future attrs reach the handler.
@@ -1642,7 +1825,7 @@ Full reference is `.env.example` — these are the load-bearing vars.
 | Var | Default | Purpose |
 |-----|---------|---------|
 | `PORT` | 3044 | WebSocket port |
-| `RUMMY_HOME` | `~/.rummy` | Skills, personas, local config |
+| `RUMMY_HOME` | `~/.rummy` | Local config root. Used by telemetry; available for future per-user state. |
 | `RUMMY_DB_PATH` | `rummy.db` | SQLite path |
 | `RUMMY_MMAP_MB` | 0 | SQLite mmap hint (MB; 0 disables) |
 | `RUMMY_DEBUG` | false | Verbose logging |
@@ -1665,10 +1848,12 @@ Full reference is `.env.example` — these are the load-bearing vars.
 | `RUMMY_MIN_CYCLES` | 3 | Consecutive repetitions to trigger cycle detection |
 | `RUMMY_MAX_CYCLE_PERIOD` | 4 | Max cycle period checked by healer |
 | `RUMMY_RETENTION_DAYS` | 31 | Days of completed/aborted runs kept |
-| `RUMMY_THINK` | 1 | Enable `<think>` tag reasoning |
-| `RUMMY_TEMPERATURE` | 0.5 | Default LLM temperature |
+| `RUMMY_THINK` | 0 | Reasoning request flag forwarded to LLM provider |
+| `RUMMY_TEMPERATURE` | 0.1 | Default LLM temperature |
 | `RUMMY_RPC_TIMEOUT` | 30000 | RPC timeout (ms) |
 | `RUMMY_FETCH_TIMEOUT` | 300000 | LLM HTTP timeout (ms) |
+| `RUMMY_LLM_DEADLINE` | 600000 | LLM transient-retry deadline (ms). Used as the budget for `warmup` and `rate_limit` categories in `src/llm/retry.js#retryClassified`; gateway/server categories have shorter hardcoded deadlines (30s / 60s). |
+| `RUMMY_LLM_MAX_BACKOFF` | 30000 | Max single backoff between retry attempts (ms) for warmup/rate_limit categories. |
 **LLM providers** (plugin-scoped; a provider with no config is inert):
@@ -1699,9 +1884,11 @@ local `node_modules` then global).
 | Var | Purpose |
 |-----|---------|
-| `RUMMY_SEARCH` | `brave` \| `searxng` |
-| `BRAVE_API_KEY` | Brave Search API key |
-| `RUMMY_SEARXNG_URL` | SearXNG instance URL |
+| `RUMMY_WEB_SEARXNG_URL` | SearXNG instance URL (SearXNG federates Brave / DuckDuckGo / Wikipedia / etc. upstream and normalizes the responses) |
+| `RUMMY_WEB_FETCH_TIMEOUT` | Playwright `page.goto` timeout (ms) |
+| `RUMMY_WEB_PLAYWRIGHT_WS` | Optional CDP endpoint for shared chromium |
+| `RUMMY_WEB_NO_SANDBOX` | `1` to drop chromium's user-namespace sandbox |
+| `RUMMY_WEB_CHROMIUM_HEAP_MB` | Cap chromium's V8 heap (MB) |
 **Testing:**