npm - @possumtech/rummy - Versions diffs - 2.0.0 → 2.1.0 - Mend

@possumtech/rummy 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117)

package/.env.example +31 -5
package/BENCH_ENVIRONMENT.md +230 -0
package/CLIENT_INTERFACE.md +396 -0
package/PLUGINS.md +93 -1
package/SPEC.md +389 -28
package/bin/postinstall.js +2 -2
package/bin/rummy.js +2 -2
package/last_run.txt +5617 -0
package/migrations/001_initial_schema.sql +2 -1
package/package.json +13 -9
package/scriptify/ask_run.js +77 -0
package/scriptify/cache_probe.js +66 -0
package/scriptify/cache_probe_grok.js +74 -0
package/service.js +22 -11
package/src/agent/AgentLoop.js +62 -157
package/src/agent/ContextAssembler.js +2 -9
package/src/agent/Entries.js +54 -98
package/src/agent/ProjectAgent.js +4 -11
package/src/agent/TurnExecutor.js +48 -83
package/src/agent/XmlParser.js +247 -273
package/src/agent/budget.js +5 -28
package/src/agent/config.js +38 -0
package/src/agent/errors.js +7 -13
package/src/agent/httpStatus.js +1 -19
package/src/agent/known_queries.sql +1 -1
package/src/agent/known_store.sql +12 -2
package/src/agent/materializeContext.js +15 -18
package/src/agent/pathEncode.js +5 -0
package/src/agent/rummyHome.js +9 -0
package/src/agent/runs.sql +37 -0
package/src/agent/tokens.js +7 -7
package/src/hooks/HookRegistry.js +1 -16
package/src/hooks/Hooks.js +8 -33
package/src/hooks/PluginContext.js +3 -21
package/src/hooks/RpcRegistry.js +1 -4
package/src/hooks/RummyContext.js +6 -16
package/src/hooks/ToolRegistry.js +5 -15
package/src/llm/LlmProvider.js +41 -33
package/src/llm/errors.js +41 -4
package/src/llm/openaiStream.js +125 -0
package/src/llm/retry.js +109 -0
package/src/plugins/budget/budget.js +55 -76
package/src/plugins/cli/README.md +87 -0
package/src/plugins/cli/bin.js +61 -0
package/src/plugins/cli/cli.js +120 -0
package/src/plugins/env/README.md +2 -1
package/src/plugins/env/env.js +4 -6
package/src/plugins/env/envDoc.md +2 -2
package/src/plugins/error/error.js +23 -23
package/src/plugins/file/file.js +2 -22
package/src/plugins/get/get.js +12 -34
package/src/plugins/get/getDoc.md +8 -6
package/src/plugins/hedberg/edits.js +1 -11
package/src/plugins/hedberg/hedberg.js +3 -26
package/src/plugins/hedberg/normalize.js +1 -5
package/src/plugins/hedberg/patterns.js +4 -15
package/src/plugins/hedberg/sed.js +1 -7
package/src/plugins/helpers.js +28 -20
package/src/plugins/index.js +25 -41
package/src/plugins/instructions/README.md +18 -0
package/src/plugins/instructions/instructions.js +97 -38
package/src/plugins/instructions/instructions.md +24 -15
package/src/plugins/instructions/instructions_104.md +5 -4
package/src/plugins/instructions/instructions_105.md +29 -36
package/src/plugins/instructions/instructions_106.md +22 -0
package/src/plugins/instructions/instructions_107.md +17 -0
package/src/plugins/instructions/instructions_108.md +0 -8
package/src/plugins/known/README.md +26 -6
package/src/plugins/known/known.js +37 -34
package/src/plugins/log/README.md +2 -2
package/src/plugins/log/log.js +27 -34
package/src/plugins/ollama/ollama.js +50 -66
package/src/plugins/openai/openai.js +26 -44
package/src/plugins/openrouter/openrouter.js +28 -52
package/src/plugins/policy/README.md +8 -2
package/src/plugins/policy/policy.js +8 -21
package/src/plugins/prompt/README.md +22 -0
package/src/plugins/prompt/prompt.js +14 -16
package/src/plugins/rm/rm.js +5 -2
package/src/plugins/rm/rmDoc.md +4 -4
package/src/plugins/rpc/README.md +2 -1
package/src/plugins/rpc/rpc.js +62 -48
package/src/plugins/set/README.md +5 -1
package/src/plugins/set/set.js +23 -33
package/src/plugins/set/setDoc.md +1 -1
package/src/plugins/sh/README.md +2 -1
package/src/plugins/sh/sh.js +5 -11
package/src/plugins/sh/shDoc.md +2 -2
package/src/plugins/stream/README.md +6 -5
package/src/plugins/stream/stream.js +6 -35
package/src/plugins/telemetry/telemetry.js +26 -19
package/src/plugins/think/think.js +4 -7
package/src/plugins/unknown/unknown.js +8 -13
package/src/plugins/update/update.js +42 -25
package/src/plugins/update/updateDoc.md +3 -3
package/src/plugins/xai/xai.js +30 -20
package/src/plugins/yolo/yolo.js +159 -0
package/src/server/ClientConnection.js +17 -47
package/src/server/SocketServer.js +14 -14
package/src/server/protocol.js +1 -10
package/src/sql/functions/slugify.js +5 -7
package/src/sql/v_model_context.sql +4 -11
package/turns/cli_1777462658211/turn_001.txt +772 -0
package/turns/cli_1777462658211/turn_002.txt +606 -0
package/turns/cli_1777462658211/turn_003.txt +667 -0
package/turns/cli_1777462658211/turn_004.txt +297 -0
package/turns/cli_1777462658211/turn_005.txt +301 -0
package/turns/cli_1777462658211/turn_006.txt +262 -0
package/turns/cli_1777465095132/turn_001.txt +715 -0
package/turns/cli_1777465095132/turn_002.txt +236 -0
package/turns/cli_1777465095132/turn_003.txt +287 -0
package/turns/cli_1777465095132/turn_004.txt +694 -0
package/turns/cli_1777465095132/turn_005.txt +422 -0
package/turns/cli_1777465095132/turn_006.txt +365 -0
package/turns/cli_1777465095132/turn_007.txt +885 -0
package/turns/cli_1777465095132/turn_008.txt +1277 -0
package/turns/cli_1777465095132/turn_009.txt +736 -0

package/src/plugins/set/README.md CHANGED Viewed

@@ -15,7 +15,7 @@ SEARCH/REPLACE edits, and pattern updates.
 - **Category**: `logging`
 - **Handler**: Routes based on attributes:
   - `blocks` or `search` — SEARCH/REPLACE edit via `processEdit`.
-  - `preview` — pattern preview (dry run).
+  - `manifest` — pattern manifest (lists matches without performing the set).
   - Scheme path — direct upsert at status 200.
   - File path — produces status 202 (proposed) with unified diff patch.
   - Glob/filter — bulk update via `updateBodyByPattern`.
@@ -31,3 +31,7 @@ the merge conflict block when a SEARCH/REPLACE was performed.
 - **Heuristic fallback**: On literal failure, fuzzy matching with warnings.
 - **Patch generation**: `generatePatch` produces unified diff for client display.
 - File writes are always status 202 (proposed); scheme writes resolve immediately.
+- **`proposal.content` filter** — when the client accepts a proposed
+  set, this plugin overrides the resolved body to the body it
+  already staged on the audit entry (rather than whatever literal
+  body the client passed through `resolve`).

package/src/plugins/set/set.js CHANGED Viewed

@@ -79,12 +79,7 @@ export default class Set {
 			}
 		}
 		const turn = (await db.get_run_by_id.get({ id: runId })).next_turn;
-		// Preserve the file entry's current visibility — a <get>
-		// earlier in the run may have promoted it. Updating the
-		// body without specifying visibility falls through to
-		// the data-category default ("summarized") and wipes
-		// the promotion, making the model re-get the file next
-		// turn (then cycle-strike out).
+		// Preserve current visibility; default would wipe an earlier <get>'s promotion.
 		const existingState = await entries.getState(runId, attrs.path);
 		await entries.set({
 			runId,
@@ -94,9 +89,13 @@ export default class Set {
 			visibility: existingState?.visibility,
 		});
 		if (projectRoot) {
-			const { writeFile } = await import("node:fs/promises");
-			const { join } = await import("node:path");
-			await writeFile(join(projectRoot, attrs.path), patched).catch(() => {});
+			const { writeFile, mkdir } = await import("node:fs/promises");
+			const { dirname, isAbsolute, join } = await import("node:path");
+			const targetPath = isAbsolute(attrs.path)
+				? attrs.path
+				: join(projectRoot, attrs.path);
+			await mkdir(dirname(targetPath), { recursive: true });
+			await writeFile(targetPath, patched);
 		}
 		if (isNewFile && projectId) {
 			await File.setConstraint(db, projectId, attrs.path, "active");
@@ -112,24 +111,22 @@ export default class Set {
 		const rawSummary = typeof attrs.summary === "string" ? attrs.summary : null;
 		const summaryText = rawSummary ? rawSummary.slice(0, 80) : null;
-		// Invalid visibility value on a body-less set: reject with an
-		// error instead of falling through to the write path. Without
-		// this guard, a typo like visibility="promoted" (pre-migration
-		// terminology) silently body-wiped the target — the fidelity
-		// regression that cost us multiple demo runs.
+		// Reject invalid visibility on body-less set; otherwise a typo silently wipes the body.
 		if (
 			!entry.body &&
 			attrs.path &&
 			attrs.visibility !== undefined &&
 			!visibilityAttr
 		) {
-			await rummy.hooks.error.log.emit({
-				store,
+			await store.set({
 				runId,
 				turn,
 				loopId,
-				message: `Invalid visibility "${attrs.visibility}" on <set path="${attrs.path}"/>. Use visibility="visible|summarized|archived".`,
-				status: 400,
+				path: entry.resultPath,
+				body: `Invalid visibility "${attrs.visibility}" on <set path="${attrs.path}"/>. Use visibility="visible|summarized|archived".`,
+				state: "failed",
+				outcome: "validation",
+				attributes: { path: attrs.path },
 			});
 			return;
 		}
@@ -187,8 +184,8 @@ export default class Set {
 		// Edit: sed patterns or SEARCH/REPLACE blocks
 		if (attrs.blocks || attrs.search != null) {
 			await this.#processEdit(rummy, entry, attrs);
-		} else if (attrs.preview && attrs.path) {
-			// Preview
+		} else if (attrs.manifest && attrs.path) {
+			// Manifest: list paths and token costs without performing the operation.
 			const matches = await store.getEntriesByPattern(
 				runId,
 				attrs.path,
@@ -202,7 +199,7 @@ export default class Set {
 				attrs.path,
 				attrs.body,
 				matches,
-				{ preview: true, loopId },
+				{ manifest: true, loopId },
 			);
 			return;
 		} else {
@@ -262,8 +259,7 @@ export default class Set {
 					{ loopId },
 				);
 			} else {
-				// Direct scheme write (known://, unknown://, etc.)
-				// Same result shape as file writes — diff against existing.
+				// Direct scheme write; same diff-against-existing shape as file writes.
 				const existing = await store.getBody(runId, target);
 				const oldContent = existing === null ? "" : existing;
 				const newContent = entry.body;
@@ -280,8 +276,7 @@ export default class Set {
 					path: target,
 					body: newContent,
 					state: "resolved",
-					// Scheme writes default to promoted — the model wrote it, so
-					// it's material unless they explicitly demote/archive.
+					// Scheme writes default visible; the model wrote it.
 					visibility: visibilityAttr ? visibilityAttr : "visible",
 					attributes: summaryText ? { summary: summaryText } : null,
 					loopId,
@@ -340,8 +335,7 @@ export default class Set {
 	summary(entry) {
 		if (!entry.body) return "";
-		// Preserve SEARCH/REPLACE merge blocks intact — truncating them
-		// drops the before/after the model needs to recognize its edit.
+		// Preserve SEARCH/REPLACE blocks intact; truncation strips before/after the model needs.
 		if (/<<<<<<< SEARCH[\s\S]*>>>>>>> REPLACE/.test(entry.body)) {
 			return entry.body;
 		}
@@ -370,10 +364,7 @@ export default class Set {
 		for (const match of matches) {
 			if (match.scheme === null) {
-				// Bare file path — apply the edit immediately against the
-				// match body so the log carries a concrete before/after
-				// merge. #materializeRevisions still runs at turn-end to
-				// consolidate the set:// proposal for client acceptance.
+				// Bare file: apply edit immediately so log carries before/after merge.
 				const canonicalPath = `set://${match.path}`;
 				const revision = Set.#buildRevision(attrs);
 				const existingAttrs = await rummy.getAttributes(canonicalPath);
@@ -533,8 +524,7 @@ export default class Set {
 		}
 	}
-	// `replace` attr is optional in search/replace form — absence means
-	// "delete the match"; normalize to empty string at this boundary.
+	// Missing `replace` = delete the match; normalize to empty string.
 	static #resolveReplace(attrs) {
 		return attrs.replace === undefined ? "" : attrs.replace;
 	}

package/src/plugins/set/setDoc.md CHANGED Viewed

@@ -18,5 +18,5 @@ Example: <set path="src/config.js">s/port = 3000/port = 8080/g;s/We're almost do
 Example: <set path="example.md">Full file content here</set>
 <!-- Create: body contents are entire file. -->
-* YOU MUST NOT use <sh></sh> or <env></env> to list, create, read, or edit files — use <get></get> and <set></set>
+YOU MUST NOT use <sh></sh> or <env></env> to list, create, read, or edit files — use <get></get> and <set></set>
 <!-- Reinforces at the decision point — model reading setDoc for file ops sees the prohibition here, not just buried in shDoc/envDoc which it may not be reading. -->

package/src/plugins/sh/README.md CHANGED Viewed

@@ -24,7 +24,8 @@ record, one data payload:
 - **Data channels**: `sh://turn_N/{slug}_1` (stdout), `sh://turn_N/{slug}_2`
   (stderr) — scheme=`sh`, category=`data`. Created at status=102 on
   proposal acceptance, grow via the `stream` RPC, transition to 200/500
-  via `stream/completed`. Render inside the `<context>` block as `<sh>`.
+  via `stream/completed`. Render inside `<visible>` as `<sh>` when
+  promoted; listed in `<summarized>` otherwise.
 The `sh` scheme exists **only** for the data channels. The proposal/log
 entry itself is in the unified `log://` namespace along with every

package/src/plugins/sh/sh.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { logPathToDataBase } from "../helpers.js";
+import { logPathToDataBase, streamSummary } from "../helpers.js";
 import docs from "./shDoc.js";
 const LOG_ACTION_RE = /^log:\/\/turn_\d+\/(\w+)\//;
@@ -8,11 +8,7 @@ export default class Sh {
 	constructor(core) {
 		this.#core = core;
-		// `sh` scheme holds the streamed stdout/stderr payload — that's
-		// data the model reads, not an audit record. The log entry at
-		// log://turn_N/sh/{slug} (scheme=log, category=logging) is the
-		// audit record; it lives in a separate namespace by design.
-		// See SPEC §streaming_entries and the scheme/category invariant.
+		// data scheme = streamed stdout/stderr; audit lives in log://. SPEC #streaming_entries.
 		core.registerScheme({ category: "data" });
 		core.on("handler", this.handler.bind(this));
 		core.on("visible", this.full.bind(this));
@@ -53,9 +49,7 @@ export default class Sh {
 	async handler(entry, rummy) {
 		const { entries: store, sequence: turn, runId, loopId } = rummy;
-		// Proposal at 202 with the command as summary and empty body — the
-		// body fills in on accept (log message about the action). Data
-		// entries with stdout/stderr are created on accept in resolve().
+		// 202 with command summary, empty body; stdout/stderr entries created on accept.
 		await store.set({
 			runId,
 			turn,
@@ -71,7 +65,7 @@ export default class Sh {
 		return `# sh ${entry.attributes.command}\n${entry.body}`;
 	}
-	summary() {
-		return "";
+	summary(entry) {
+		return streamSummary("sh", entry);
 	}
 }

package/src/plugins/sh/shDoc.md CHANGED Viewed

@@ -6,8 +6,8 @@ Example: <sh>npm install express</sh>
 Example: <sh>npm test</sh>
 <!-- Test execution. Another common side-effect action. -->
-* YOU MUST NOT use <sh></sh> to read, create, or edit files — use <get></get> and <set></set>
+YOU MUST NOT use <sh></sh> to read, create, or edit files — use <get></get> and <set></set>
 <!-- Forces file operations through the entry system. -->
-* YOU MUST use <env></env> for commands without side effects
+YOU MUST use <env></env> for commands without side effects
 <!-- Reinforces the env/sh split. Read = env, mutate = sh. -->

package/src/plugins/stream/README.md CHANGED Viewed

@@ -16,12 +16,13 @@ A streaming action lives in **two namespaces** by design:
   `{action}://turn_N/{slug}_2`, ... — scheme=`{action}` (sh, env, ...),
   category=`data`. Created at status=102 on proposal acceptance. Grow
   via `stream`; terminal via `stream/completed` / `stream/aborted` /
-  `stream/cancel`. Render inside `<context>`.
+  `stream/cancel`. Render inside `<visible>` (or `<summarized>` if
+  demoted).
-The stream RPC `path` param is always the **log-entry path** (that's
-what clients receive on `run/proposal`). The server derives the data
-base path internally via `logPathToDataBase`. See
-[scheme_category_split](#scheme_category_split).
+The stream RPC `path` param is always the **log-entry path** (the
+`log://...` path the client discovers via `getEntries` after a
+`run/changed` pulse). The server derives the data base path internally
+via `logPathToDataBase`. See [scheme_category_split](#scheme_category_split).
 ## RPC Methods

package/src/plugins/stream/stream.js CHANGED Viewed

@@ -1,22 +1,6 @@
 import { logPathToDataBase } from "../helpers.js";
-/**
- * Stream plugin — generic streaming entry infrastructure.
- *
- * Receives chunks from the client (or any producer) and appends them to
- * existing data entries. Producers (sh/env handlers) create the data
- * entries at status=102 on proposal acceptance; this plugin handles the
- * subsequent append + terminal-status transition via two RPC methods.
- *
- * RPC `path` param is the **log-entry path** (log://turn_N/{action}/{slug}
- * — that's what the client sees on `run/proposal`). Channels live under
- * the producer scheme ({action}://turn_N/{slug}_N) for a clean
- * data-vs-logging namespace split; this plugin derives the data base from
- * the log path on every RPC call.
- *
- * Not a model-facing tool. No scheme, no tooldoc, no dispatch handler.
- * Pure RPC plumbing that any streaming-producer plugin can leverage.
- */
+// RPC plumbing that appends/terminates streaming data entries; see plugin README.
 export default class Stream {
 	#core;
@@ -25,9 +9,7 @@ export default class Stream {
 		const hooks = core.hooks;
 		const r = hooks.rpc.registry;
-		// stream: append a chunk to a streaming entry.
-		// Entry path is constructed as `${path}_${channel}` per the Unix FD
-		// convention (1=stdout, 2=stderr, higher=other producer channels).
+		// stream: append chunk; channel = Unix FD (1=stdout, 2=stderr).
 		r.register("stream", {
 			handler: async (params, ctx) => {
 				if (!params.run) throw new Error("run is required");
@@ -67,8 +49,7 @@ export default class Stream {
 			requiresInit: true,
 		});
-		// stream/completed: transition all data channels for this producer
-		// to their terminal status and finalize the log entry body.
+		// stream/completed: terminal status on all channels + finalize log body.
 		r.register("stream/completed", {
 			handler: async (params, ctx) => {
 				if (!params.run) throw new Error("run is required");
@@ -107,8 +88,7 @@ export default class Stream {
 					});
 				}
-				// Update the log entry body with final stats. Keep it terse —
-				// one line summarizing exit code, duration, and channel sizes.
+				// One-line final stats for the log entry body.
 				const logEntry = await store.getAttributes(runId, params.path);
 				let command = "";
 				if (logEntry?.command) command = logEntry.command;
@@ -138,11 +118,7 @@ export default class Stream {
 			requiresInit: true,
 		});
-		// stream/aborted: client-initiated cancellation. Transitions all data
-		// channels to status 499 (Client Closed Request — the de-facto HTTP
-		// status for client-terminated requests) and rewrites the log entry
-		// body to note the abort. Shape mirrors stream/completed for client
-		// symmetry: same run/path addressing, same channel sweep.
+		// stream/aborted: client cancellation; channels → 499; mirrors stream/completed.
 		r.register("stream/aborted", {
 			handler: async (params, ctx) => {
 				if (!params.run) throw new Error("run is required");
@@ -211,12 +187,7 @@ export default class Stream {
 			requiresInit: true,
 		});
-		// stream/cancel: server-initiated cancellation. Any client (or
-		// internal server code) can cancel a streaming producer — the server
-		// transitions channels to 499 immediately and pushes a
-		// stream/cancelled notification so connected clients can kill their
-		// local processes. Also serves as stale 102 cleanup: if the client
-		// died mid-stream, call stream/cancel to mark orphaned entries terminal.
+		// stream/cancel: server-initiated; pushes stream/cancelled notification; cleans stale 102s.
 		r.register("stream/cancel", {
 			handler: async (params, ctx) => {
 				if (!params.run) throw new Error("run is required");

package/src/plugins/telemetry/telemetry.js CHANGED Viewed

@@ -1,12 +1,16 @@
 import { mkdir, writeFile } from "node:fs/promises";
 import { join } from "node:path";
+// model://N is a diagnostic slice; full content is in assistant://N.
+const MODEL_SNAPSHOT_BYTES = 4096;
 export default class Telemetry {
 	#core;
 	#starts = new Map();
 	#lastRunPath = null;
 	#turnsDir = null;
 	#turnLog = [];
+	#turnStartIdx = 0;
 	#currentRunAlias = null;
 	#currentTurn = null;
@@ -31,8 +35,8 @@ export default class Telemetry {
 	async #onRpcStarted({ method, id, params }) {
 		this.#starts.set(id, Date.now());
 		let summary = "";
-		if (method === "ask" || method === "act") {
-			const prompt = params?.prompt ? params.prompt : "";
+		if (method === "set" && params?.path?.startsWith("run://")) {
+			const prompt = params?.body ? params.body : "";
 			summary = `prompt="${prompt.slice(0, 60)}"`;
 		} else if (method === "run/abort") {
 			summary = `run=${params?.run}`;
@@ -40,10 +44,6 @@ export default class Telemetry {
 			summary = `run=${params?.run} action=${params?.resolution?.action}`;
 		}
 		console.log(`[RPC] → ${method}(${id})${summary ? ` ${summary}` : ""}`);
-		if (method === "ask" || method === "act") {
-			this.#turnLog = [];
-		}
 	}
 	async #onRpcCompleted({ method, id, result }) {
@@ -136,7 +136,7 @@ export default class Telemetry {
 				reasoning_content: responseMessage?.reasoning_content
 					? responseMessage.reasoning_content
 					: null,
-				content: content.slice(0, 4096),
+				content: content.slice(0, MODEL_SNAPSHOT_BYTES),
 				usage: result.usage ? result.usage : null,
 				model: result.model ? result.model : null,
 			}),
@@ -161,10 +161,7 @@ export default class Telemetry {
 			}
 		}
-		// content://N — unparsed text. 400 Bad Request because anything in
-		// unparsed is text the parser couldn't dispatch (malformed XML, native
-		// tool call attempts, reasoning bleed). Visible to the model so it
-		// sees the rejection on its next turn and can correct.
+		// content://N — visible-rejected unparsed text so the model can correct next turn.
 		if (unparsed) {
 			await store.set({
 				runId,
@@ -179,9 +176,7 @@ export default class Telemetry {
 			});
 		}
-		// Commit usage stats. Providers surface token counts under
-		// incompatible keys; walk them in priority order and fall back
-		// to 0 only as the definitional "not reported" value.
+		// Per-provider key drift; walk in priority order, 0 = not reported.
 		const usage = result.usage ? result.usage : {};
 		const cachedSources = [
 			usage.cached_tokens,
@@ -206,8 +201,7 @@ export default class Telemetry {
 				reasoningTokens = v;
 				break;
 			}
-		// Use LLM's actual prompt_tokens as the ground-truth context size
-		// when available; falls back to our pre-call estimate.
+		// LLM's prompt_tokens is ground truth; estimator is pre-call fallback.
 		let actualContextTokens = 0;
 		if (usage.prompt_tokens) actualContextTokens = usage.prompt_tokens;
 		else if (assembledTokens) actualContextTokens = assembledTokens;
@@ -223,15 +217,27 @@ export default class Telemetry {
 			completion_tokens: numberOrZero(usage.completion_tokens),
 			reasoning_tokens: reasoningTokens,
 			total_tokens: numberOrZero(usage.total_tokens),
-			cost: numberOrZero(usage.cost),
+			// usage.cost is what the relay BILLED us; it reads 0 when routed
+			// via BYOK (relay didn't bill — upstream charged our key directly).
+			// upstream_inference_cost is the true compute cost in either case.
+			cost:
+				numberOrZero(usage.cost) ||
+				numberOrZero(usage.cost_details?.upstream_inference_cost),
 		});
 	}
 	async #logMessages(messages, context) {
-		this.#currentRunAlias = context.runAlias
+		const newAlias = context.runAlias
 			? context.runAlias
 			: `run_${context.runId}`;
+		// Reset on alias change (the semantic run boundary).
+		if (newAlias !== this.#currentRunAlias) {
+			this.#turnLog = [];
+		}
+		this.#currentRunAlias = newAlias;
 		this.#currentTurn = context.turn === undefined ? null : context.turn;
+		// Per-turn slice index; turn_NNN.txt = this turn only, last_run.txt = cumulative.
+		this.#turnStartIdx = this.#turnLog.length;
 		const turnLabel = this.#currentTurn === null ? "?" : this.#currentTurn;
 		this.#turnLog.push(
 			`\n${"=".repeat(60)}\nTURN ${turnLabel} — model=${context.model} run=${this.#currentRunAlias}\n${"=".repeat(60)}`,
@@ -272,6 +278,7 @@ export default class Telemetry {
 		const runDir = join(this.#turnsDir, this.#currentRunAlias);
 		await mkdir(runDir, { recursive: true });
 		const fileName = `turn_${String(this.#currentTurn).padStart(3, "0")}.txt`;
-		await writeFile(join(runDir, fileName), `${this.#turnLog.join("\n")}\n`);
+		const turnSlice = this.#turnLog.slice(this.#turnStartIdx);
+		await writeFile(join(runDir, fileName), `${turnSlice.join("\n")}\n`);
 	}
 }

package/src/plugins/think/think.js CHANGED Viewed

@@ -1,13 +1,12 @@
+import config from "../../agent/config.js";
 import docs from "./thinkDoc.js";
-const THINK_ENABLED = process.env.RUMMY_THINK;
-if (THINK_ENABLED === undefined)
-	throw new Error("RUMMY_THINK must be set (1 or 0)");
+const { THINK } = config;
 export default class Think {
 	constructor(core) {
 		core.registerScheme({ modelVisible: 0, category: "logging" });
-		if (THINK_ENABLED === "1") {
+		if (THINK === "1") {
 			core.ensureTool();
 			core.filter("instructions.toolDocs", async (docsMap) => {
 				docsMap.think = docs;
@@ -15,9 +14,7 @@ export default class Think {
 			});
 		}
-		// Merge <think> tag bodies into the turn's reasoning_content so
-		// models without a dedicated reasoning channel still expose their
-		// reasoning through the same field.
+		// Merge <think> bodies into reasoning_content for models without a reasoning channel.
 		core.filter("llm.reasoning", (reasoning, { commands }) => {
 			const thinkText = commands
 				.filter((c) => c.name === "think")

package/src/plugins/unknown/unknown.js CHANGED Viewed

@@ -1,8 +1,5 @@
 export default class Unknown {
-	#core;
 	constructor(core) {
-		this.#core = core;
 		core.ensureTool();
 		core.registerScheme({
 			category: "unknown",
@@ -10,28 +7,28 @@ export default class Unknown {
 		core.on("handler", this.handler.bind(this));
 		core.on("visible", this.full.bind(this));
 		core.on("summarized", this.summary.bind(this));
-		core.filter("assembly.user", this.assembleUnknowns.bind(this), 200);
+		core.filter("assembly.user", this.assembleUnknowns.bind(this), 150);
 		core.markHidden();
 	}
 	async handler(entry, rummy) {
 		const { entries: store, sequence: turn, runId, loopId } = rummy;
-		// Deduplicate — if this exact body already exists, skip
 		const existingValues = await store.getUnknownValues(runId);
 		if (existingValues.has(entry.body)) {
-			await this.#core.hooks.error.log.emit({
-				store,
+			await store.set({
 				runId,
 				turn,
 				loopId,
-				message: `Unknown deduped: "${entry.body.slice(0, 60)}"`,
+				path: entry.resultPath || entry.path,
+				body: `Unknown deduped: "${entry.body.slice(0, 60)}"`,
+				state: "failed",
+				outcome: "duplicate",
 			});
 			return;
 		}
-		// Generate slug path and upsert. Summary (if provided) becomes the
-		// path so the model can round-trip it via <get>; body is the fallback.
+		// summary > body for slug; lets the model round-trip via <get>.
 		const unknownPath = await store.slugPath(
 			runId,
 			"unknown",
@@ -52,9 +49,7 @@ export default class Unknown {
 		return entry.body;
 	}
-	// Same principle as knowns: keep the first 500 characters on
-	// summarized unknowns so demotion doesn't erase the question,
-	// but cap large bodies to bound the packet cost.
+	// First 500 chars; matches knowns/prompt summarized.
 	summary(entry) {
 		if (!entry.body) return "";
 		if (entry.body.length <= 500) return entry.body;

package/src/plugins/update/update.js CHANGED Viewed

@@ -32,44 +32,61 @@ export default class Update {
 	}
 	async handler(entry, rummy) {
+		const { entries: store, sequence: turn, runId, loopId } = rummy;
 		const status = entry.attributes?.status ?? 102;
+		const validation = await rummy.hooks.instructions.validateNavigation(
+			status,
+			rummy,
+		);
+		if (!validation.ok) {
+			entry.state = "failed";
+			entry.outcome = "invalid_navigation";
+			entry.body = validation.reason;
+			await store.set({
+				runId,
+				turn,
+				loopId,
+				path: entry.resultPath,
+				body: validation.reason,
+				state: "failed",
+				outcome: "invalid_navigation",
+				attributes: { status },
+			});
+			return;
+		}
+		if (!isValidStatus(status)) {
+			entry.state = "failed";
+			entry.outcome = "invalid_status";
+			const message = `Invalid status ${status} on update — use 1xx to continue or 200 to conclude.`;
+			entry.body = message;
+			await store.set({
+				runId,
+				turn,
+				loopId,
+				path: entry.resultPath,
+				body: message,
+				state: "failed",
+				outcome: "invalid_status",
+				attributes: { status },
+			});
+			return;
+		}
 		await rummy.update(entry.body, { status });
 	}
-	/**
-	 * Classify this turn's update state.
-	 *
-	 * Returns { summaryText, updateText }:
-	 *   - summaryText: non-null → model claimed terminal (200/204/422)
-	 *   - updateText:  non-null → model is continuing (1xx)
-	 *
-	 * Errors (invalid status, missing update) emit via hooks.error.log.
-	 * The "terminal + turn had errors → not actually terminal" rule
-	 * lives in the error plugin's verdict, not here.
-	 */
 	async resolve({ recorded, content, runId, turn, loopId, rummy }) {
 		const entry = recorded.findLast((e) => e.scheme === "update");
 		const status = entry?.attributes?.status ?? 102;
-		const isTerminal = TERMINAL_STATUSES.has(status);
+		const failed = entry?.state === "failed";
+		const isTerminal = TERMINAL_STATUSES.has(status) && !failed;
 		let summaryText = null;
 		let updateText = null;
-		if (entry?.body) {
+		if (entry?.body && !failed) {
 			if (isTerminal) summaryText = entry.body;
 			else updateText = entry.body;
 		}
-		if (entry && !isValidStatus(status)) {
-			await rummy.hooks.error.log.emit({
-				store: rummy.entries,
-				runId,
-				turn,
-				loopId,
-				message: `Invalid status ${entry.attributes?.status} on update — use 1xx to continue or 200 to conclude.`,
-				status: 422,
-			});
-		}
-		if (!summaryText && !updateText) {
+		if (!summaryText && !updateText && !failed) {
 			const empty = !content || content.trim() === "";
 			await rummy.hooks.error.log.emit({
 				store: rummy.entries,

package/src/plugins/update/updateDoc.md CHANGED Viewed

@@ -1,8 +1,8 @@
-## <update status="N">{brief status}</update> - Status report (exactly one per turn, at the end)
+## <update status="N">{brief status}</update> - Report turn status (exactly one per turn, at the end)
 <!-- Header defines position, frequency, and status code requirement. -->
-REQUIRED: the valid values of N are defined by your current stage instructions.
+YOU MUST refer to your current stage instructions for valid values of N.
 <!-- Single source of truth for codes is the current phase instructions block, not this doc. Listing codes here leaks termination knowledge (e.g. 200) that strong models use to short-circuit the protocol. -->
-REQUIRED: YOU MUST keep <update></update> body to <= 80 characters.
+YOU MUST keep <update></update> body to <= 80 characters.
 <!-- Length cap. -->