npm - @possumtech/rummy - Versions diffs - 2.1.0 → 2.2.1 - Mend

@possumtech/rummy 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

package/.env.example +40 -15
package/.xai.key +1 -0
package/PLUGINS.md +169 -53
package/README.md +38 -32
package/SPEC.md +366 -179
package/bin/digest.js +1097 -0
package/biome/no-fallbacks.grit +2 -2
package/gemini.key +1 -0
package/lang/en.json +10 -1
package/migrations/001_initial_schema.sql +9 -2
package/package.json +19 -8
package/service.js +1 -0
package/src/agent/AgentLoop.js +76 -26
package/src/agent/ContextAssembler.js +2 -0
package/src/agent/Entries.js +238 -60
package/src/agent/ProjectAgent.js +44 -0
package/src/agent/TurnExecutor.js +99 -30
package/src/agent/XmlParser.js +206 -111
package/src/agent/errors.js +35 -0
package/src/agent/known_queries.sql +1 -1
package/src/agent/known_store.sql +3 -42
package/src/agent/materializeContext.js +30 -1
package/src/agent/runs.sql +8 -18
package/src/agent/tokens.js +0 -1
package/src/agent/turns.sql +1 -0
package/src/hooks/Hooks.js +26 -0
package/src/hooks/RummyContext.js +12 -1
package/src/lib/hedberg/README.md +60 -0
package/src/lib/hedberg/hedberg.js +60 -0
package/src/lib/hedberg/marker.js +158 -0
package/src/{plugins → lib}/hedberg/matcher.js +1 -2
package/src/llm/LlmProvider.js +41 -3
package/src/llm/openaiStream.js +17 -0
package/src/plugins/ask_user/ask_user.js +12 -2
package/src/plugins/ask_user/ask_userDoc.md +1 -5
package/src/plugins/budget/README.md +29 -24
package/src/plugins/budget/budget.js +166 -110
package/src/plugins/cli/README.md +3 -4
package/src/plugins/cli/cli.js +31 -5
package/src/plugins/cloudflare/cloudflare.js +136 -0
package/src/plugins/cp/cp.js +41 -4
package/src/plugins/cp/cpDoc.md +5 -6
package/src/plugins/engine/engine.sql +1 -1
package/src/plugins/env/README.md +5 -4
package/src/plugins/env/env.js +7 -4
package/src/plugins/env/envDoc.md +7 -8
package/src/plugins/error/error.js +56 -15
package/src/plugins/file/README.md +12 -3
package/src/plugins/file/file.js +2 -2
package/src/plugins/get/get.js +59 -36
package/src/plugins/get/getDoc.md +10 -34
package/src/plugins/google/google.js +115 -0
package/src/plugins/hedberg/hedberg.js +13 -56
package/src/plugins/helpers.js +66 -12
package/src/plugins/index.js +1 -2
package/src/plugins/instructions/README.md +44 -47
package/src/plugins/instructions/instructions-system.md +44 -0
package/src/plugins/instructions/instructions-user.md +53 -0
package/src/plugins/instructions/instructions.js +58 -189
package/src/plugins/known/README.md +6 -7
package/src/plugins/known/known.js +24 -30
package/src/plugins/log/log.js +41 -32
package/src/plugins/mv/mv.js +40 -1
package/src/plugins/mv/mvDoc.md +1 -8
package/src/plugins/ollama/ollama.js +4 -3
package/src/plugins/openai/openai.js +4 -3
package/src/plugins/openrouter/openrouter.js +14 -4
package/src/plugins/persona/README.md +11 -13
package/src/plugins/persona/default.md +29 -0
package/src/plugins/persona/persona.js +10 -66
package/src/plugins/policy/policy.js +23 -22
package/src/plugins/prompt/README.md +37 -27
package/src/plugins/prompt/prompt.js +13 -19
package/src/plugins/rm/rm.js +18 -0
package/src/plugins/rm/rmDoc.md +5 -6
package/src/plugins/rpc/rpc.js +3 -3
package/src/plugins/set/set.js +205 -323
package/src/plugins/set/setDoc.md +47 -17
package/src/plugins/sh/README.md +6 -5
package/src/plugins/sh/sh.js +8 -5
package/src/plugins/sh/shDoc.md +7 -8
package/src/plugins/skill/README.md +37 -14
package/src/plugins/skill/skill.js +200 -101
package/src/plugins/skill/skillDoc.js +3 -0
package/src/plugins/skill/skillDoc.md +9 -0
package/src/plugins/stream/README.md +7 -6
package/src/plugins/stream/finalize.js +100 -0
package/src/plugins/stream/stream.js +13 -45
package/src/plugins/telemetry/telemetry.js +27 -4
package/src/plugins/think/think.js +2 -3
package/src/plugins/think/thinkDoc.md +2 -4
package/src/plugins/unknown/README.md +1 -1
package/src/plugins/unknown/unknown.js +17 -19
package/src/plugins/update/update.js +4 -51
package/src/plugins/update/updateDoc.md +21 -6
package/src/plugins/xai/xai.js +68 -102
package/src/plugins/yolo/yolo.js +102 -75
package/src/sql/functions/hedmatch.js +1 -1
package/src/sql/functions/hedreplace.js +1 -1
package/src/sql/functions/hedsearch.js +1 -1
package/src/sql/functions/slugify.js +16 -2
package/BENCH_ENVIRONMENT.md +0 -230
package/CLIENT_INTERFACE.md +0 -396
package/last_run.txt +0 -5617
package/scriptify/ask_run.js +0 -77
package/scriptify/cache_probe.js +0 -66
package/scriptify/cache_probe_grok.js +0 -74
package/src/agent/budget.js +0 -33
package/src/agent/config.js +0 -38
package/src/plugins/hedberg/README.md +0 -71
package/src/plugins/hedberg/docs.md +0 -0
package/src/plugins/hedberg/edits.js +0 -55
package/src/plugins/hedberg/normalize.js +0 -17
package/src/plugins/hedberg/sed.js +0 -49
package/src/plugins/instructions/instructions.md +0 -34
package/src/plugins/instructions/instructions_104.md +0 -8
package/src/plugins/instructions/instructions_105.md +0 -39
package/src/plugins/instructions/instructions_106.md +0 -22
package/src/plugins/instructions/instructions_107.md +0 -17
package/src/plugins/instructions/instructions_108.md +0 -0
package/src/plugins/known/knownDoc.js +0 -3
package/src/plugins/known/knownDoc.md +0 -8
package/src/plugins/unknown/unknownDoc.js +0 -3
package/src/plugins/unknown/unknownDoc.md +0 -11
package/turns/cli_1777462658211/turn_001.txt +0 -772
package/turns/cli_1777462658211/turn_002.txt +0 -606
package/turns/cli_1777462658211/turn_003.txt +0 -667
package/turns/cli_1777462658211/turn_004.txt +0 -297
package/turns/cli_1777462658211/turn_005.txt +0 -301
package/turns/cli_1777462658211/turn_006.txt +0 -262
package/turns/cli_1777465095132/turn_001.txt +0 -715
package/turns/cli_1777465095132/turn_002.txt +0 -236
package/turns/cli_1777465095132/turn_003.txt +0 -287
package/turns/cli_1777465095132/turn_004.txt +0 -694
package/turns/cli_1777465095132/turn_005.txt +0 -422
package/turns/cli_1777465095132/turn_006.txt +0 -365
package/turns/cli_1777465095132/turn_007.txt +0 -885
package/turns/cli_1777465095132/turn_008.txt +0 -1277
package/turns/cli_1777465095132/turn_009.txt +0 -736
/package/src/{plugins → lib}/hedberg/patterns.js +0 -0

package/src/agent/TurnExecutor.js CHANGED Viewed

@@ -76,6 +76,7 @@ export default class TurnExecutor {
 				contextSize,
 				systemPrompt: null,
 				loopPrompt,
+				signal,
 			},
 		);
 		await this.#hooks.turn.started.emit({
@@ -88,14 +89,18 @@ export default class TurnExecutor {
 		await this.#hooks.processTurn(rummy);
-		const systemPrompt =
-			await this.#hooks.instructions.resolveSystemPrompt(rummy);
+		// Run persona feeds the assembly.system chain (persona plugin's
+		// participant at priority 150). Loaded once per turn; the system
+		// prompt is built directly by the chain — no resolveSystemPrompt
+		// indirection.
+		const runRow = await this.#db.get_run_by_id.get({ id: currentRunId });
 		const budgetCtx = {
 			runId: currentRunId,
 			loopId: currentLoopId,
 			turn,
-			systemPrompt,
+			systemPrompt: "",
+			persona: runRow.persona,
 			mode,
 			toolSet,
 			loopIteration,
@@ -103,6 +108,7 @@ export default class TurnExecutor {
 		const initial = await materializeContext({
 			db: this.#db,
 			hooks: this.#hooks,
+			entries: this.#entries,
 			contextSize,
 			...budgetCtx,
 		});
@@ -113,18 +119,22 @@ export default class TurnExecutor {
 			rowCount: initial.rows.length,
 		});
-		const budgetResult = await this.#hooks.budget.enforce({
-			contextSize,
-			messages: initial.messages,
-			rows: initial.rows,
-			lastPromptTokens: initial.lastContextTokens,
-			ctx: budgetCtx,
-			rummy,
-		});
-		const messages = budgetResult.messages;
-		const assembledTokens = budgetResult.assembledTokens;
+		const dispatchPacket = await this.#hooks.turn.beforeDispatch.filter(
+			{
+				contextSize,
+				messages: initial.messages,
+				rows: initial.rows,
+				lastPromptTokens: initial.lastContextTokens,
+				assembledTokens: 0,
+				ok: true,
+				overflow: null,
+			},
+			{ rummy, ctx: budgetCtx },
+		);
+		const messages = dispatchPacket.messages;
+		const assembledTokens = dispatchPacket.assembledTokens;
-		if (!budgetResult.ok) {
+		if (!dispatchPacket.ok) {
 			return {
 				turn,
 				turnId: turnRow.id,
@@ -133,11 +143,10 @@ export default class TurnExecutor {
 				updateText: null,
 				assembledTokens,
 				contextSize,
-				overflow: budgetResult.overflow,
+				overflow: dispatchPacket.overflow,
 			};
 		}
-		const runRow = await this.#db.get_run_by_id.get({ id: currentRunId });
 		const filteredMessages = await this.#hooks.llm.messages.filter(messages, {
 			model: requestedModel,
 			projectId,
@@ -180,6 +189,35 @@ export default class TurnExecutor {
 					contextSize,
 				};
 			}
+			// LLM fetch hit its per-call ceiling (provider's
+			// AbortSignal.timeout(FETCH_TIMEOUT) fired). Convert to a
+			// 504 strike so the loop continues — one timed-out turn is
+			// recoverable; MAX_STRIKES in a row abandon at 499. Without
+			// this catch the AbortError escapes to AgentLoop's outer
+			// catch and the run dies at status=500, losing all prior
+			// productive turns. signal.aborted being true means OUR
+			// controller fired (drain), not a fetch timeout — re-throw
+			// so AgentLoop ends the run cleanly at 499.
+			if (err?.name === "TimeoutError" || err?.name === "AbortError") {
+				if (signal?.aborted) throw err;
+				await this.#hooks.error.log.emit({
+					store: this.#entries,
+					runId: currentRunId,
+					turn,
+					loopId: currentLoopId,
+					message: `LLM call timed out: ${err.message}`,
+					status: 504,
+				});
+				return {
+					turn,
+					turnId: turnRow.id,
+					recorded: [],
+					summaryText: null,
+					updateText: null,
+					assembledTokens,
+					contextSize,
+				};
+			}
 			throw err;
 		}
 		const result = await this.#hooks.llm.response.filter(rawResult, {
@@ -196,6 +234,10 @@ export default class TurnExecutor {
 		const content = responseMessage?.content ? responseMessage.content : "";
 		const { commands, warnings, unparsed } = XmlParser.parse(content);
+		// Parser warnings are recovered emissions — the parser already
+		// corrected a mismatched/unclosed tag and produced commands. Log
+		// them so the model sees what happened, but don't strike: the
+		// turn's productive work is intact.
 		for (const w of warnings) {
 			await this.#hooks.error.log.emit({
 				store: this.#entries,
@@ -204,6 +246,7 @@ export default class TurnExecutor {
 				message: w,
 				loopId: currentLoopId,
 				status: 422,
+				soft: true,
 			});
 		}
 		if (commands.length === 0 && unparsed?.trim() && warnings.length === 0) {
@@ -217,6 +260,27 @@ export default class TurnExecutor {
 			});
 		}
+		// Contract floor: a turn without <update> is malformed; refuse to
+		// honor its side effects. Repetition loops, partial outputs, and
+		// other broken responses commonly emit actions without closure;
+		// dispatching them anyway lets a broken turn corrupt state. Skip
+		// recording AND dispatching when commands are present but no
+		// <update> closes the turn — the strike system still fires via
+		// turnErrors, model retries cleanly next turn.
+		const hasUpdate = commands.some((c) => c.name === "update");
+		const skipDispatch = commands.length > 0 && !hasUpdate;
+		if (skipDispatch) {
+			await this.#hooks.error.log.emit({
+				store: this.#entries,
+				runId: currentRunId,
+				turn,
+				loopId: currentLoopId,
+				message:
+					"Turn rejected: no <update> emitted. Actions are not honored unless the turn ends with an <update>.",
+				status: 422,
+			});
+		}
 		// Layer plugin reasoning contributions onto the API-provided seed.
 		if (responseMessage) {
 			const seed = responseMessage.reasoning_content
@@ -242,17 +306,19 @@ export default class TurnExecutor {
 			userMsg: userMsg?.content,
 		});
-		// PHASE 1: RECORD
+		// PHASE 1: RECORD (skipped when skipDispatch — broken turn, no side effects)
 		const recorded = [];
-		for (const cmd of commands) {
-			const entry = await this.#record(
-				currentRunId,
-				currentLoopId,
-				turn,
-				mode,
-				cmd,
-			);
-			if (entry) recorded.push(entry);
+		if (!skipDispatch) {
+			for (const cmd of commands) {
+				const entry = await this.#record(
+					currentRunId,
+					currentLoopId,
+					turn,
+					mode,
+					cmd,
+				);
+				if (entry) recorded.push(entry);
+			}
 		}
 		// PHASE 2: DISPATCH — sequential; abort-after-failure; proposals notify-and-await.
@@ -334,7 +400,7 @@ export default class TurnExecutor {
 			}
 		}
-		await this.#hooks.budget.postDispatch({
+		await this.#hooks.turn.dispatched.emit({
 			contextSize,
 			ctx: budgetCtx,
 			rummy,
@@ -379,8 +445,11 @@ export default class TurnExecutor {
 		if (cmd.path) rawTarget = cmd.path;
 		else if (cmd.command) rawTarget = cmd.command;
 		else if (cmd.question) rawTarget = cmd.question;
-		// Reject likely reasoning bleed: oversize or control chars in target.
-		if (rawTarget.length > 512 || /\p{Cc}/u.test(rawTarget)) {
+		// Reject reasoning-bleed in path-shaped fields only. cmd.command
+		// (sh/env shell scripts) and cmd.question (ask_user prose) are
+		// content fields where newlines/tabs/length are legitimate; the
+		// slugifier sanitizes them downstream when deriving the log path.
+		if (cmd.path && (cmd.path.length > 2048 || /\p{Cc}/u.test(cmd.path))) {
 			const rejectPath = await this.#entries.logPath(
 				runId,
 				turn,
@@ -391,7 +460,7 @@ export default class TurnExecutor {
 				runId,
 				turn,
 				path: rejectPath,
-				body: `Invalid path: too long or contains non-printing characters`,
+				body: "Invalid path.",
 				state: "failed",
 				outcome: "validation",
 				attributes: { action: scheme },

package/src/agent/XmlParser.js CHANGED Viewed

@@ -1,6 +1,47 @@
-import { parseEditContent } from "../plugins/hedberg/edits.js";
-import { parseJsonEdit } from "../plugins/hedberg/normalize.js";
-import { parseSed } from "../plugins/hedberg/sed.js";
+import {
+	extractSingleHeredoc,
+	parseMarkerBody,
+} from "../lib/hedberg/marker.js";
+// Edit-marker body opacity. When `#findBodyEnd` is scanning a `<set>`
+// body and hits an opener, jump past the matching closer so tag-shaped
+// content inside the marker (`</set>`, `<get/>`, etc.) doesn't trigger
+// structural recovery.
+//
+// Two opener shapes are recognized for opacity:
+//   - `<<IDENT` — current edit syntax (parsed by marker.js).
+//   - `<<:::IDENT` — packet-rendering shape (engine emits via
+//     plugins/helpers.js). A model copy-pasting the packet shape into
+//     a `<set>` body should still get clean opacity even though
+//     marker.js routes such bodies to plain-body REPLACE.
+function skipBareMarker(s, pos) {
+	const m = s.slice(pos).match(/^<<([A-Z][A-Za-z0-9_]*)/);
+	if (!m) return null;
+	const ident = m[1];
+	const openerEnd = pos + m[0].length;
+	const escIdent = ident.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+	const closerRe = new RegExp(`(?<=^|\\s)${escIdent}(?=[\\s<>]|$)`);
+	const cm = s.slice(openerEnd).match(closerRe);
+	if (!cm) return null;
+	return openerEnd + cm.index + cm[0].length;
+}
+function skipPacketMarker(s, pos) {
+	const m = s.slice(pos).match(/^<<:::([A-Za-z_][A-Za-z0-9_./-]*)/);
+	if (!m) return null;
+	const ident = m[1];
+	const openerEnd = pos + m[0].length;
+	const escIdent = ident.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+	const closerRe = new RegExp(`:::${escIdent}(?![A-Za-z0-9_])`);
+	const cm = s.slice(openerEnd).match(closerRe);
+	if (!cm) return null;
+	return openerEnd + cm.index + cm[0].length;
+}
+function skipEditMarker(s, pos) {
+	if (s.startsWith("<<:::", pos)) return skipPacketMarker(s, pos);
+	return skipBareMarker(s, pos);
+}
 const STORE_TOOLS = new Set(["get", "rm", "set", "mv", "cp", "search"]);
 export const ALL_TOOLS = new Set([
@@ -14,69 +55,43 @@ export const ALL_TOOLS = new Set([
 // Per-tool resolution: missing canonical attribute is filled silently from the body.
 function resolveCommand(name, a, rawBody) {
+	// Generic heredoc affordance: any non-`<set>` plugin's body may be
+	// wrapped in a single `<<IDENT...IDENT` heredoc to opaquely contain
+	// multi-line scripts, tag-shaped prose, or content with special
+	// characters. Plugins consume the unwrapped inner body verbatim;
+	// the IDENT is exposed as `heredocIdent` on the command for plugins
+	// that want to act on the label. `<set>` is exempt because it does
+	// its own multi-op heredoc parsing via `parseMarkerBody`.
+	if (name !== "set") {
+		const heredoc = extractSingleHeredoc(rawBody);
+		if (heredoc) {
+			rawBody = heredoc.content;
+			a = { ...a, heredocIdent: heredoc.ident };
+		}
+	}
 	const trimmed = rawBody.trim();
 	if (name === "set") {
-		const hasEdit =
-			/<{3,12} SEARCH/.test(trimmed) ||
-			/>{3,12} REPLACE/.test(trimmed) ||
-			(trimmed.includes("@@") &&
-				(trimmed.includes("\n-") || trimmed.includes("\n+"))) ||
-			trimmed.includes("<old_text>");
-		if (hasEdit) {
-			const blocks = parseEditContent(rawBody);
-			if (blocks.length > 0) {
-				return {
-					name,
-					path: a.path,
-					body: a.body,
-					manifest: a.manifest,
-					blocks,
-				};
-			}
-		}
-		const jsonEdit = parseJsonEdit(trimmed);
-		if (jsonEdit) {
-			return { name, path: a.path, ...jsonEdit };
-		}
-		if (trimmed.startsWith("s/")) {
-			const blocks = parseSed(trimmed);
-			if (blocks?.length === 1) {
-				return {
-					name,
-					path: a.path,
-					search: blocks[0].search,
-					replace: blocks[0].replace,
-					flags: blocks[0].flags,
-					sed: true,
-				};
-			}
-			if (blocks?.length > 1) {
-				return { name, path: a.path, blocks };
-			}
-		}
-		if (a.search) {
-			const replace = a.replace ?? trimmed;
-			return {
-				name,
-				path: a.path,
-				body: a.body,
-				manifest: a.manifest,
-				search: a.search,
-				replace,
-			};
-		}
-		if (trimmed && a.body) {
-			return {
-				name,
-				path: a.path,
-				search: a.body,
-				replace: trimmed,
-				manifest: a.manifest,
-			};
-		}
-		const body = trimmed || a.body || "";
-		return { name, ...a, body };
+		// `search`/`replace` as attributes is no longer in the grammar;
+		// strip them so they can't sneak past via the attribute spread.
+		const { search: _s, replace: _r, ...rest } = a;
+		a = rest;
+		// Self-close / no-body: visibility/metadata op.
+		if (!trimmed) return { name, ...a, body: a.body || "" };
+		// Edit syntax (SPEC.md "Edit Syntax"): walks the body for
+		// `<<:::IDENT...:::IDENT` markers and returns an ordered op
+		// list. No markers → plain body, treated as full-replace.
+		// Non-keyword IDENTs (path-flavored, identifier-flavored)
+		// route to REPLACE so the model gets a working write whatever
+		// IDENT it picks.
+		const { ops, error } = parseMarkerBody(rawBody);
+		if (error) return { name, ...a, error };
+		if (ops) return { name, ...a, operations: ops };
+		// No markers — plain body, full-replace.
+		return { name, ...a, body: trimmed };
 	}
 	if (name === "update") {
@@ -85,55 +100,80 @@ function resolveCommand(name, a, rawBody) {
 		return { name, ...a, body, status };
 	}
+	// Body shorthand fallback: when the attribute is unset (undefined),
+	// fall back to the trimmed body. Empty-string attrs are preserved
+	// as-is — handlers validate. `||` would conflate the two cases.
+	const fromBody = trimmed === "" ? null : trimmed;
 	if (name === "get" || name === "rm") {
-		return { name, ...a, path: a.path || trimmed || null };
+		return { name, ...a, path: a.path ?? fromBody };
 	}
 	if (name === "search") {
-		const path = a.path || trimmed || null;
+		const path = a.path ?? fromBody;
 		const results = a.results ? Number(a.results) : null;
 		return { name, ...a, path, results };
 	}
 	if (name === "mv" || name === "cp") {
-		return { name, ...a, path: a.path, to: a.to || trimmed || null };
+		return { name, ...a, path: a.path, to: a.to ?? fromBody };
 	}
 	if (name === "sh" || name === "env") {
-		const command = a.command || trimmed || null;
+		const command = a.command ?? fromBody;
 		return { name, ...a, command };
 	}
 	if (name === "ask_user") {
-		const question = a.question || null;
-		const options = a.options || trimmed || null;
+		const question = a.question ?? null;
+		const options = a.options ?? fromBody;
 		return { name, ...a, question, options };
 	}
-	return { name, ...a, body: trimmed || a.body };
+	return { name, ...a, body: trimmed === "" ? a.body : trimmed };
 }
 const NAME_CHAR = /[a-zA-Z0-9_]/;
 const ATTR_KEY_CHAR = /[a-zA-Z0-9_:-]/;
 const WS = /\s/;
-// Recovery-tolerant tokenizer for rummy's closed set of tool tags.
+// Tokenizer for rummy's closed set of tool tags. Body opacity for closed
+// bodies; tail recovery for unclosed bodies.
 //
 // Design contract:
 //   - Tool tags (<get>, <set>, <sh>, ...) are the only syntactic special tags.
 //     Any other "<...>" sequence in OUTER text is treated as literal text.
-//   - Inside a tool tag's body, content is OPAQUE: only the matching close
-//     tag is recognized. Body may contain regex (`(?<!`), generics (`Vec<u8>`),
-//     HTML, XML, heredocs, comparison operators — none of it affects parsing.
-//   - Backtick spans (`...`) and triple-backtick fences (```...```) at the
-//     OUTER level neutralize tag-like content, mirroring the markdown
-//     convention that documentation about a tool isn't a tool call.
-//     Inside tool bodies this tracking does NOT apply (body opacity wins).
+//   - Inside a tool tag's body, content is OPAQUE: only the matching
+//     `</tagname>` close (depth-counted for same-name nesting) ends the
+//     body. Mismatched closes of OTHER tag names — `</env>`, `</mv>`,
+//     `</foo>` inside a `<set>` body — are body content, not structural
+//     signals.
+//   - Backtick spans (`...`) and triple-backtick fences (```...```)
+//     suppress tag recognition AT THE OUTER LEVEL ONLY (between tool
+//     calls). Documentation prose with backticked tag examples doesn't
+//     get parsed as commands. Inside tool bodies backticks are content;
+//     bodies that need opacity for tag-like content use the edit-syntax
+//     marker family (see SPEC.md "Edit Syntax"), which has no
+//     false-positive failure modes (unlike inside-body backtick
+//     tracking, which would suppress closing tags on bodies with stray
+//     unbalanced backticks).
+//   - Edit-syntax marker opacity (set only): `<<:::IDENT...:::IDENT`
+//     spans inside a `<set>` body are skipped during tag detection so
+//     content with `</set>` literals or marker-shaped text stays as
+//     body. Multiple markers per body supported; see marker.js.
 //   - Same-name nesting (`<set>...<set/>...</set>`) is depth-counted so
-//     nested examples don't prematurely close the outer.
-//   - Recovery: unclosed openers capture body to EOF + emit a warning.
-//     Orphan closes at outer level become text, no warning (body opacity
-//     means models legitimately write `</set>` in prose / summaries).
+//     nested examples don't prematurely close the outer. Same-name
+//     nesting also disables tail recovery — the model's intent is clearly
+//     opaque body content.
+//   - Unclosed openers (no matching close, no same-name nesting) try
+//     tail recovery: scan the captured body for the leftmost position
+//     whose suffix tokenizes cleanly into ≥1 well-formed tool calls
+//     with zero leftover text. If found, end the unclosed body there
+//     and let the trailing tags parse as proper siblings. The warning
+//     surfaces "Unclosed <name> — recovered N trailing tool call(s)"
+//     so the model can see what happened. If recovery finds nothing,
+//     capture body to EOF and emit "Unclosed <name> — content captured
+//     anyway".
 export default class XmlParser {
 	static MAX_COMMANDS = Number(process.env.RUMMY_MAX_COMMANDS);
@@ -197,11 +237,13 @@ export default class XmlParser {
 			const result = XmlParser.#findBodyEnd(s, name, openerEnd);
 			const body = s.slice(openerEnd, result.bodyEnd);
 			if (result.unclosed) {
-				warnings.push(`Unclosed <${name}> tag — content captured anyway`);
-			} else if (result.mismatchedCloseName) {
-				warnings.push(
-					`Mismatched </${result.mismatchedCloseName}> closing <${name}> — corrected to </${name}>`,
-				);
+				if (result.recoveredTailCount) {
+					warnings.push(
+						`Unclosed <${name}> tag — recovered ${result.recoveredTailCount} trailing tool call(s)`,
+					);
+				} else {
+					warnings.push(`Unclosed <${name}> tag — content captured anyway`);
+				}
 			}
 			commands.push(resolveCommand(name, attrs, body));
 			i = result.afterClose;
@@ -327,18 +369,42 @@ export default class XmlParser {
 	// Scans body content from `fromPos` until the matching `</name>` closer,
 	// counting depth so same-name nested examples don't prematurely close.
-	// Returns { bodyEnd, afterClose, unclosed, mismatchedCloseName }.
+	// Returns { bodyEnd, afterClose, unclosed }.
+	//
+	// Strict body opacity: only `</name>` (matching the open) and same-name
+	// nested opens affect parsing. Mismatched closes of OTHER tag names are
+	// body content, period.
+	//
+	// Backtick fences (`…`, ```…```) inside the body suppress all tag
+	// recognition — a markdown table cell containing `<set>` examples
+	// stays as content, not interpreted as a nested tag. This matches
+	// the outer-level convention and is the load-bearing reason a model
+	// can write documentation about rummy commands inside a deliverable
+	// body without breaking parsing.
 	//
-	// Mismatched-close recovery: if we encounter `</X>` where X != name and X
-	// is not a depth-counted nested tag, we use a balance heuristic to decide
-	// whether the orphan close was a typo (recover here) or legitimate body
-	// content (continue scanning). Specifically: count `</name>` minus
-	// `<name` in the rest of the string; if non-positive, no real close
-	// exists ahead and the orphan must be the intended close.
+	// If the matching close never arrives, emit "Unclosed" so the model
+	// sees a clear failure and corrects on the next turn.
 	static #findBodyEnd(s, name, fromPos) {
 		let depth = 1;
+		let sameNameNested = false;
 		let i = fromPos;
 		while (i < s.length) {
+			// Edit-syntax marker opacity: marker spans (bare `<<IDENT` or
+			// packet-shaped `<<:::IDENT`) are opaque — tag detection
+			// skips them so inner `</set>` and other tag-shaped content
+			// stays as body. Multiple markers per `<set>` body are
+			// supported; check on every iteration.
+			if (
+				name === "set" &&
+				(s.startsWith("<<:::", i) ||
+					(s.startsWith("<<", i) && /^[A-Z]/.test(s[i + 2] ?? "")))
+			) {
+				const skipTo = skipEditMarker(s, i);
+				if (skipTo != null) {
+					i = skipTo;
+					continue;
+				}
+			}
 			if (s[i] !== "<") {
 				i++;
 				continue;
@@ -360,35 +426,64 @@ export default class XmlParser {
 					i = k + 1;
 					continue;
 				}
-				if (isCloseTag && closeName.length > 0) {
-					const rest = s.slice(k + 1);
-					const closesAhead = (
-						rest.match(new RegExp(`<\\/${name}\\b\\s*>`, "g")) || []
-					).length;
-					const opensAhead = (rest.match(new RegExp(`<${name}\\b`, "g")) || [])
-						.length;
-					if (closesAhead - opensAhead < 1) {
-						return {
-							bodyEnd: i,
-							afterClose: k + 1,
-							unclosed: false,
-							mismatchedCloseName: closeName,
-						};
-					}
-				}
 			}
 			const opener = XmlParser.#matchOpener(s, i);
 			if (opener && opener.name === name && !opener.selfClose) {
 				depth++;
+				sameNameNested = true;
 				i = opener.end;
 				continue;
 			}
 			i++;
 		}
+		// Unclosed: try tail recovery, but only if the body never
+		// nested a same-name opener. Same-name nesting is the model
+		// deliberately using opaque body for examples (`<set>` writing
+		// docs about `<set>`); we trust the body content as authored.
+		// No nesting means a plain botched `</set>` — recovery is safe.
+		// If the body's tail is a clean sequence of one or more
+		// well-formed tool calls (zero leftover text), end the body
+		// at the start of that tail and let the outer tokenizer parse
+		// those calls as proper siblings. Closes the silent-swallow
+		// gap when a model botches `</set>` after SEARCH/REPLACE and
+		// emits trailing `<sh>` / `<update>`.
+		if (sameNameNested) {
+			return { bodyEnd: s.length, afterClose: s.length, unclosed: true };
+		}
+		const recovery = XmlParser.#findTailRecovery(s, fromPos);
+		if (recovery) {
+			return {
+				bodyEnd: recovery.tailStart,
+				afterClose: recovery.tailStart,
+				unclosed: true,
+				recoveredTailCount: recovery.commandCount,
+			};
+		}
 		return { bodyEnd: s.length, afterClose: s.length, unclosed: true };
 	}
+	// Scan body content for the leftmost position whose suffix tokenizes
+	// cleanly into ≥1 commands with no leftover non-whitespace text.
+	// Returns { tailStart, commandCount } or null. Only considers opener
+	// positions; treats the suffix as outer-level so backtick fences and
+	// tag recognition match the parent tokenizer's behavior.
+	static #findTailRecovery(s, fromPos) {
+		let best = null;
+		let i = fromPos;
+		while (i < s.length) {
+			if (s[i] === "<" && XmlParser.#matchOpener(s, i)) {
+				const suffix = s.slice(i);
+				const result = XmlParser.#tokenize(suffix, []);
+				if (result.commands.length > 0 && result.unparsed === "") {
+					best = { tailStart: i, commandCount: result.commands.length };
+					break;
+				}
+			}
+			i++;
+		}
+		return best;
+	}
 	// Translate native training-format tool calls into rummy XML silently.
 	static #normalizeToolCalls(content) {
 		// Gemma code-fenced XML.

package/src/agent/errors.js CHANGED Viewed

@@ -1,3 +1,21 @@
+// Outcomes that record a failure but don't strike — findings the model
+// adapts to, not contract violations. `not_found` (model acted on an
+// entry that doesn't exist) and `conflict` (SEARCH text didn't match
+// current body) are recoverable: read the new state, try again.
+// `unparsed` (free text outside any tool tag — comments, "thinking
+// out loud" between tool calls) is non-actionable but not malicious;
+// the empty-turn failure mode is already caught by update plugin's
+// 422 "Missing update", so striking unparsed too is duplicative.
+// Hard outcomes (validation, permission, exit:N) DO strike. Shared
+// between error.js's verdict accumulator (recordedFailed gate) and
+// Entries' auto-failure hook (passes soft=true so error.log.emit
+// skips turn errors increment when the outcome is soft).
+export const SOFT_FAILURE_OUTCOMES = new Set([
+	"not_found",
+	"conflict",
+	"unparsed",
+]);
 // Writer tier excluded from scheme.writable_by; see SPEC writer_tiers.
 export class PermissionError extends Error {
 	constructor(scheme, writer, allowed) {
@@ -14,3 +32,20 @@ export class PermissionError extends Error {
 		this.allowed = [...allowed];
 	}
 }
+// Body length exceeded the entries.body CHECK constraint (RUMMY_ENTRY_SIZE_MAX
+// at create-time). Surfaced as a 413 strike. The cap value lives only in the
+// schema — JS does not duplicate it, because the database persists across
+// rummy invocations and the env var that built the schema may differ from
+// the env var seen by the running instance. Reporting body size is enough
+// for the model to adapt; operators can read the cap from the schema.
+export class EntryOverflowError extends Error {
+	constructor(path, size) {
+		super(
+			`413: entry "${path}" body ${size} bytes exceeds RUMMY_ENTRY_SIZE_MAX`,
+		);
+		this.name = "EntryOverflowError";
+		this.path = path;
+		this.size = size;
+	}
+}