npm - @possumtech/rummy - Versions diffs - 2.1.0 → 2.2.1 - Mend

@possumtech/rummy 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

package/.env.example +40 -15
package/.xai.key +1 -0
package/PLUGINS.md +169 -53
package/README.md +38 -32
package/SPEC.md +366 -179
package/bin/digest.js +1097 -0
package/biome/no-fallbacks.grit +2 -2
package/gemini.key +1 -0
package/lang/en.json +10 -1
package/migrations/001_initial_schema.sql +9 -2
package/package.json +19 -8
package/service.js +1 -0
package/src/agent/AgentLoop.js +76 -26
package/src/agent/ContextAssembler.js +2 -0
package/src/agent/Entries.js +238 -60
package/src/agent/ProjectAgent.js +44 -0
package/src/agent/TurnExecutor.js +99 -30
package/src/agent/XmlParser.js +206 -111
package/src/agent/errors.js +35 -0
package/src/agent/known_queries.sql +1 -1
package/src/agent/known_store.sql +3 -42
package/src/agent/materializeContext.js +30 -1
package/src/agent/runs.sql +8 -18
package/src/agent/tokens.js +0 -1
package/src/agent/turns.sql +1 -0
package/src/hooks/Hooks.js +26 -0
package/src/hooks/RummyContext.js +12 -1
package/src/lib/hedberg/README.md +60 -0
package/src/lib/hedberg/hedberg.js +60 -0
package/src/lib/hedberg/marker.js +158 -0
package/src/{plugins → lib}/hedberg/matcher.js +1 -2
package/src/llm/LlmProvider.js +41 -3
package/src/llm/openaiStream.js +17 -0
package/src/plugins/ask_user/ask_user.js +12 -2
package/src/plugins/ask_user/ask_userDoc.md +1 -5
package/src/plugins/budget/README.md +29 -24
package/src/plugins/budget/budget.js +166 -110
package/src/plugins/cli/README.md +3 -4
package/src/plugins/cli/cli.js +31 -5
package/src/plugins/cloudflare/cloudflare.js +136 -0
package/src/plugins/cp/cp.js +41 -4
package/src/plugins/cp/cpDoc.md +5 -6
package/src/plugins/engine/engine.sql +1 -1
package/src/plugins/env/README.md +5 -4
package/src/plugins/env/env.js +7 -4
package/src/plugins/env/envDoc.md +7 -8
package/src/plugins/error/error.js +56 -15
package/src/plugins/file/README.md +12 -3
package/src/plugins/file/file.js +2 -2
package/src/plugins/get/get.js +59 -36
package/src/plugins/get/getDoc.md +10 -34
package/src/plugins/google/google.js +115 -0
package/src/plugins/hedberg/hedberg.js +13 -56
package/src/plugins/helpers.js +66 -12
package/src/plugins/index.js +1 -2
package/src/plugins/instructions/README.md +44 -47
package/src/plugins/instructions/instructions-system.md +44 -0
package/src/plugins/instructions/instructions-user.md +53 -0
package/src/plugins/instructions/instructions.js +58 -189
package/src/plugins/known/README.md +6 -7
package/src/plugins/known/known.js +24 -30
package/src/plugins/log/log.js +41 -32
package/src/plugins/mv/mv.js +40 -1
package/src/plugins/mv/mvDoc.md +1 -8
package/src/plugins/ollama/ollama.js +4 -3
package/src/plugins/openai/openai.js +4 -3
package/src/plugins/openrouter/openrouter.js +14 -4
package/src/plugins/persona/README.md +11 -13
package/src/plugins/persona/default.md +29 -0
package/src/plugins/persona/persona.js +10 -66
package/src/plugins/policy/policy.js +23 -22
package/src/plugins/prompt/README.md +37 -27
package/src/plugins/prompt/prompt.js +13 -19
package/src/plugins/rm/rm.js +18 -0
package/src/plugins/rm/rmDoc.md +5 -6
package/src/plugins/rpc/rpc.js +3 -3
package/src/plugins/set/set.js +205 -323
package/src/plugins/set/setDoc.md +47 -17
package/src/plugins/sh/README.md +6 -5
package/src/plugins/sh/sh.js +8 -5
package/src/plugins/sh/shDoc.md +7 -8
package/src/plugins/skill/README.md +37 -14
package/src/plugins/skill/skill.js +200 -101
package/src/plugins/skill/skillDoc.js +3 -0
package/src/plugins/skill/skillDoc.md +9 -0
package/src/plugins/stream/README.md +7 -6
package/src/plugins/stream/finalize.js +100 -0
package/src/plugins/stream/stream.js +13 -45
package/src/plugins/telemetry/telemetry.js +27 -4
package/src/plugins/think/think.js +2 -3
package/src/plugins/think/thinkDoc.md +2 -4
package/src/plugins/unknown/README.md +1 -1
package/src/plugins/unknown/unknown.js +17 -19
package/src/plugins/update/update.js +4 -51
package/src/plugins/update/updateDoc.md +21 -6
package/src/plugins/xai/xai.js +68 -102
package/src/plugins/yolo/yolo.js +102 -75
package/src/sql/functions/hedmatch.js +1 -1
package/src/sql/functions/hedreplace.js +1 -1
package/src/sql/functions/hedsearch.js +1 -1
package/src/sql/functions/slugify.js +16 -2
package/BENCH_ENVIRONMENT.md +0 -230
package/CLIENT_INTERFACE.md +0 -396
package/last_run.txt +0 -5617
package/scriptify/ask_run.js +0 -77
package/scriptify/cache_probe.js +0 -66
package/scriptify/cache_probe_grok.js +0 -74
package/src/agent/budget.js +0 -33
package/src/agent/config.js +0 -38
package/src/plugins/hedberg/README.md +0 -71
package/src/plugins/hedberg/docs.md +0 -0
package/src/plugins/hedberg/edits.js +0 -55
package/src/plugins/hedberg/normalize.js +0 -17
package/src/plugins/hedberg/sed.js +0 -49
package/src/plugins/instructions/instructions.md +0 -34
package/src/plugins/instructions/instructions_104.md +0 -8
package/src/plugins/instructions/instructions_105.md +0 -39
package/src/plugins/instructions/instructions_106.md +0 -22
package/src/plugins/instructions/instructions_107.md +0 -17
package/src/plugins/instructions/instructions_108.md +0 -0
package/src/plugins/known/knownDoc.js +0 -3
package/src/plugins/known/knownDoc.md +0 -8
package/src/plugins/unknown/unknownDoc.js +0 -3
package/src/plugins/unknown/unknownDoc.md +0 -11
package/turns/cli_1777462658211/turn_001.txt +0 -772
package/turns/cli_1777462658211/turn_002.txt +0 -606
package/turns/cli_1777462658211/turn_003.txt +0 -667
package/turns/cli_1777462658211/turn_004.txt +0 -297
package/turns/cli_1777462658211/turn_005.txt +0 -301
package/turns/cli_1777462658211/turn_006.txt +0 -262
package/turns/cli_1777465095132/turn_001.txt +0 -715
package/turns/cli_1777465095132/turn_002.txt +0 -236
package/turns/cli_1777465095132/turn_003.txt +0 -287
package/turns/cli_1777465095132/turn_004.txt +0 -694
package/turns/cli_1777465095132/turn_005.txt +0 -422
package/turns/cli_1777465095132/turn_006.txt +0 -365
package/turns/cli_1777465095132/turn_007.txt +0 -885
package/turns/cli_1777465095132/turn_008.txt +0 -1277
package/turns/cli_1777465095132/turn_009.txt +0 -736
package/src/{plugins → lib}/hedberg/patterns.js +0 -0

package/src/plugins/stream/finalize.js ADDED Viewed

@@ -0,0 +1,100 @@
+import Entries from "../../agent/Entries.js";
+import { logPathToDataBase } from "../helpers.js";
+// Single termination site for streaming entries. Both stream/completed
+// (external producer signaling close) and yolo's local child-spawn
+// close handler funnel through here so finalization shape stays
+// identical: channel terminal states, log-entry body rewrite, and
+// dormant-run wake all live in one place.
+//
+// terminalState: "resolved" (exit_code=0), "failed" (non-zero).
+// Aborts/cancellations write their own state ("cancelled") through the
+// stream/aborted and stream/cancel paths and do NOT call this helper —
+// explicit cancellation should not summon a follow-up turn.
+export default async function finalizeStream({
+	db,
+	entries,
+	hooks,
+	runRow,
+	path,
+	exitCode = 0,
+	duration = null,
+	wake = true,
+}) {
+	const rawBase = logPathToDataBase(path);
+	if (!rawBase) {
+		throw new Error(
+			`path must be a log entry (log://turn_N/...); got: ${path}`,
+		);
+	}
+	// The log entry path may arrive in its raw URL-encoded form (e.g.
+	// `%20` for spaces) but the data-channel rows are stored under the
+	// canonical form (`%20` → `_` via encodeSegment). Normalize the
+	// derived dataBase so `${dataBase}_*` matches the stored channel
+	// paths regardless of which form the caller passed in.
+	const dataBase = Entries.normalizePath(rawBase);
+	// Pin every state-transition write to the action's originating turn.
+	// Without this, entries.set's default turn=0 re-stamps the entry's
+	// run_view.turn to 0 — and the auto-failure hook then derives
+	// log://turn_0/error/... for failures that actually happened on
+	// turn N.
+	const turnMatch = path.match(/^log:\/\/turn_(\d+)\//);
+	const turn = turnMatch ? Number(turnMatch[1]) : 0;
+	const runId = runRow.id;
+	const terminalState = exitCode === 0 ? "resolved" : "failed";
+	const terminalOutcome = exitCode === 0 ? null : `exit:${exitCode}`;
+	const channels = await entries.getEntriesByPattern(
+		runId,
+		`${dataBase}_*`,
+		null,
+	);
+	for (const ch of channels) {
+		await entries.set({
+			runId,
+			turn,
+			path: ch.path,
+			state: terminalState,
+			body: ch.body,
+			outcome: terminalOutcome,
+		});
+	}
+	const logEntry = await entries.getAttributes(runId, path);
+	let command = "";
+	if (logEntry?.command) command = logEntry.command;
+	else if (logEntry?.summary) command = logEntry.summary;
+	const channelSummary = channels
+		.map((c) => {
+			const size = c.body ? `${c.tokens} tokens` : "empty";
+			return `${c.path} (${size})`;
+		})
+		.join(", ");
+	const dur = duration ? ` (${duration})` : "";
+	const exitLabel = exitCode === 0 ? "exit=0" : `exit=${exitCode}`;
+	const body = `ran '${command}', ${exitLabel}${dur}. Output: ${channelSummary}`;
+	await entries.set({ runId, turn, path, state: "resolved", body });
+	if (!wake) return { channels: channels.length };
+	// Dormancy: any pending (100) or active (102) loop on the run blocks
+	// the wake — the active loop will see the new log entry on its next
+	// turn assembly and the producer doesn't owe it a fresh prompt.
+	const inflight = await db.get_pending_loops.all({ run_id: runId });
+	if (inflight.length > 0) return { channels: channels.length, woke: false };
+	// Mode for the wake loop: inherit from the latest completed loop on
+	// the run. Fresh runs without a completed loop don't get woken (the
+	// child closing before any loop terminated is a state we'd never
+	// reach in practice).
+	const latest = await db.get_latest_completed_loop.get({ run_id: runId });
+	if (!latest) return { channels: channels.length, woke: false };
+	await hooks.run.wake.emit({
+		runAlias: runRow.alias,
+		body: "Process complete",
+		mode: latest.mode,
+	});
+	return { channels: channels.length, woke: true };
+}

package/src/plugins/stream/stream.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { logPathToDataBase } from "../helpers.js";
+import finalizeStream from "./finalize.js";
 // RPC plumbing that appends/terminates streaming data entries; see plugin README.
 export default class Stream {
@@ -59,55 +60,22 @@ export default class Stream {
 					alias: params.run,
 				});
 				if (!runRow) throw new Error(`run not found: ${params.run}`);
-				const runId = runRow.id;
 				const { exit_code: exitCode = 0, duration = null } = params;
-				const terminalState = exitCode === 0 ? "resolved" : "failed";
-				const terminalOutcome = exitCode === 0 ? null : `exit:${exitCode}`;
-				const dataBase = logPathToDataBase(params.path);
-				if (!dataBase) {
-					throw new Error(
-						`path must be a log entry (log://turn_N/...); got: ${params.path}`,
-					);
-				}
-				// Find all `{dataBase}_*` data entries (channels 1, 2, ...).
-				const store = ctx.projectAgent.entries;
-				const channels = await store.getEntriesByPattern(
-					runId,
-					`${dataBase}_*`,
-					null,
-				);
-				for (const ch of channels) {
-					await store.set({
-						runId,
-						path: ch.path,
-						state: terminalState,
-						body: ch.body,
-						outcome: terminalOutcome,
-					});
-				}
-				// One-line final stats for the log entry body.
-				const logEntry = await store.getAttributes(runId, params.path);
-				let command = "";
-				if (logEntry?.command) command = logEntry.command;
-				else if (logEntry?.summary) command = logEntry.summary;
-				const channelSummary = channels
-					.map((c) => {
-						const size = c.body ? `${c.tokens} tokens` : "empty";
-						return `${c.path} (${size})`;
-					})
-					.join(", ");
-				const dur = duration ? ` (${duration})` : "";
-				const exitLabel = exitCode === 0 ? "exit=0" : `exit=${exitCode}`;
-				const body = `ran '${command}', ${exitLabel}${dur}. Output: ${channelSummary}`;
-				await store.set({ runId, path: params.path, state: "resolved", body });
-				return { ok: true, channels: channels.length };
+				const result = await finalizeStream({
+					db: ctx.db,
+					entries: ctx.projectAgent.entries,
+					hooks,
+					runRow,
+					path: params.path,
+					exitCode,
+					duration,
+					wake: true,
+				});
+				return { ok: true, ...result };
 			},
 			description:
-				"Finalize a streaming producer. Transitions all `{path}_*` data channels to terminal status (200 on exit_code=0, 500 otherwise) and rewrites the log entry body with exit code, duration, and channel sizes.",
+				"Finalize a streaming producer. Transitions all `{path}_*` data channels to terminal status (200 on exit_code=0, 500 otherwise), rewrites the log entry body with exit code/duration/channel sizes, and wakes the run with a 'Process complete' prompt if it has gone dormant.",
 			params: {
 				run: "string — run alias",
 				path: "string — log-entry path (log://turn_N/{action}/{slug}); server derives the data channel path",

package/src/plugins/telemetry/telemetry.js CHANGED Viewed

@@ -206,6 +206,18 @@ export default class Telemetry {
 		if (usage.prompt_tokens) actualContextTokens = usage.prompt_tokens;
 		else if (assembledTokens) actualContextTokens = assembledTokens;
 		const numberOrZero = (v) => (typeof v === "number" ? v : 0);
+		// Forensic metadata blob — everything the provider sent that
+		// isn't content/reasoning_content (those live elsewhere) or
+		// already-derived columns (token counts, cost). Catches
+		// finish_reason, system_fingerprint, response id, service_tier,
+		// raw usage, and any provider-specific fields that may light up
+		// future investigations. JSON column tolerates shape drift.
+		const responseMetadata = {
+			finish_reason: result.choices[0].finish_reason,
+			model_returned: result.model,
+			usage: result.usage,
+			...result.chunkMetadata,
+		};
 		await rummy.entries.updateTurnStats({
 			id: rummy.turnId,
 			context_tokens: actualContextTokens,
@@ -217,12 +229,23 @@ export default class Telemetry {
 			completion_tokens: numberOrZero(usage.completion_tokens),
 			reasoning_tokens: reasoningTokens,
 			total_tokens: numberOrZero(usage.total_tokens),
-			// usage.cost is what the relay BILLED us; it reads 0 when routed
-			// via BYOK (relay didn't bill — upstream charged our key directly).
-			// upstream_inference_cost is the true compute cost in either case.
+			// Cost surfaces under different field names by provider:
+			// - OpenRouter direct: `usage.cost` (USD, what the relay billed us)
+			// - OpenRouter BYOK: `usage.cost_details.upstream_inference_cost` (USD,
+			//   relay didn't bill — upstream charged our key directly, so
+			//   `usage.cost` is 0 and the true compute cost lives here).
+			// - xAI direct: `usage.cost_in_usd_ticks` where 1 tick = 10⁻¹⁰
+			//   USD (verified empirically: 11 uncached + 161 cached + 1
+			//   output tokens → 107,500 ticks → $0.00001075 at xAI's
+			//   $0.20/M input, $0.05/M cached, $0.50/M output rates).
+			//   Divide by 1e10 to land in USD alongside the others.
+			// All three normalized to USD; downstream summaries sum them
+			// as comparable dollars.
 			cost:
 				numberOrZero(usage.cost) ||
-				numberOrZero(usage.cost_details?.upstream_inference_cost),
+				numberOrZero(usage.cost_details?.upstream_inference_cost) ||
+				numberOrZero(usage.cost_in_usd_ticks) / 1e10,
+			response_metadata: JSON.stringify(responseMetadata),
 		});
 	}

package/src/plugins/think/think.js CHANGED Viewed

@@ -1,12 +1,11 @@
-import config from "../../agent/config.js";
 import docs from "./thinkDoc.js";
-const { THINK } = config;
+const THINK = process.env.RUMMY_THINK === "1";
 export default class Think {
 	constructor(core) {
 		core.registerScheme({ modelVisible: 0, category: "logging" });
-		if (THINK === "1") {
+		if (THINK) {
 			core.ensureTool();
 			core.filter("instructions.toolDocs", async (docsMap) => {
 				docsMap.think = docs;

package/src/plugins/think/thinkDoc.md CHANGED Viewed

@@ -1,7 +1,5 @@
 ## <think>[reasoning]</think> - Think before acting
-* Use <think></think> before any other tools to plan your approach
-<!-- Positioning: think first, then act. Prevents degenerate tool-call storms. -->
+Example: <think>Plan: <search> for X; <get> the top-ranked result; distill into known://Y.</think>
-* Reasoning inside <think></think> is private — it does not appear in your context
-<!-- Frees the model to reason without consuming context budget. -->
+* Reasoning inside <think></think> is private — it does not appear in your context.

package/src/plugins/unknown/README.md CHANGED Viewed

@@ -9,7 +9,7 @@ The Rumsfeld mechanism. The model registers what it doesn't know before acting.
 - **Tool**: `unknown`
 - **Category**: `unknown`
 - **Handler**: None — recorded by TurnExecutor, deduplicated against existing unknowns.
-- **Filter**: `assembly.user` at priority 200 — renders `<unknowns>` adjacent to `<prompt>` (priority 300), after `<performed>` (priority 100). Unknowns are active work, not stable environment state; they belong in the user packet.
+- **Filter**: `assembly.user` at priority 150 — renders `<unknowns>` after `<log>` (priority 100) and before `<instructions>` (priority 165) in the sandwich. Unknowns are active work, not stable environment state; they belong in the user packet.
 ## Projection

package/src/plugins/unknown/unknown.js CHANGED Viewed

@@ -1,3 +1,5 @@
+import { renderEntry, SUMMARY_MAX_CHARS } from "../helpers.js";
 export default class Unknown {
 	constructor(core) {
 		core.ensureTool();
@@ -7,7 +9,10 @@ export default class Unknown {
 		core.on("handler", this.handler.bind(this));
 		core.on("visible", this.full.bind(this));
 		core.on("summarized", this.summary.bind(this));
-		core.filter("assembly.user", this.assembleUnknowns.bind(this), 150);
+		core.filter("assembly.user", this.assembleUnknowns.bind(this), 175);
+		// Hidden from the advertised tool list — the model writes unknowns
+		// via <set path="unknown://..."/>. The unknown:// scheme lifecycle
+		// is taught in instructions-user.md, not in a separate tooldoc.
 		core.markHidden();
 	}
@@ -28,12 +33,12 @@ export default class Unknown {
 			return;
 		}
-		// summary > body for slug; lets the model round-trip via <get>.
+		// tags > body for slug; lets the model round-trip via <get>.
 		const unknownPath = await store.slugPath(
 			runId,
 			"unknown",
 			entry.body,
-			entry.attributes?.summary,
+			entry.attributes?.tags,
 		);
 		await store.set({
 			runId,
@@ -49,11 +54,10 @@ export default class Unknown {
 		return entry.body;
 	}
-	// First 500 chars; matches knowns/prompt summarized.
+	// First SUMMARY_MAX_CHARS of the body. Matches <known> / <prompt>.
 	summary(entry) {
 		if (!entry.body) return "";
-		if (entry.body.length <= 500) return entry.body;
-		return `${entry.body.slice(0, 500)}\n[truncated — promote to see the full question]`;
+		return entry.body.slice(0, SUMMARY_MAX_CHARS);
 	}
 	async assembleUnknowns(content, ctx) {
@@ -69,18 +73,12 @@ function renderUnknownTag(entry) {
 		typeof entry.attributes === "string"
 			? JSON.parse(entry.attributes)
 			: entry.attributes;
-	const turn = entry.source_turn ? ` turn="${entry.source_turn}"` : "";
-	const visibility = entry.visibility
-		? ` visibility="${entry.visibility}"`
-		: "";
-	const tokens = entry.aTokens != null ? ` tokens="${entry.aTokens}"` : "";
-	const summary =
-		typeof attrs?.summary === "string"
-			? ` summary="${attrs.summary.replace(/"/g, "'").slice(0, 80)}"`
-			: "";
-	const attrStr = `${turn}${summary}${visibility}${tokens}`;
-	if (entry.body) {
-		return `<unknown path="${entry.path}"${attrStr}>${entry.body}</unknown>`;
+	const meta = {};
+	if (entry.source_turn) meta.turn = entry.source_turn;
+	if (typeof attrs?.tags === "string") {
+		meta.tags = attrs.tags.slice(0, 80);
 	}
-	return `<unknown path="${entry.path}"${attrStr}/>`;
+	if (entry.visibility) meta.visibility = entry.visibility;
+	if (entry.aTokens != null) meta.tokens = entry.aTokens;
+	return renderEntry(entry.path, meta, entry.body);
 }

package/src/plugins/update/update.js CHANGED Viewed

@@ -1,16 +1,8 @@
 import docs from "./updateDoc.js";
-const TERMINAL_STATUSES = new Set([200, 204, 422, 500]);
 const CONTRACT_REMINDER = "Missing update";
-const EMPTY_RESPONSE_REMINDER =
-	"Response empty - Update with status 500 if unable to fulfill request.";
-function isValidStatus(status) {
-	if (TERMINAL_STATUSES.has(status)) return true;
-	return Number.isInteger(status) && status >= 100 && status < 200;
-}
+const EMPTY_RESPONSE_REMINDER = "Response empty";
 export default class Update {
 	#core;
@@ -32,53 +24,14 @@ export default class Update {
 	}
 	async handler(entry, rummy) {
-		const { entries: store, sequence: turn, runId, loopId } = rummy;
-		const status = entry.attributes?.status ?? 102;
-		const validation = await rummy.hooks.instructions.validateNavigation(
-			status,
-			rummy,
-		);
-		if (!validation.ok) {
-			entry.state = "failed";
-			entry.outcome = "invalid_navigation";
-			entry.body = validation.reason;
-			await store.set({
-				runId,
-				turn,
-				loopId,
-				path: entry.resultPath,
-				body: validation.reason,
-				state: "failed",
-				outcome: "invalid_navigation",
-				attributes: { status },
-			});
-			return;
-		}
-		if (!isValidStatus(status)) {
-			entry.state = "failed";
-			entry.outcome = "invalid_status";
-			const message = `Invalid status ${status} on update — use 1xx to continue or 200 to conclude.`;
-			entry.body = message;
-			await store.set({
-				runId,
-				turn,
-				loopId,
-				path: entry.resultPath,
-				body: message,
-				state: "failed",
-				outcome: "invalid_status",
-				attributes: { status },
-			});
-			return;
-		}
-		await rummy.update(entry.body, { status });
+		await rummy.update(entry.body, { status: entry.attributes?.status });
 	}
 	async resolve({ recorded, content, runId, turn, loopId, rummy }) {
 		const entry = recorded.findLast((e) => e.scheme === "update");
-		const status = entry?.attributes?.status ?? 102;
+		const status = entry?.attributes?.status;
 		const failed = entry?.state === "failed";
-		const isTerminal = TERMINAL_STATUSES.has(status) && !failed;
+		const isTerminal = status === 200 && !failed;
 		let summaryText = null;
 		let updateText = null;
 		if (entry?.body && !failed) {

package/src/plugins/update/updateDoc.md CHANGED Viewed

@@ -1,8 +1,23 @@
-## <update status="N">{brief status}</update> - Report turn status (exactly one per turn, at the end)
-<!-- Header defines position, frequency, and status code requirement. -->
+## <update status="N">{ direct answer or one-line summary }</update> - Turn termination
-YOU MUST refer to your current stage instructions for valid values of N.
-<!-- Single source of truth for codes is the current phase instructions block, not this doc. Listing codes here leaks termination knowledge (e.g. 200) that strong models use to short-circuit the protocol. -->
+YOU MUST conclude every turn with one (and only one) <update status="N"></update>.
+YOU MUST keep the update body to <= 80 characters.
+YOU MUST use status 102 for continuation and 200 for final delivery.
-YOU MUST keep <update></update> body to <= 80 characters.
-<!-- Length cap. -->
+Example:
+	{ demote irrelevant source entries and log entries }
+	<set path="known://plan"><<SEARCH
+	- [ ] Distill geography unknowns
+	SEARCH
+	<<REPLACE
+	- [x] Distill geography unknowns
+	REPLACE</set>
+	<update status="102">distilled three unknowns into known://trivia/geography/capitals</update>
+Example:
+	<set path="known://plan"><<SEARCH
+	- [ ] Deliver direct answer
+	SEARCH
+	<<REPLACE
+	- [x] Deliver direct answer
+	REPLACE</set>
+	<update status="200">Paris</update>

package/src/plugins/xai/xai.js CHANGED Viewed

@@ -1,12 +1,28 @@
-import config from "../../agent/config.js";
 import msg from "../../agent/messages.js";
-import { parseRetryAfter } from "../../llm/errors.js";
+import { chatCompletionStream } from "../../llm/openaiStream.js";
-const { FETCH_TIMEOUT } = config;
+const FETCH_TIMEOUT = Number(process.env.RUMMY_FETCH_TIMEOUT);
+// reasoning_effort takes low|medium|high|none. Models that don't support
+// the parameter reject the request with 400, so the env knob is opt-in:
+// set it only on profiles targeting a model that accepts it.
+const REASONING_EFFORT = process.env.RUMMY_REASONING_EFFORT;
 const PROVIDER = "xai";
-// Inert unless XAI_BASE_URL set; xai/{model} aliases; normalizes to OpenAI envelope.
+// Inert unless XAI_BASE_URL set; xai/{model} aliases.
+//
+// XAI_BASE_URL points at xAI's v1 root (e.g. https://api.x.ai/v1).
+// We POST to {base}/chat/completions and stream the response via the
+// shared OpenAI-compatible client — this is the path that surfaces
+// reasoning_content deltas. The /v1/responses endpoint is xAI's newer
+// API but its non-streaming output drops reasoning content (we still
+// pay for it via reasoning_tokens; we just never see it). Streaming on
+// /v1/responses uses a different event shape that our shared stream
+// client doesn't speak. So we use /v1/chat/completions: caching is
+// preserved via the `x-grok-conv-id` header (xAI's chat-completions
+// equivalent of the /v1/responses `prompt_cache_key` body field).
+// See https://docs.x.ai/developers/advanced-api-usage/prompt-caching.
 export default class Xai {
 	#baseUrl;
 	#apiKey;
@@ -15,7 +31,22 @@ export default class Xai {
 	constructor(core) {
 		const baseUrl = process.env.XAI_BASE_URL;
 		if (!baseUrl) return;
-		this.#baseUrl = baseUrl;
+		this.#baseUrl = baseUrl.replace(/\/$/, "");
+		// Fail-fast on the legacy `/v1/responses` endpoint (used in earlier
+		// rummy versions before we switched to streaming /chat/completions).
+		// Composing `${baseUrl}/chat/completions` against a stale shell
+		// `XAI_BASE_URL=https://api.x.ai/v1/responses` produces a 404 route
+		// that escapes to AgentLoop's outer catch and 500-storms a sweep
+		// silently. Throwing at construction surfaces the env trap before
+		// any task starts (verified pathology: 2026-05-01 sweep, 31/31
+		// status=500). xAI's API root ends in `/v1`; anything else is wrong.
+		if (!/\/v1$/.test(this.#baseUrl)) {
+			throw new Error(
+				`XAI_BASE_URL must be the API root ending in /v1 (got "${this.#baseUrl}"). ` +
+					"Likely a stale shell env from earlier /v1/responses usage; " +
+					"set XAI_BASE_URL=https://api.x.ai/v1 (or the relevant proxy root).",
+			);
+		}
 		this.#apiKey = process.env.XAI_API_KEY;
 		const wireModel = (alias) => alias.split("/").slice(1).join("/");
@@ -32,119 +63,57 @@ export default class Xai {
 	async #completion(messages, model, options = {}) {
 		if (!this.#apiKey) throw new Error(msg("error.xai_api_key_missing"));
-		const body = { model, input: messages };
+		const body = { model, messages };
+		if (options.maxTokens !== undefined) body.max_tokens = options.maxTokens;
 		if (options.temperature !== undefined)
 			body.temperature = options.temperature;
-		// xAI auto-caches per-server; stable prompt_cache_key keeps a multi-
-		// turn run pinned to the same backend so the cached prefix actually
-		// hits. Without this, requests load-balance and cache_tokens stays
-		// near-zero. See https://docs.x.ai/developers/advanced-api-usage/prompt-caching.
-		if (options.runAlias) body.prompt_cache_key = options.runAlias;
+		if (REASONING_EFFORT) body.reasoning_effort = REASONING_EFFORT;
 		const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT);
 		const signal = options.signal
 			? AbortSignal.any([options.signal, timeoutSignal])
 			: timeoutSignal;
-		const response = await fetch(this.#baseUrl, {
-			method: "POST",
-			headers: {
-				Authorization: `Bearer ${this.#apiKey}`,
-				"Content-Type": "application/json",
-			},
-			body: JSON.stringify(body),
-			signal,
-		});
-		if (!response.ok) {
-			const errorBody = await response.text();
-			const retryAfter = parseRetryAfter(response.headers.get("retry-after"));
-			if (response.status === 401 || response.status === 403) {
-				const err = new Error(
+		const headers = {
+			Authorization: `Bearer ${this.#apiKey}`,
+		};
+		// Pin caching to the run alias. xAI's chat-completions cache is
+		// per-server; same conv-id routes to the same backend, which is
+		// where the cached prefix lives. Without this, requests load-
+		// balance across servers and cached_tokens stays near zero.
+		if (options.runAlias) headers["x-grok-conv-id"] = options.runAlias;
+		try {
+			return await chatCompletionStream({
+				url: `${this.#baseUrl}/chat/completions`,
+				headers,
+				body,
+				signal,
+			});
+		} catch (err) {
+			if (err.status === 401 || err.status === 403) {
+				throw new Error(
 					msg("error.xai_auth", {
-						status: `${response.status} - ${errorBody}`,
+						status: `${err.status} - ${err.body}`,
 					}),
 				);
-				err.status = response.status;
-				err.body = errorBody;
-				throw err;
 			}
-			const err = new Error(
-				msg("error.xai_api", {
-					status: `${response.status} - ${errorBody}`,
-				}),
-			);
-			err.status = response.status;
-			err.body = errorBody;
-			err.retryAfter = retryAfter;
-			throw err;
-		}
-		return this.#normalize(await response.json());
-	}
-	#normalize(data) {
-		let content = "";
-		let reasoningContent = null;
-		for (const item of data.output) {
-			if (item.type === "reasoning") {
-				const text = this.#extractText(item.content);
-				if (text)
-					reasoningContent = reasoningContent
-						? `${reasoningContent}\n${text}`
-						: text;
-			}
-			if (item.type === "message") {
-				const text = this.#extractText(item.content);
-				if (text) content = content ? `${content}\n${text}` : text;
+			if (err.status) {
+				throw new Error(
+					msg("error.xai_api", {
+						status: `${err.status} - ${err.body}`,
+					}),
+				);
 			}
+			throw err;
 		}
-		const { usage } = data;
-		const inputTokens = usage.input_tokens;
-		const outputTokens = usage.output_tokens;
-		// Optional per xAI API; absent on providers that don't surface them.
-		const cached = usage.input_tokens_details?.cached_tokens;
-		const reasoningTokens = usage.output_tokens_details?.reasoning_tokens;
-		const costTicks = usage.cost_in_usd_ticks;
-		return {
-			choices: [
-				{
-					message: {
-						role: "assistant",
-						content,
-						reasoning_content: reasoningContent,
-					},
-				},
-			],
-			usage: {
-				prompt_tokens: inputTokens,
-				cached_tokens: cached === undefined ? 0 : cached,
-				completion_tokens: outputTokens,
-				reasoning_tokens: reasoningTokens === undefined ? 0 : reasoningTokens,
-				total_tokens: inputTokens + outputTokens,
-				cost: costTicks === undefined ? 0 : costTicks / 10_000_000_000,
-			},
-		};
-	}
-	#extractText(content) {
-		if (typeof content === "string") return content;
-		if (!Array.isArray(content)) return null;
-		const joined = content
-			.filter((c) => c.type === "text" || c.type === "output_text")
-			.map((c) => c.text)
-			.join("\n");
-		return joined ? joined : null;
 	}
 	async #getContextSize(model) {
 		if (this.#contextCache.has(model)) return this.#contextCache.get(model);
 		if (!this.#apiKey) throw new Error(msg("error.xai_api_key_missing"));
-		const modelsUrl = this.#baseUrl.replace(/\/responses$/, "/models");
-		const res = await fetch(modelsUrl, {
+		const res = await fetch(`${this.#baseUrl}/models`, {
 			headers: { Authorization: `Bearer ${this.#apiKey}` },
 			signal: AbortSignal.timeout(FETCH_TIMEOUT),
 		});
@@ -164,10 +133,7 @@ export default class Xai {
 			}
 		}
-		const langUrl = this.#baseUrl.replace(
-			/\/responses$/,
-			`/language-models/${model}`,
-		);
+		const langUrl = `${this.#baseUrl}/language-models/${model}`;
 		// Optional probe; failure falls through to terminal throw below.
 		const langRes = await fetch(langUrl, {
 			headers: { Authorization: `Bearer ${this.#apiKey}` },