npm - @possumtech/rummy - Versions diffs - 0.2.8 → 0.3.1 - Mend

@possumtech/rummy 0.2.8 → 0.3.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (114)

package/.env.example +13 -2
package/EXCEPTIONS.md +46 -0
package/PLUGINS.md +422 -188
package/SPEC.md +440 -106
package/migrations/001_initial_schema.sql +5 -3
package/package.json +17 -5
package/service.js +5 -3
package/src/agent/AgentLoop.js +252 -55
package/src/agent/ContextAssembler.js +20 -4
package/src/agent/KnownStore.js +82 -25
package/src/agent/ProjectAgent.js +4 -1
package/src/agent/ResponseHealer.js +86 -32
package/src/agent/TurnExecutor.js +542 -207
package/src/agent/XmlParser.js +77 -41
package/src/agent/known_store.sql +68 -4
package/src/agent/schemes.sql +3 -0
package/src/agent/tokens.js +7 -21
package/src/agent/turns.sql +15 -1
package/src/hooks/HookRegistry.js +7 -0
package/src/hooks/Hooks.js +15 -0
package/src/hooks/PluginContext.js +14 -1
package/src/hooks/RummyContext.js +16 -4
package/src/hooks/ToolRegistry.js +77 -19
package/src/llm/LlmProvider.js +27 -8
package/src/llm/OpenAiClient.js +20 -0
package/src/llm/OpenRouterClient.js +24 -2
package/src/llm/XaiClient.js +47 -2
package/src/plugins/ask_user/README.md +4 -4
package/src/plugins/ask_user/ask_user.js +5 -5
package/src/plugins/ask_user/ask_userDoc.js +29 -0
package/src/plugins/budget/README.md +31 -0
package/src/plugins/budget/budget.js +55 -0
package/src/plugins/cp/README.md +5 -4
package/src/plugins/cp/cp.js +10 -6
package/src/plugins/cp/cpDoc.js +29 -0
package/src/plugins/engine/engine.sql +1 -8
package/src/plugins/engine/turn_context.sql +4 -9
package/src/plugins/env/README.md +3 -4
package/src/plugins/env/env.js +5 -5
package/src/plugins/env/envDoc.js +29 -0
package/src/plugins/file/README.md +9 -12
package/src/plugins/file/file.js +34 -35
package/src/plugins/get/README.md +2 -2
package/src/plugins/get/get.js +77 -6
package/src/plugins/get/getDoc.js +51 -0
package/src/plugins/hedberg/hedberg.js +2 -1
package/src/plugins/hedberg/matcher.js +10 -29
package/src/plugins/hedberg/normalize.js +28 -0
package/src/plugins/hedberg/patterns.js +25 -27
package/src/plugins/hedberg/sed.js +17 -10
package/src/plugins/index.js +66 -14
package/src/plugins/instructions/README.md +6 -2
package/src/plugins/instructions/instructions.js +20 -4
package/src/plugins/instructions/preamble.md +19 -5
package/src/plugins/known/README.md +10 -7
package/src/plugins/known/known.js +23 -17
package/src/plugins/known/knownDoc.js +34 -0
package/src/plugins/mv/README.md +5 -4
package/src/plugins/mv/mv.js +27 -6
package/src/plugins/mv/mvDoc.js +45 -0
package/src/plugins/performed/README.md +15 -0
package/src/plugins/performed/performed.js +45 -0
package/src/plugins/persona/persona.js +78 -0
package/src/plugins/previous/README.md +3 -2
package/src/plugins/previous/previous.js +33 -24
package/src/plugins/progress/README.md +1 -2
package/src/plugins/progress/progress.js +33 -21
package/src/plugins/prompt/README.md +5 -5
package/src/plugins/prompt/prompt.js +15 -17
package/src/plugins/rm/README.md +4 -4
package/src/plugins/rm/rm.js +32 -20
package/src/plugins/rm/rmDoc.js +30 -0
package/src/plugins/rpc/README.md +15 -28
package/src/plugins/rpc/rpc.js +42 -77
package/src/plugins/set/README.md +13 -12
package/src/plugins/set/set.js +107 -16
package/src/plugins/set/setDoc.js +49 -0
package/src/plugins/sh/README.md +4 -4
package/src/plugins/sh/sh.js +5 -5
package/src/plugins/sh/shDoc.js +29 -0
package/src/plugins/{skills/skills.js → skill/skill.js} +10 -51
package/src/plugins/summarize/README.md +6 -5
package/src/plugins/summarize/summarize.js +7 -6
package/src/plugins/summarize/summarizeDoc.js +33 -0
package/src/plugins/telemetry/telemetry.js +16 -9
package/src/plugins/think/README.md +20 -0
package/src/plugins/think/think.js +5 -0
package/src/plugins/unknown/README.md +6 -5
package/src/plugins/unknown/unknown.js +12 -9
package/src/plugins/unknown/unknownDoc.js +31 -0
package/src/plugins/update/README.md +3 -8
package/src/plugins/update/update.js +7 -6
package/src/plugins/update/updateDoc.js +33 -0
package/src/server/ClientConnection.js +59 -45
package/src/server/RpcRegistry.js +52 -4
package/src/sql/v_model_context.sql +10 -25
package/src/plugins/ask_user/docs.md +0 -2
package/src/plugins/cp/docs.md +0 -2
package/src/plugins/current/README.md +0 -14
package/src/plugins/current/current.js +0 -47
package/src/plugins/env/docs.md +0 -4
package/src/plugins/get/docs.md +0 -10
package/src/plugins/known/docs.md +0 -3
package/src/plugins/mv/docs.md +0 -2
package/src/plugins/rm/docs.md +0 -6
package/src/plugins/set/docs.md +0 -6
package/src/plugins/sh/docs.md +0 -2
package/src/plugins/skills/README.md +0 -25
package/src/plugins/store/README.md +0 -20
package/src/plugins/store/docs.md +0 -6
package/src/plugins/store/store.js +0 -63
package/src/plugins/summarize/docs.md +0 -4
package/src/plugins/unknown/docs.md +0 -5
package/src/plugins/update/docs.md +0 -4

package/src/plugins/sh/README.md CHANGED Viewed

@@ -5,9 +5,8 @@ Proposes shell command execution for client approval.
 ## Registration
 - **Tool**: `sh`
-- **Modes**: act only
-- **Category**: act
-- **Handler**: Upserts the entry as `proposed` state. The client must approve execution.
+- **Category**: `logging`
+- **Handler**: Upserts the entry at status 202 (proposed). The client must approve execution.
 ## Projection
@@ -15,4 +14,5 @@ Shows `sh {command}` followed by the entry body.
 ## Behavior
-All shell commands require client-side approval — nothing executes server-side. Act mode only; blocked in ask mode.
+All shell commands require client-side approval — nothing executes
+server-side. Act mode only; excluded in ask mode by `resolveForLoop`.

package/src/plugins/sh/sh.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { readFileSync } from "node:fs";
+import docs from "./shDoc.js";
 export default class Sh {
 	#core;
@@ -9,10 +9,10 @@ export default class Sh {
 		core.on("handler", this.handler.bind(this));
 		core.on("full", this.full.bind(this));
 		core.on("summary", this.summary.bind(this));
-		const docs = readFileSync(new URL("./docs.md", import.meta.url), "utf8");
-		core.filter("instructions.toolDocs", async (content) =>
-			content ? `${content}\n\n${docs}` : docs,
-		);
+		core.filter("instructions.toolDocs", async (docsMap) => {
+			docsMap.sh = docs;
+			return docsMap;
+		});
 	}
 	async handler(entry, rummy) {

package/src/plugins/sh/shDoc.js ADDED Viewed

@@ -0,0 +1,29 @@
+// Tool doc for <sh>. Each entry: [text, rationale].
+// Text goes to the model. Rationale stays in source.
+// Changing ANY line requires reading ALL rationales first.
+const LINES = [
+	// --- Syntax
+	["## <sh>[command]</sh> - Run a shell command with side effects"],
+	// --- Examples: install and test — real mutations
+	[
+		"Example: <sh>npm install express</sh>",
+		"Package install. Shows a real side-effect command.",
+	],
+	[
+		"Example: <sh>npm test</sh>",
+		"Test execution. Another common side-effect action.",
+	],
+	// --- Constraints
+	[
+		"* YOU MUST NOT use <sh/> to read, create, or edit files — use <get/> and <set/>",
+		"Forces file operations through the entry system. Prevents untracked mutations.",
+	],
+	[
+		"* YOU MUST use <env/> for commands without side effects",
+		"Reinforces the env/sh split. Read = env, mutate = sh.",
+	],
+];
+export default LINES.map(([text]) => text).join("\n");

package/src/plugins/{skills/skills.js → skill/skill.js} RENAMED Viewed

@@ -1,15 +1,17 @@
 import fs from "node:fs/promises";
 import { join } from "node:path";
-export default class Skills {
+export default class Skill {
 	#core;
 	constructor(core) {
 		this.#core = core;
 		core.registerScheme({
 			name: "skill",
-			category: "knowledge",
+			category: "data",
 		});
+		core.hooks.tools.onView("skill", (entry) => entry.body);
 		const r = core.hooks.rpc.registry;
 		r.register("skill/add", {
@@ -22,19 +24,12 @@ export default class Skills {
 				const body = await loadFile("skills", params.name);
 				const store = ctx.projectAgent.entries;
-				await store.upsert(
-					runRow.id,
-					runRow.next_turn,
-					`skill://${params.name}`,
-					body,
-					200,
-					{
-						attributes: {
-							name: params.name,
-							source: filePath("skills", params.name),
-						},
+				await store.upsert(runRow.id, 0, `skill://${params.name}`, body, 200, {
+					attributes: {
+						name: params.name,
+						source: filePath("skills", params.name),
 					},
-				);
+				});
 				return { status: "ok", skill: params.name };
 			},
@@ -97,43 +92,7 @@ export default class Skills {
 			requiresInit: true,
 		});
-		r.register("persona/set", {
-			handler: async (params, ctx) => {
-				if (!params.run) throw new Error("run is required");
-				const runRow = await ctx.db.get_run_by_alias.get({ alias: params.run });
-				if (!runRow) throw new Error(`Run not found: ${params.run}`);
-				let text = params.text;
-				if (params.name && !text) {
-					text = await loadFile("personas", params.name);
-				}
-				await ctx.db.update_run_config.run({
-					id: runRow.id,
-					temperature: null,
-					persona: text || null,
-					context_limit: null,
-					model: null,
-				});
-				return { status: "ok" };
-			},
-			description:
-				"Set persona on a run. Pass name or text. Pass neither to clear.",
-			params: {
-				run: "string — run alias",
-				name: "string? — persona filename (without .md)",
-				text: "string? — raw persona text (overrides name)",
-			},
-			requiresInit: true,
-		});
-		r.register("listPersonas", {
-			handler: async () => listAvailable("personas"),
-			description: "List available persona files. Returns [{ name, path }].",
-			requiresInit: true,
-		});
+		// Persona methods extracted to persona plugin.
 	}
 }

package/src/plugins/summarize/README.md CHANGED Viewed

@@ -1,13 +1,12 @@
 # summarize
-Structural tool for model-generated summaries.
+Lifecycle signal — the model declares it has completed the task.
 ## Registration
 - **Tool**: `summarize`
-- **Modes**: ask, act
-- **Category**: structural
-- **Handler**: None — projection only.
+- **Category**: `logging`
+- **Handler**: None — recorded by TurnExecutor as a lifecycle signal.
 ## Projection
@@ -15,4 +14,6 @@ Shows `summarize` followed by the entry body.
 ## Behavior
-No handler logic. The tool registration exists so the model can emit summary entries that appear in context via projection.
+If the model sends `<summarize>` but actions in the same turn failed,
+TurnExecutor overrides it to `<update>` — the model's assertion that
+it's done is false.

package/src/plugins/summarize/summarize.js CHANGED Viewed

@@ -1,17 +1,18 @@
-import { readFileSync } from "node:fs";
+import docs from "./summarizeDoc.js";
 export default class Summarize {
 	#core;
 	constructor(core) {
 		this.#core = core;
-		core.registerScheme({ category: "structural" });
+		core.ensureTool();
+		core.registerScheme({ category: "logging" });
 		core.on("full", this.full.bind(this));
 		core.on("summary", this.summary.bind(this));
-		const docs = readFileSync(new URL("./docs.md", import.meta.url), "utf8");
-		core.filter("instructions.toolDocs", async (content) =>
-			content ? `${content}\n\n${docs}` : docs,
-		);
+		core.filter("instructions.toolDocs", async (docsMap) => {
+			docsMap.summarize = docs;
+			return docsMap;
+		});
 	}
 	full(entry) {

package/src/plugins/summarize/summarizeDoc.js ADDED Viewed

@@ -0,0 +1,33 @@
+// Tool doc for <summarize>. Each entry: [text, rationale].
+// Text goes to the model. Rationale stays in source.
+// Changing ANY line requires reading ALL rationales first.
+const LINES = [
+	// --- Syntax
+	["## <summarize>[answer or summary]</summarize> - Signal completion"],
+	// --- Examples: answer and task completion
+	[
+		"Example: <summarize>The port is 8080</summarize>",
+		"Direct answer. Shows summarize as the vehicle for delivering answers.",
+	],
+	[
+		"Example: <summarize>Installed express, updated config</summarize>",
+		"Task summary. Shows summarize for action completion.",
+	],
+	// --- Constraints: RFC-style MUST/MUST NOT
+	[
+		"* YOU MUST use <summarize> when done — describes the final state",
+		"Completion signal. Without this, the loop continues indefinitely.",
+	],
+	[
+		"* YOU MUST NOT use <summarize> if still working — use <update/> instead",
+		"Mutual exclusion with update. Prevents premature completion.",
+	],
+	[
+		"* YOU MUST keep <summarize> to <= 80 characters",
+		"Length cap. Matches the summary attribute constraint. Prevents verbose output.",
+	],
+];
+export default LINES.map(([text]) => text).join("\n");

package/src/plugins/telemetry/telemetry.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { writeFileSync } from "node:fs";
+import { writeFile } from "node:fs/promises";
 import { join } from "node:path";
 export default class Telemetry {
@@ -75,8 +75,8 @@ export default class Telemetry {
 		result,
 		responseMessage,
 		content,
-		commands,
 		unparsed,
+		assembledTokens,
 		systemMsg,
 		userMsg,
 	}) {
@@ -85,17 +85,20 @@ export default class Telemetry {
 		// assistant://N — the model's raw response
 		await store.upsert(runId, turn, `assistant://${turn}`, content, 200, {
 			loopId,
+			fidelity: "archive",
 		});
 		// system://N, user://N — assembled messages as audit
 		if (systemMsg) {
 			await store.upsert(runId, turn, `system://${turn}`, systemMsg, 200, {
 				loopId,
+				fidelity: "archive",
 			});
 		}
 		if (userMsg) {
 			await store.upsert(runId, turn, `user://${turn}`, userMsg, 200, {
 				loopId,
+				fidelity: "archive",
 			});
 		}
@@ -112,7 +115,7 @@ export default class Telemetry {
 				model: result.model || null,
 			}),
 			200,
-			{ loopId },
+			{ loopId, fidelity: "archive" },
 		);
 		// reasoning://N
@@ -123,7 +126,7 @@ export default class Telemetry {
 				`reasoning://${turn}`,
 				responseMessage.reasoning_content,
 				200,
-				{ loopId },
+				{ loopId, fidelity: "archive" },
 			);
 		}
@@ -131,6 +134,7 @@ export default class Telemetry {
 		if (unparsed) {
 			await store.upsert(runId, turn, `content://${turn}`, unparsed, 200, {
 				loopId,
+				fidelity: "archive",
 			});
 		}
@@ -147,8 +151,13 @@ export default class Telemetry {
 			usage.completion_tokens_details?.reasoning_tokens ||
 			usage.output_tokens_details?.reasoning_tokens ||
 			0;
+		// Use LLM's actual prompt_tokens as the ground-truth context size when available.
+		// This back-fills context_tokens so get_last_context_tokens reflects reality for the next turn.
+		const actualContextTokens = usage.prompt_tokens || assembledTokens || 0;
 		await rummy.db.update_turn_stats.run({
 			id: rummy.turnId,
+			context_tokens: actualContextTokens,
+			reasoning_content: responseMessage?.reasoning_content || null,
 			prompt_tokens: usage.prompt_tokens ?? 0,
 			cached_tokens: cachedTokens ?? 0,
 			completion_tokens: usage.completion_tokens ?? 0,
@@ -187,10 +196,8 @@ export default class Telemetry {
 	#flush() {
 		if (!this.#lastRunPath || this.#turnLog.length === 0) return;
-		try {
-			writeFileSync(this.#lastRunPath, `${this.#turnLog.join("\n")}\n`);
-		} catch {
-			// RUMMY_HOME may not exist yet
-		}
+		writeFile(this.#lastRunPath, `${this.#turnLog.join("\n")}\n`).catch(
+			() => {},
+		);
 	}
 }

package/src/plugins/think/README.md ADDED Viewed

@@ -0,0 +1,20 @@
+# think
+Provides a `<think>` tag for model reasoning. Not a tool — does not
+appear in the tool list.
+## Registration
+- **Scheme**: `think` — `category: "logging"`, `model_visible: 0`
+- **No handler, no view, no tool registration**
+## Behavior
+The model writes `<think>reasoning</think>` before tool commands.
+XmlParser captures it, TurnExecutor records it as a `think://` entry.
+Invisible to the model on subsequent turns (`model_visible: 0`).
+Available for debugging and audit.
+Models with server-side reasoning (extended thinking) use that
+capability independently. The `<think>` tag is a floor — every model
+gets at least this.

package/src/plugins/think/think.js ADDED Viewed

@@ -0,0 +1,5 @@
+export default class Think {
+	constructor(core) {
+		core.registerScheme({ modelVisible: 0, category: "logging" });
+	}
+}

package/src/plugins/unknown/README.md CHANGED Viewed

@@ -7,9 +7,9 @@ The Rumsfeld mechanism. The model registers what it doesn't know before acting.
 ## Registration
 - **Tool**: `unknown`
-- **Modes**: ask, act
-- **Category**: structural
+- **Category**: `unknown`
 - **Handler**: None — recorded by TurnExecutor, deduplicated against existing unknowns.
+- **Filter**: `assembly.system` at priority 300 — renders `<unknowns>` section.
 ## Projection
@@ -18,6 +18,7 @@ The Rumsfeld mechanism. The model registers what it doesn't know before acting.
 ## Behavior
 Unknowns are sticky — they persist across turns until the model explicitly
-stores or removes them. The model investigates unknowns using `<get>`,
-`<env>`, or `<ask_user>`, then removes resolved ones with `<rm>`.
-Server deduplicates on insert.
+removes them with `<rm>`. The model investigates unknowns using `<get>`,
+`<env>`, or `<ask_user>`, then removes resolved ones. Server deduplicates
+on insert. Each unknown renders with turn, fidelity, and tokens for
+temporal reasoning and context management.

package/src/plugins/unknown/unknown.js CHANGED Viewed

@@ -1,19 +1,20 @@
-import { readFileSync } from "node:fs";
+import docs from "./unknownDoc.js";
 export default class Unknown {
 	#core;
 	constructor(core) {
 		this.#core = core;
+		core.ensureTool();
 		core.registerScheme({
-			category: "knowledge",
+			category: "unknown",
 		});
 		core.on("full", this.full.bind(this));
 		core.filter("assembly.system", this.assembleUnknowns.bind(this), 300);
-		const docs = readFileSync(new URL("./docs.md", import.meta.url), "utf8");
-		core.filter("instructions.toolDocs", async (content) =>
-			content ? `${content}\n\n${docs}` : docs,
-		);
+		core.filter("instructions.toolDocs", async (docsMap) => {
+			docsMap.unknown = docs;
+			return docsMap;
+		});
 	}
 	full(entry) {
@@ -24,9 +25,11 @@ export default class Unknown {
 		const entries = ctx.rows.filter((r) => r.category === "unknown");
 		if (entries.length === 0) return content;
-		const lines = entries.map(
-			(u) => `<unknown path="${u.path}">${u.body}</unknown>`,
-		);
+		const lines = entries.map((u) => {
+			const fidelity = u.fidelity ? ` fidelity="${u.fidelity}"` : "";
+			const tokens = u.tokens ? ` tokens="${u.tokens}"` : "";
+			return `<unknown path="${u.path}" turn="${u.source_turn || u.turn}"${fidelity}${tokens}>${u.body}</unknown>`;
+		});
 		return `${content}\n\n<unknowns>\n${lines.join("\n")}\n</unknowns>`;
 	}
 }

package/src/plugins/unknown/unknownDoc.js ADDED Viewed

@@ -0,0 +1,31 @@
+// Tool doc for <unknown>. Each entry: [text, rationale].
+// Text goes to the model. Rationale stays in source.
+// Changing ANY line requires reading ALL rationales first.
+const LINES = [
+	// --- Syntax: body = what you need to learn
+	[
+		`## <unknown>[specific thing I need to learn]</unknown> - Track open questions`,
+	],
+	// --- Examples: concrete unknowns, not abstract
+	[
+		`Example: <unknown path="unknown://answer">contents of answer.txt</unknown>`,
+		`Specific and actionable. Shows that unknowns are concrete investigation targets.`,
+	],
+	[
+		`Example: <unknown>which database adapter is configured</unknown>`,
+		`Domain question. Shows unknowns for configuration/architecture questions.`,
+	],
+	// --- Lifecycle: register → investigate → resolve
+	[
+		`* Investigate with Tool Commands`,
+		`Cross-tool lifecycle: unknowns drive get/env/ask_user actions.`,
+	],
+	[
+		`* When resolved or irrelevant, remove with <rm path="unknown://..."/>`,
+		`Cross-tool lifecycle: rm cleans resolved unknowns from context.`,
+	],
+];
+export default LINES.map(([text]) => text).join("\n");

package/src/plugins/update/README.md CHANGED Viewed

@@ -1,18 +1,13 @@
 # update
-Structural tool for model-generated progress updates.
+Lifecycle signal — the model declares it has more work to do.
 ## Registration
 - **Tool**: `update`
-- **Modes**: ask, act
-- **Category**: structural
-- **Handler**: None — projection only.
+- **Category**: `logging`
+- **Handler**: None — recorded by TurnExecutor as a lifecycle signal.
 ## Projection
 Shows `update` followed by the entry body.
-## Behavior
-No handler logic. Allows the model to emit progress/status entries that appear in context via projection.

package/src/plugins/update/update.js CHANGED Viewed

@@ -1,17 +1,18 @@
-import { readFileSync } from "node:fs";
+import docs from "./updateDoc.js";
 export default class Update {
 	#core;
 	constructor(core) {
 		this.#core = core;
-		core.registerScheme({ category: "structural" });
+		core.ensureTool();
+		core.registerScheme({ category: "logging" });
 		core.on("full", this.full.bind(this));
 		core.on("summary", this.summary.bind(this));
-		const docs = readFileSync(new URL("./docs.md", import.meta.url), "utf8");
-		core.filter("instructions.toolDocs", async (content) =>
-			content ? `${content}\n\n${docs}` : docs,
-		);
+		core.filter("instructions.toolDocs", async (docsMap) => {
+			docsMap.update = docs;
+			return docsMap;
+		});
 	}
 	full(entry) {

package/src/plugins/update/updateDoc.js ADDED Viewed

@@ -0,0 +1,33 @@
+// Tool doc for <update>. Each entry: [text, rationale].
+// Text goes to the model. Rationale stays in source.
+// Changing ANY line requires reading ALL rationales first.
+const LINES = [
+	// --- Syntax
+	["## <update>[brief status]</update> - Signal continuation"],
+	// --- Examples: research progress and multi-step work
+	[
+		"Example: <update>Reading config files</update>",
+		"Progress checkpoint. Shows update as a status signal, not a log entry.",
+	],
+	[
+		"Example: <update>Found 3 issues, fixing first</update>",
+		"Multi-step progress. Shows update for ongoing work.",
+	],
+	// --- Constraints: RFC-style MUST/MUST NOT
+	[
+		"* YOU MUST use <update> if still working — describes the current state",
+		"Continuation signal. Triggers the next turn in the loop.",
+	],
+	[
+		"* YOU MUST NOT use <update> if done — use <summarize/> instead",
+		"Mutual exclusion with summarize. Prevents infinite loops.",
+	],
+	[
+		"* YOU MUST keep <update> to <= 80 characters",
+		"Length cap. Prevents models from writing essays in status updates.",
+	],
+];
+export default LINES.map(([text]) => text).join("\n");