npm - @possumtech/rummy - Versions diffs - 0.4.0 → 0.5.0 - Mend

@possumtech/rummy 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/.env.example +1 -0
package/FIDELITY_CONTRACT.md +172 -0
package/migrations/001_initial_schema.sql +3 -3
package/package.json +1 -1
package/src/agent/AgentLoop.js +1 -2
package/src/agent/ContextAssembler.js +2 -0
package/src/agent/KnownStore.js +1 -2
package/src/agent/ResponseHealer.js +54 -1
package/src/agent/TurnExecutor.js +51 -6
package/src/agent/XmlParser.js +150 -41
package/src/agent/known_store.sql +18 -11
package/src/hooks/PluginContext.js +8 -2
package/src/hooks/RummyContext.js +6 -3
package/src/hooks/ToolRegistry.js +23 -27
package/src/plugins/ask_user/ask_user.js +2 -2
package/src/plugins/ask_user/ask_userDoc.js +4 -2
package/src/plugins/budget/README.md +6 -4
package/src/plugins/budget/budget.js +29 -9
package/src/plugins/cp/cp.js +5 -5
package/src/plugins/cp/cpDoc.js +0 -8
package/src/plugins/engine/engine.sql +1 -1
package/src/plugins/env/env.js +4 -4
package/src/plugins/env/envDoc.js +2 -2
package/src/plugins/file/file.js +2 -7
package/src/plugins/get/get.js +31 -10
package/src/plugins/get/getDoc.js +26 -37
package/src/plugins/helpers.js +2 -2
package/src/plugins/instructions/instructions.js +6 -5
package/src/plugins/instructions/preamble.md +41 -33
package/src/plugins/known/known.js +17 -16
package/src/plugins/known/knownDoc.js +1 -13
package/src/plugins/mv/mv.js +6 -6
package/src/plugins/mv/mvDoc.js +2 -13
package/src/plugins/previous/previous.js +10 -14
package/src/plugins/progress/progress.js +22 -5
package/src/plugins/prompt/prompt.js +14 -11
package/src/plugins/rm/rm.js +4 -4
package/src/plugins/rm/rmDoc.js +4 -8
package/src/plugins/rpc/rpc.js +1 -1
package/src/plugins/set/set.js +10 -12
package/src/plugins/set/setDoc.js +4 -4
package/src/plugins/sh/sh.js +4 -4
package/src/plugins/sh/shDoc.js +2 -2
package/src/plugins/skill/skill.js +2 -1
package/src/plugins/summarize/summarize.js +2 -2
package/src/plugins/summarize/summarizeDoc.js +9 -10
package/src/plugins/telemetry/telemetry.js +36 -11
package/src/plugins/think/think.js +2 -1
package/src/plugins/think/thinkDoc.js +3 -5
package/src/plugins/unknown/unknown.js +21 -14
package/src/plugins/unknown/unknownDoc.js +2 -6
package/src/plugins/update/update.js +2 -2
package/src/plugins/update/updateDoc.js +9 -6
package/src/sql/functions/slugify.js +13 -1
package/src/sql/v_model_context.sql +3 -3

package/.env.example CHANGED Viewed

@@ -23,6 +23,7 @@ RUMMY_MAX_STALLS=3
 RUMMY_MIN_CYCLES=3
 RUMMY_MAX_CYCLE_PERIOD=4
 RUMMY_MAX_UPDATE_REPEATS=3
+RUMMY_MAX_PATH_STAGNATION=5
 # Hygiene
 # Days to keep completed/aborted runs before purging

package/FIDELITY_CONTRACT.md ADDED Viewed

@@ -0,0 +1,172 @@
+# Fidelity Contract — Observed State vs Intended
+## Observed Behavior (traced from test/mab/results/2026-04-14T15-13-55-950Z/last_run.txt, turn 24)
+### Flow
+```
+Model emits tool
+      ↓
+Tool handler stores body in known_entries.body (raw, as model wrote it)
+      ↓
+Next turn: TurnExecutor materializes context
+      ↓
+For each row: hooks.tools.view(scheme, entry) → plugin's view hook returns projected body
+      ↓
+Projected body stored in turn_context.body with fidelity-projected token count
+      ↓
+Assembly phase: section renderers (knowns, unknowns, previous, performed) pull from ctx.rows (which has projected body) and render tags
+      ↓
+Model sees the assembled <knowns>, <previous>, etc. sections in the system prompt
+```
+### Fidelity Values (from code)
+- **full**: fully shown
+- **summary**: a "compact" form is shown — but what "compact" means varies per plugin
+- **archive**: excluded by `v_model_context` SQL before reaching any renderer (clean)
+## Three Breaks in the Intended Contract
+### Break 1 — Plugins disagree on what summary means
+Every plugin that registers view hooks decides what body to project per fidelity. Observed:
+| Plugin | full() | summary() |
+|--------|--------|-----------|
+| known | `# known ${path}\n${body}` | **same as full** (wrong) |
+| prompt | `body` | **500-char truncation + marker** (correct) |
+| budget | `body` | `body` (ok — budget is naturally short) |
+| skill | `body` | `body` (inherited default) |
+| unknown | varies — needs audit | needs audit |
+| others | needs audit | needs audit |
+The `known` plugin's `summary()` returning the full body is a direct contract violation. The summary view should return a compact representation of the entry, not the same full body.
+### Break 2 — Renderers re-apply fidelity logic
+Two renderers currently re-check entry fidelity and override the plugin's projection:
+**`known.js` `renderKnownTag`** (lines 111-115):
+```js
+if (entry.fidelity === "archive") return "";
+if (entry.fidelity === "summary") {
+    return `<${tag} path="${entry.path}"...${summary}${fidelity}${tokens}${flag}/>`;
+}
+return `<${tag} path="${entry.path}"...${summary}${fidelity}${tokens}${flag}>${entry.body}</${tag}>`;
+```
+This ignores entry.body at summary fidelity and renders a self-closing tag. It's a workaround for known.summary() returning the wrong content — a belt over broken suspenders.
+**`previous.js` `renderToolTag`** (my edit this session):
+```js
+if (entry.fidelity === "full") {
+    return `<${entry.scheme} ${attrs}>${body}</${entry.scheme}>`;
+}
+// summary: self-closing with summary attr
+```
+I added this fidelity re-check when I should have trusted the plugin's projected body. Same mistake as known, added today.
+### Break 3 — Model writes scheme headers into body
+Every known/update/unknown entry in the DB has a body that starts with `# known known://path\n`, `# update\n`, or `# unknown\n`. The model writes this because the examples in the system prompt render tags with the body prefixed by `# ${scheme} ${path}\n`.
+Then the plugin's `full()` hook prepends ANOTHER `# ${scheme} ${path}\n` when projecting. Result: duplicate headers in the rendered output.
+Observed in turn 16 update body: `"# update\n# update\nDocuments 20-22 indexed and archived."`
+And in unknown paths: the slug-generation for pathless unknowns takes the body including the `# unknown\n` prefix, resulting in URL-encoded paths like:
+```
+unknown://%23%20unknown%0ADocument%2023%20is%20missing%20from%20the%20prompt.
+```
+## The Intended Contract
+Based on the user's stated philosophy ("surface problems, don't solve them; plugin decides, renderer renders"):
+### Layer 1 — Plugin decides per fidelity
+Each plugin registers view hooks that return the body content for each fidelity value:
+```js
+core.hooks.tools.onView("known", (entry) => entry.body, "full");
+core.hooks.tools.onView("known", (entry) => "", "summary");
+```
+At archive, no view hook is called (v_model_context excludes them).
+### Layer 2 — Renderer shows the projected body
+Renderers take the projected body from `ctx.rows[].body`:
+- If non-empty, wrap in tag with body
+- If empty, render self-closing tag
+Renderers do NOT re-check entry.fidelity. They trust the plugin's projection.
+### Layer 3 — Tag attributes always present
+Tag attributes visible in both full and summary rendering:
+- `path` — always
+- `summary` — if present in entry.attributes.summary
+- `turn` — if source_turn is set
+- `status` — if status is set
+- `fidelity` — always (the value itself)
+- `tokens` — always (full-cost value, unchanged by fidelity per `set_fidelity` SQL)
+### Per-plugin view decisions (revised)
+| Plugin | Category | Full body | Summary body | Notes |
+|--------|----------|-----------|--------------|-------|
+| known | data | `entry.body` (no `# known` prefix) | `""` | Tag's summary attr carries the keywords |
+| unknown | unknown | `entry.body` | `""` | Same pattern as known/skill — summary attr carries the label |
+| prompt | prompt | `entry.body` | 500-char truncation with `[truncated...]` | Current behavior is correct |
+| budget | logging | `entry.body` | `entry.body` | Feedback signal — always full |
+| update | logging | `entry.body` | `entry.body` | Already 80-char capped |
+| summarize | logging | `entry.body` | `entry.body` | Already 80-char capped |
+| get | logging | result body | `""` | Just the action tag at summary |
+| set, rm, cp, mv | logging | result body | `""` | Just the action tag at summary |
+| env, sh | logging | output | `""` | Just the action tag at summary |
+| search | logging | results | `""` | Just the action tag at summary |
+| skill | data | `entry.body` | `""` | Same as known |
+| file | data | `entry.body` | `""` | Same as known |
+| http, https | data | — | — | **Move to rummy.web plugin** — not in core |
+## The Body-Header Problem
+Separate from fidelity: the model writes `# scheme path` into the body because examples show that shape. Plugin view hooks then prepend another header.
+**Rule**: `# scheme` prefix belongs only in **logging** scheme outputs (tool execution results where the prefix identifies the log entry type). Non-logging schemes (known, unknown, prompt, data entries) should have no body prefix — tag attributes identify the entry.
+**What to remove**:
+- `known.js` `full()`: remove `# known ${entry.path}\n` prefix — just return `entry.body`
+- `unknown.js` `full()`: remove any `# unknown\n` prefix
+- Tooldoc examples for known/unknown that show bodies starting with `# scheme path` — remove so model stops copying
+**What to keep**:
+- Logging plugins (update, summarize, budget, get, set, etc.) may keep `# scheme` prefixes if present — they're describing tool execution results.
+## Test Plan
+To enforce the contract:
+1. **Per-plugin unit tests**: Each plugin with fidelity-sensitive views tests `full(entry)` and `summary(entry)` return the expected content.
+2. **Renderer tests**: Each section renderer (knowns, previous, performed, unknowns) tests that it trusts `entry.body` without re-checking fidelity.
+3. **Integration test**: Load a DB with entries at each fidelity, assemble context, verify:
+   - Archive entries absent from any section
+   - Summary entries visible as compact tags
+   - Full entries visible with body
+   - No double headers in bodies
+4. **Contract lint**: Grep for `entry.fidelity ===` in renderer files — should have zero matches.
+## Deliverable Order
+Before touching code, this document should be reviewed. Once aligned, the fix order would be:
+1. Fix plugin view hooks to return correct body per fidelity
+2. Remove fidelity re-checks from renderers
+3. Remove the `# scheme path` header prepending (plugin-side) and examples (tooldoc-side)
+4. Write tests per the plan above
+5. Regenerate a sample context packet to confirm clean output
+No silent interventions. No belt-and-suspenders logic. Plugin projects, renderer renders, model sees honest representation.

package/migrations/001_initial_schema.sql CHANGED Viewed

@@ -124,8 +124,8 @@ CREATE TABLE IF NOT EXISTS known_entries (
 	, body TEXT NOT NULL DEFAULT ''
 	, scheme TEXT GENERATED ALWAYS AS (schemeOf(path)) STORED
 	, status INTEGER NOT NULL DEFAULT 200 CHECK (status BETWEEN 100 AND 599)
-	, fidelity TEXT NOT NULL DEFAULT 'full' CHECK (
-		fidelity IN ('full', 'summary', 'archive')
+	, fidelity TEXT NOT NULL DEFAULT 'promoted' CHECK (
+		fidelity IN ('promoted', 'demoted', 'archived')
 	)
 	, hash TEXT
 	, attributes JSON NOT NULL DEFAULT '{}' CHECK (json_valid(attributes))
@@ -166,7 +166,7 @@ CREATE TABLE IF NOT EXISTS turn_context (
 	, path TEXT NOT NULL
 	, scheme TEXT GENERATED ALWAYS AS (schemeOf(path)) STORED
 	, status INTEGER NOT NULL DEFAULT 200 CHECK (status BETWEEN 100 AND 599)
-	, fidelity TEXT NOT NULL CHECK (fidelity IN ('full', 'summary'))
+	, fidelity TEXT NOT NULL CHECK (fidelity IN ('promoted', 'demoted'))
 	, body TEXT NOT NULL DEFAULT ''
 	, tokens INTEGER NOT NULL DEFAULT 0 CHECK (tokens >= 0)
 	, attributes JSON NOT NULL DEFAULT '{}' CHECK (json_valid(attributes))

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@possumtech/rummy",
-	"version": "0.4.0",
+	"version": "0.5.0",
 	"description": "Relational Unknowns Memory Management Yoke",
 	"keywords": [
 		"llm"

package/src/agent/AgentLoop.js CHANGED Viewed

@@ -1,5 +1,4 @@
 import { advanceRecovery } from "../plugins/budget/recovery.js";
-import KnownStore from "./KnownStore.js";
 import msg from "./messages.js";
 import ResponseHealer from "./ResponseHealer.js";
@@ -365,7 +364,7 @@ export default class AgentLoop {
 					await this.#knownStore.setFidelity(
 						currentRunId,
 						ra.promptPath,
-						"full",
+						"promoted",
 					);
 				}
 				if (ra.action === "hard413") {

package/src/agent/ContextAssembler.js CHANGED Viewed

@@ -14,6 +14,7 @@ export default class ContextAssembler {
 			toolSet = null,
 			lastContextTokens = 0,
 			turn = 1,
+			baselineTokens = 0,
 		} = {},
 		hooks,
 	) {
@@ -32,6 +33,7 @@ export default class ContextAssembler {
 			demoted,
 			toolSet,
 			turn,
+			baselineTokens,
 		};
 		const system = await hooks.assembly.system.filter(systemPrompt, ctx);

package/src/agent/KnownStore.js CHANGED Viewed

@@ -84,7 +84,7 @@ export default class KnownStore {
 		body,
 		status,
 		{
-			fidelity = "full",
+			fidelity = "promoted",
 			attributes = null,
 			hash = null,
 			updatedAt = null,
@@ -247,7 +247,6 @@ export default class KnownStore {
 		this.#emitChanged(runId, "prompt://batch", "fidelity");
 	}
 	async getLog(runId) {
 		return this.#db.get_results.all({ run_id: runId });
 	}

package/src/agent/ResponseHealer.js CHANGED Viewed

@@ -2,6 +2,8 @@ const MAX_STALLS = Number(process.env.RUMMY_MAX_STALLS) || 3;
 const MIN_CYCLES = Number(process.env.RUMMY_MIN_CYCLES) || 3;
 const MAX_CYCLE_PERIOD = Number(process.env.RUMMY_MAX_CYCLE_PERIOD) || 4;
 const MAX_UPDATE_REPEATS = Number(process.env.RUMMY_MAX_UPDATE_REPEATS) || 3;
+const MAX_PATH_STAGNATION =
+	Number(process.env.RUMMY_MAX_PATH_STAGNATION) || 5;
 /**
  * Build a stable fingerprint for a single recorded entry.
@@ -47,11 +49,28 @@ function detectCycle(history) {
 	return { detected: false };
 }
+/**
+ * Extract the target paths a command touches for stagnation detection.
+ * Same target logic as cmdFingerprint but returns the raw path for set
+ * comparison across turns.
+ */
+function cmdPaths(entry) {
+	const attrs = entry.attributes ?? {};
+	const paths = [];
+	if (attrs.path) paths.push(attrs.path);
+	if (attrs.to) paths.push(attrs.to);
+	if (attrs.command) paths.push(attrs.command);
+	if (attrs.query) paths.push(attrs.query);
+	if (attrs.question) paths.push(attrs.question);
+	return paths;
+}
 export default class ResponseHealer {
 	#stallCount = 0;
 	#turnHistory = [];
 	#lastUpdateText = null;
 	#updateRepeatCount = 0;
+	#pathRuns = new Map(); // path → consecutive turns touched
 	/**
 	 * Heal a missing status tag. Called when the model emits
@@ -67,8 +86,15 @@ export default class ResponseHealer {
 	static healStatus(content, commands) {
 		const trimmed = content.trim();
+		// Detect malformed-glitch content — model attempted a tool invocation
+		// (native call, malformed XML, etc.) that the parser couldn't dispatch.
+		// This is NOT an answer; it's a glitch that deserves the 3-strikes
+		// stall path so the model can recover. Without this check, the model
+		// emits one malformed call and the run terminates after a single turn.
+		const looksGlitched = /<\|tool_call>|<tool_call\|>/.test(trimmed);
 		// No commands + plain text = answered. Treat as summary.
-		if (commands.length === 0 && trimmed) {
+		if (commands.length === 0 && trimmed && !looksGlitched) {
 			console.warn("[RUMMY] Healed: plain text response treated as summary");
 			return { summaryText: trimmed.slice(0, 500), updateText: null };
 		}
@@ -120,6 +146,32 @@ export default class ResponseHealer {
 			return { continue: false, reason };
 		}
+		// Distinct-paths stagnation: the model might vary commands turn-to-turn
+		// (avoiding exact-cycle detection) but still churn on a single path.
+		// Track per-path consecutive touches; flag if any path is touched in
+		// MAX_PATH_STAGNATION consecutive turns. Catches semantic stagnation
+		// where the fingerprints differ in micro-detail but the work is stuck
+		// on one entry (e.g. endlessly re-setting/re-getting the same plan).
+		const touchedPaths = new Set();
+		for (const cmd of commands) {
+			for (const p of cmdPaths(cmd)) touchedPaths.add(p);
+		}
+		// Paths not touched this turn — run broken, remove from map.
+		for (const path of [...this.#pathRuns.keys()]) {
+			if (!touchedPaths.has(path)) this.#pathRuns.delete(path);
+		}
+		// Paths touched this turn — increment run.
+		for (const path of touchedPaths) {
+			this.#pathRuns.set(path, (this.#pathRuns.get(path) || 0) + 1);
+		}
+		for (const [path, run] of this.#pathRuns) {
+			if (run >= MAX_PATH_STAGNATION) {
+				const reason = `Path stagnation: ${path} touched ${run} consecutive turns`;
+				console.warn(`[RUMMY] ${reason}. Force-completing.`);
+				return { continue: false, reason };
+			}
+		}
 		return { continue: true };
 	}
@@ -184,5 +236,6 @@ export default class ResponseHealer {
 		this.#turnHistory = [];
 		this.#lastUpdateText = null;
 		this.#updateRepeatCount = 0;
+		this.#pathRuns = new Map();
 	}
 }

package/src/agent/TurnExecutor.js CHANGED Viewed

@@ -1,12 +1,19 @@
 import RummyContext from "../hooks/RummyContext.js";
 import ContextAssembler from "./ContextAssembler.js";
-import KnownStore from "./KnownStore.js";
-import msg from "./messages.js";
 import ResponseHealer from "./ResponseHealer.js";
 import { countTokens } from "./tokens.js";
 import XmlParser from "./XmlParser.js";
-const ACTION_SCHEMES = new Set(["get", "set", "rm", "mv", "cp", "sh", "env", "search"]);
+const ACTION_SCHEMES = new Set([
+	"get",
+	"set",
+	"rm",
+	"mv",
+	"cp",
+	"sh",
+	"env",
+	"search",
+]);
 const MUTATION_SCHEMES = new Set(["set", "rm", "sh", "mv", "cp"]);
 const READ_SCHEMES = new Set(["get", "env", "search"]);
@@ -58,7 +65,12 @@ export default class TurnExecutor {
 				fidelity: row.fidelity,
 				status: row.status,
 				body: projectedBody ?? "",
-				tokens: countTokens(projectedBody ?? ""),
+				// Full-body token count, not projected. This is the cost to
+				// promote the entry — the number the model needs to do Token
+				// Budget math. Projecting the demoted symbol-preview (145
+				// tokens for a 2108-token file) was misleading the model into
+				// promotes that blew the Token Budget by 10-30× per entry.
+				tokens: countTokens(row.body ?? ""),
 				attributes: row.attributes,
 				category: row.category,
 				source_turn: row.turn,
@@ -69,6 +81,35 @@ export default class TurnExecutor {
 			run_id: runId,
 		});
 		const lastContextTokens = lastCtx?.context_tokens ?? 0;
+		// Baseline materialization — assemble with model's promoted spending
+		// removed (promoted data, promoted logging). The resulting size is the
+		// fixed overhead the model can't reduce without further demotion.
+		const baselineRows = rows.filter(
+			(r) =>
+				!(
+					(r.category === "data" || r.category === "logging") &&
+					r.fidelity === "promoted"
+				),
+		);
+		const baselineMessages = await ContextAssembler.assembleFromTurnContext(
+			baselineRows,
+			{
+				type: mode,
+				systemPrompt,
+				contextSize,
+				demoted,
+				toolSet,
+				lastContextTokens,
+				turn,
+			},
+			this.#hooks,
+		);
+		const baselineTokens = baselineMessages.reduce(
+			(sum, m) => sum + countTokens(m.content),
+			0,
+		);
 		const messages = await ContextAssembler.assembleFromTurnContext(
 			rows,
 			{
@@ -79,6 +120,7 @@ export default class TurnExecutor {
 				toolSet,
 				lastContextTokens,
 				turn,
+				baselineTokens,
 			},
 			this.#hooks,
 		);
@@ -179,7 +221,7 @@ export default class TurnExecutor {
 			scheme: "instructions",
 			body: instrEntry[0]?.body || "",
 			attributes: instrAttrs,
-			fidelity: "full",
+			fidelity: "promoted",
 			category: "system",
 		});
@@ -232,7 +274,7 @@ export default class TurnExecutor {
 					await this.#knownStore.setFidelity(
 						currentRunId,
 						promptRow.path,
-						"summary",
+						"demoted",
 					);
 				}
 				const reMat = await this.#materializeTurnContext({
@@ -281,10 +323,13 @@ export default class TurnExecutor {
 			}
 		}
+		const runRow = await this.#db.get_run_by_id.get({ id: currentRunId });
 		const filteredMessages = await this.#hooks.llm.messages.filter(messages, {
 			model: requestedModel,
 			projectId,
 			runId: currentRunId,
+			runAlias: runRow?.alias || `run_${currentRunId}`,
+			turn,
 		});
 		// Call LLM