npm - runcap - Versions diffs - 0.2.1 → 0.3.0 - Mend

runcap 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/README.md +22 -4
package/package.json +5 -2
package/scripts/acceptance.mjs +67 -0
package/scripts/delta-test.mjs +130 -0
package/scripts/demo-flow.mjs +20 -0
package/scripts/loop-test.mjs +84 -0
package/scripts/make-demo-svg.mjs +75 -0
package/scripts/make-linkedin-delta-video.mjs +412 -0
package/scripts/validate-demo.mjs +49 -0
package/src/compressor.mjs +268 -1
package/src/mission-control.mjs +40 -3

package/src/compressor.mjs CHANGED Viewed

@@ -18,17 +18,107 @@
 // "X tokens saved by compression". Token counts are an estimate (~4 chars/token),
 // labeled `estimated`, never claimed as provider-exact.
+import { createHash } from "node:crypto";
 const CHARS_PER_TOKEN = 4;
 const MIN_FIELD_CHARS = 200; // below this, compression overhead isn't worth it
+const MIN_DEDUP_CHARS = 256; // only dedup blocks big enough to be worth a stub
 const LOG_HEAD_LINES = 12;
 const LOG_TAIL_LINES = 8;
 const LOG_COLLAPSE_THRESHOLD = 40; // collapse runs longer than this
+// --- delta-encoding of near-duplicate blocks ---
+// When a block is similar (not identical) to one seen earlier in the same
+// request, we replace it with a line-diff against the original. This is the
+// case identical-dedup misses: an agent re-reads a file AFTER editing it.
+// Lossless: the exact text is recoverable from (original block + diff).
+const DELTA_MIN_SIMILARITY = 0.5; // below this a diff isn't smaller than the original
+const DELTA_MAX_LINES = 2500; // LCS is O(n*m); above ~2500 lines a diff can cost >25ms, so skip to protect the hot path
 /**
  * Estimate how many tokens a piece of text costs, using the module-wide
  * characters-per-token ratio. The result is an estimate, not a provider count.
  *
  * @param {*} text - Value to measure; coerced with String(). Falsy values
  *   (undefined, null, "", 0) count as zero tokens.
  * @returns {number} Estimated token count, rounded up.
  */
 export function estimateTokens(text) {
   const charCount = text ? String(text).length : 0;
   return Math.ceil(charCount / CHARS_PER_TOKEN);
 }
+function shortHash(text) {
+  return createHash("sha1").update(text).digest("hex").slice(0, 8);
+}
+// Cheap line-overlap ratio. Used only to decide whether a full LCS diff is
+// worth computing; the real saving is measured against the emitted delta.
+export function lineSimilarity(aLines, bLines) {
+  const aSet = new Set(aLines);
+  let shared = 0;
+  for (const l of bLines) if (aSet.has(l)) shared++;
+  return shared / Math.max(aLines.length, bLines.length, 1);
+}
+// LCS-based line diff. Emits a compact op list of CHANGES only:
+//   { at: <line index in the original>, del: <lines removed>, ins: [<lines added>] }
+// Unchanged ranges are implied. Reconstruction walks the original applying ops.
+function lineDiff(aLines, bLines) {
+  const n = aLines.length, m = bLines.length;
+  const dp = Array.from({ length: n + 1 }, () => new Int32Array(m + 1));
+  for (let i = n - 1; i >= 0; i--) {
+    for (let j = m - 1; j >= 0; j--) {
+      dp[i][j] = aLines[i] === bLines[j]
+        ? dp[i + 1][j + 1] + 1
+        : Math.max(dp[i + 1][j], dp[i][j + 1]);
+    }
+  }
+  const ops = [];
+  let i = 0, j = 0, cur = null;
+  const flush = () => { if (cur) { ops.push(cur); cur = null; } };
+  while (i < n && j < m) {
+    if (aLines[i] === bLines[j]) { flush(); i++; j++; }
+    else if (dp[i + 1][j] >= dp[i][j + 1]) {
+      if (!cur || cur.at !== i) { flush(); cur = { at: i, del: 0, ins: [] }; }
+      cur.del++; i++;
+    } else {
+      if (!cur) cur = { at: i, del: 0, ins: [] };
+      cur.ins.push(bLines[j]); j++;
+    }
+  }
+  while (i < n) { if (!cur || cur.at !== i) { flush(); cur = { at: i, del: 0, ins: [] }; } cur.del++; i++; }
+  if (j < m) { if (!cur) cur = { at: i, del: 0, ins: [] }; while (j < m) cur.ins.push(bLines[j++]); }
+  flush();
+  return ops;
+}
+// Exact inverse of lineDiff: (original lines + ops) -> reconstructed string.
+// Walks ops in order (they are emitted sorted by `at`), copying untouched
+// original lines up to each op's anchor, then applying the op's deletes/inserts.
+// Order-based, so duplicate `at` values across ops are handled correctly.
+// Kept in-module so tests can prove losslessness against the real code path.
+export function applyLineDiff(aLines, ops) {
+  const out = [];
+  let i = 0; // cursor into aLines
+  for (const op of ops) {
+    while (i < op.at && i < aLines.length) { out.push(aLines[i]); i++; }
+    for (const ins of op.ins) out.push(ins);
+    i += op.del;
+  }
+  while (i < aLines.length) { out.push(aLines[i]); i++; }
+  return out.join("\n");
+}
+// Render a delta as a block the MODEL can read and apply in its head. The header
+// names the base (sha + which message it first appeared in) so the model knows
+// what to patch; each op is shown as removed/added lines at a 1-based line number.
+function renderDelta(baseHash, firstIndex, ops) {
+  const lines = [
+    `[runcap delta vs the identical block first seen in message ${firstIndex + 1} (sha:${baseHash}).`,
+    ` Reconstruct the current text by applying these line changes to that block; all other lines are unchanged.]`
+  ];
+  for (const op of ops) {
+    const at1 = op.at + 1;
+    if (op.del > 0) lines.push(`@@ line ${at1}: remove ${op.del} line(s)`);
+    else lines.push(`@@ line ${at1}: insert`);
+    for (const ins of op.ins) lines.push(`+ ${ins}`);
+  }
+  return lines.join("\n");
+}
 // Re-serialize an embedded JSON string compactly. Handles two shapes safely:
 //   1. The whole field is JSON ("{...}" or "[...]").
 //   2. A short text prefix followed by a JSON blob ("Here is the data:\n{...}").
@@ -109,6 +199,120 @@ function compressField(value) {
   return out;
 }
+// Deduplicate identical content blocks within a single request. In a long
+// agentic session the same file dump or tool_result ships as a fresh block on
+// every turn (the agent re-reads auth.ts five times); the model already saw
+// those exact bytes earlier in the SAME request, so replacing the repeats with
+// a deterministic stub is lossless-by-construction. This is where the real
+// savings on agentic traffic live — per-field whitespace/JSON trimming barely
+// moves the needle by comparison.
+//
+// Walks messages in order. The first occurrence of a block is kept verbatim;
+// any later block with the same content hash becomes:
+//   [runcap: identical content seen at message N, sha:abcd1234]
+// We only dedup blocks >= MIN_DEDUP_CHARS so a tiny stub never costs more than
+// the original. Mutates the message tree in place on the already-cloned `next`.
+function dedupRepeatedBlocks(body) {
+  let saved = 0;
+  let blocks = 0;
+  let deltas = 0;
+  // hash -> { index, text, lines } for the first occurrence of each block.
+  const seen = new Map();
+  // Ordered list of prior blocks, for near-duplicate (delta) matching.
+  const priors = [];
+  const stubFor = (hash, firstIndex) =>
+    `[runcap: identical content seen at message ${firstIndex + 1}, sha:${hash}]`;
+  // Try to encode `text` as a delta against the most similar prior block.
+  // Returns the delta string if it is smaller than the original, else null.
+  const tryDelta = (text) => {
+    const bLines = text.split("\n");
+    if (bLines.length > DELTA_MAX_LINES) return null; // protect the hot path
+    let best = null;
+    for (const p of priors) {
+      if (p.lines.length > DELTA_MAX_LINES) continue;
+      const sim = lineSimilarity(p.lines, bLines);
+      if (sim < DELTA_MIN_SIMILARITY) continue;
+      if (!best || sim > best.sim) best = { ...p, sim };
+    }
+    if (!best) return null;
+    const ops = lineDiff(best.lines, bLines);
+    // Safety: only emit if it reconstructs exactly (lossless-by-construction).
+    if (applyLineDiff(best.lines, ops) !== text) return null;
+    const rendered = renderDelta(best.hash, best.index, ops);
+    return rendered.length < text.length ? rendered : null;
+  };
+  const dedupString = (text, msgIndex) => {
+    if (typeof text !== "string" || text.length < MIN_DEDUP_CHARS) return text;
+    const hash = shortHash(text);
+    const firstSeen = seen.get(hash);
+    if (firstSeen === undefined) {
+      // First time we see this exact block. Try a delta vs an earlier *similar*
+      // block before recording it as a fresh original.
+      const delta = tryDelta(text);
+      const record = { index: msgIndex, hash, text, lines: text.split("\n") };
+      seen.set(hash, record);
+      priors.push(record);
+      if (delta !== null) {
+        saved += text.length - delta.length;
+        blocks += 1;
+        deltas += 1;
+        return delta;
+      }
+      return text;
+    }
+    const stub = stubFor(hash, firstSeen.index);
+    if (stub.length >= text.length) return text;
+    saved += text.length - stub.length;
+    blocks += 1;
+    return stub;
+  };
+  const dedupContent = (content, msgIndex) => {
+    if (typeof content === "string") return dedupString(content, msgIndex);
+    if (Array.isArray(content)) {
+      return content.map((part) => {
+        if (!part || typeof part !== "object") return part;
+        // OpenAI/Anthropic text parts
+        if (typeof part.text === "string") {
+          return { ...part, text: dedupString(part.text, msgIndex) };
+        }
+        // Anthropic tool_result blocks: content can be string or array of parts
+        if (part.type === "tool_result") {
+          if (typeof part.content === "string") {
+            return { ...part, content: dedupString(part.content, msgIndex) };
+          }
+          if (Array.isArray(part.content)) {
+            return {
+              ...part,
+              content: part.content.map((c) =>
+                c && typeof c === "object" && typeof c.text === "string"
+                  ? { ...c, text: dedupString(c.text, msgIndex) }
+                  : c
+              )
+            };
+          }
+        }
+        return part;
+      });
+    }
+    return content;
+  };
+  let next = body;
+  if (Array.isArray(body.messages)) {
+    next = {
+      ...body,
+      messages: body.messages.map((m, i) =>
+        m && typeof m === "object" && "content" in m ? { ...m, content: dedupContent(m.content, i) } : m
+      )
+    };
+  }
+  return { body: next, saved, blocks, deltas };
+}
 // Walk an OpenAI- or Anthropic-shaped request body and compress message content.
 // Returns { body, before, after, savedChars, savedTokens, touched }.
 export function compressRequestBody(body) {
@@ -156,6 +360,12 @@ export function compressRequestBody(body) {
     next = { ...next, input: compressContent(next.input) };
   }
+  // Cross-message dedup of identical blocks + delta-encoding of near-duplicates
+  // (the big win on agentic traffic: re-reads after an edit).
+  const deduped = dedupRepeatedBlocks(next);
+  next = deduped.body;
+  touched += deduped.blocks;
   const measureAfter = JSON.stringify(next).length;
   const savedChars = Math.max(0, measureBefore - measureAfter);
   return {
@@ -164,6 +374,63 @@ export function compressRequestBody(body) {
     after: measureAfter,
     savedChars,
     savedTokens: Math.round(savedChars / CHARS_PER_TOKEN),
-    touched
+    touched,
+    deltas: deduped.deltas
+  };
+}
+// --- loop / circling detection (the "looks productive but stuck" signal) ---
+// The gateway sees every request the agent sends. An agent that is circling the
+// same failure with reworded attempts sends prompts that are SIMILAR-but-not-
+// identical turn after turn: the conversation tail barely moves while tokens
+// keep burning. Plain hashing misses this (the text differs slightly each loop);
+// this catches it with the same line-similarity primitive the delta-encoder uses.
+const LOOP_SIMILARITY = 0.92; // two consecutive prompts this similar = no real progress made between them
+const LOOP_MIN_REPEATS = 3;   // how many near-identical prompts in a row before we warn
+// Pull the comparable "shape" of a request: the concatenated text the agent is
+// actually sending this turn (messages / input / system), order-preserving.
+export function requestShapeText(body) {
+  if (!body || typeof body !== "object") return "";
+  const parts = [];
+  const push = (content) => {
+    if (typeof content === "string") parts.push(content);
+    else if (Array.isArray(content)) {
+      for (const p of content) if (p && typeof p === "object" && typeof p.text === "string") parts.push(p.text);
+    }
+  };
+  if (Array.isArray(body.messages)) for (const m of body.messages) if (m && typeof m === "object") push(m.content);
+  if (body.system !== undefined) push(body.system);
+  if (typeof body.input === "string") push(body.input);
+  return parts.join("\n");
+}
+// Given the current request and a rolling history of prior request shapes,
+// decide whether the agent is circling. Returns { looping, repeats, similarity }.
+// History is oldest->newest of prior requestShapeText() strings in this session.
+export function detectLoop(currentShape, history, {
+  similarityThreshold = LOOP_SIMILARITY,
+  minRepeats = LOOP_MIN_REPEATS
+} = {}) {
+  if (!currentShape || !Array.isArray(history) || history.length === 0) {
+    return { looping: false, repeats: 0, similarity: 0 };
+  }
+  const curLines = String(currentShape).split("\n");
+  let repeats = 0;
+  let lastSimilarity = 0;
+  // Walk backward through history; count the unbroken run of near-identical turns.
+  for (let i = history.length - 1; i >= 0; i--) {
+    const sim = lineSimilarity(curLines, String(history[i]).split("\n"));
+    if (sim >= similarityThreshold) {
+      repeats += 1;
+      lastSimilarity = sim;
+    } else {
+      break;
+    }
+  }
+  return {
+    looping: repeats >= minRepeats,
+    repeats,
+    similarity: Number(lastSimilarity.toFixed(3))
   };
 }

package/src/mission-control.mjs CHANGED Viewed

@@ -7,7 +7,7 @@ import path from "node:path";
 import process from "node:process";
 import { syncRun } from "./cloud.mjs";
 import { sendAlert } from "./alerts.mjs";
-import { compressRequestBody, estimateTokens } from "./compressor.mjs";
+import { compressRequestBody, estimateTokens, requestShapeText, detectLoop } from "./compressor.mjs";
 const STORE_DIR = ".runcap";
 const MISSIONS_DIR = path.join(STORE_DIR, "missions");
@@ -523,6 +523,12 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
   if (gatewayMode !== "mock" && !openaiKey && !anthropicKey) {
     throw new Error("Missing upstream key. Set OPENAI_API_KEY (for /v1/chat/completions) and/or ANTHROPIC_API_KEY (for /v1/messages). The gateway cannot proxy without at least one.");
   }
+  // Rolling history of recent request shapes (per gateway process) so we can
+  // detect an agent circling the same failure with reworded prompts: similar-
+  // but-not-identical turns, which plain hashing never catches.
+  const loopEnabled = (process.env.AIM_LOOP_DETECT ?? "on").toLowerCase() !== "off";
+  const shapeHistory = [];
+  const SHAPE_HISTORY_MAX = 12;
   const server = http.createServer(async (request, response) => {
     const started = Date.now();
     try {
@@ -545,6 +551,17 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
       const bodyText = await readRequestBody(request);
       const requestBody = safeJson(bodyText) ?? {};
+      // Loop signal: compare this request's shape against the recent run.
+      let loop = null;
+      if (loopEnabled) {
+        const shape = requestShapeText(requestBody);
+        if (shape) {
+          const result = detectLoop(shape, shapeHistory);
+          loop = { looping: result.looping, repeats: result.repeats, similarity: result.similarity, truth: "calculated" };
+          shapeHistory.push(shape);
+          if (shapeHistory.length > SHAPE_HISTORY_MAX) shapeHistory.shift();
+        }
+      }
       const budget = readBudget();
       const summary = await readGatewaySummary({ windowMs: budgetWindowMs() });
       // Compress the request body once (safe, lossless-by-construction). Disable with AIM_COMPRESS=off.
@@ -561,6 +578,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
             beforeChars: c.before,
             afterChars: c.after,
             fieldsTouched: c.touched,
+            deltas: c.deltas ?? 0,
             truth: "estimated"
           };
         }
@@ -590,6 +608,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
             capUsd: budget,
             blockedByThisCall
           },
+          loop,
           error: blockedByThisCall
             ? `Budget would be exceeded by this call: $${summary.estimatedCostUsd} spent + ~$${callEstimate} this call > cap $${budget}`
             : `Budget exceeded: ${summary.estimatedCostUsd} >= ${budget}`,
@@ -630,6 +649,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
           usage: responseBody.usage,
           cost: estimateApiCost(responseBody.usage, requestBody.model ?? responseBody.model),
           compression,
+          loop,
           truth: "mock_provider_usage",
           requestHash: createHash("sha1").update(bodyText).digest("hex")
         });
@@ -681,9 +701,14 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
         usage: responseBody.usage ?? null,
         cost: estimateApiCost(responseBody.usage, requestBody.model ?? responseBody.model),
         compression,
+        loop,
         truth: responseBody.usage ? "provider_usage" : "unknown",
         requestHash: createHash("sha1").update(bodyText).digest("hex")
       });
+      if (loop && loop.looping) {
+        sendAlert(`Runcap: possible stuck loop. The agent has sent ${loop.repeats} near-identical prompts in a row (${Math.round(loop.similarity * 100)}% similar) without the conversation moving forward. It may be circling the same failure with reworded attempts.`)
+          .catch(() => {});
+      }
       if (responseBody.usage) {
         const spent = await readGatewaySummary({ windowMs: budgetWindowMs() });
         syncRun({
@@ -768,19 +793,23 @@ export async function showStatus(options = {}) {
   const gateway = await readGatewaySummary();
   const gatewayLine = `Gateway: ${gateway.callCount} calls, ${gateway.totalTokens} tokens, $${gateway.estimatedCostUsd} estimated (${gateway.truth})`;
+  const loopLine = gateway.loop?.looping
+    ? `Loop warning: last ${gateway.loop.repeats} prompts were ${Math.round(gateway.loop.similarity * 100)}% identical with no progress. The agent may be circling the same failure (truth: calculated).`
+    : null;
   const latest = await latestMissionId();
-  if (!latest) return `${fuelLine}\n${gatewayLine}\nNo missions recorded yet.`;
+  if (!latest) return [fuelLine, gatewayLine, loopLine, "No missions recorded yet."].filter(Boolean).join("\n");
   const mission = await readMission(latest);
   return [
     fuelLine,
     gatewayLine,
+    loopLine,
     `Latest mission: ${mission.id}`,
     `Status: ${mission.stuck.status}`,
     `Exit code: ${mission.exitCode}`,
     `Changed files: ${mission.diffEvidence.changedFiles.length}`,
     `Errors: ${mission.errors.length}`,
     `Report: ${path.join(MISSIONS_DIR, mission.id, "report.md")}`
-  ].join("\n");
+  ].filter(Boolean).join("\n");
 }
 export async function recordFuel(value) {
@@ -1418,6 +1447,13 @@ async function readGatewaySummary({ windowMs } = {}) {
     const inputRate = pricing ? pricing.inputPerMillion : 3; // fall back to a mid Sonnet-ish rate
     return sum + (saved * inputRate) / 1_000_000;
   }, 0);
+  // Loop signal: the most recent event that carries a loop verdict tells us
+  // whether the agent is currently circling (similar-but-not-identical prompts
+  // repeated without progress). This is the "looks productive but stuck" case.
+  const lastWithLoop = [...events].reverse().find((event) => event.loop);
+  const loop = lastWithLoop
+    ? { ...lastWithLoop.loop, at: lastWithLoop.at, model: lastWithLoop.model }
+    : { looping: false, repeats: 0, similarity: 0, truth: "calculated" };
   return {
     callCount: events.length,
     successfulCallCount: successful.length,
@@ -1426,6 +1462,7 @@ async function readGatewaySummary({ windowMs } = {}) {
     savedTokens,
     savedUsd: Number(savedUsd.toFixed(6)),
     wouldHaveSpentUsd: Number((estimatedCost + savedUsd).toFixed(6)),
+    loop,
     truth: events.some((event) => event.truth === "provider_usage" || event.truth === "mock_provider_usage")
       ? "usage_plus_static_price_table"
       : "unknown",