npm - @tyvm/knowhow - Versions diffs - 0.0.114 → 0.0.116 - Mend

@tyvm/knowhow 0.0.114 → 0.0.116

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/package.json +1 -1
package/scripts/test-repetition-hint.ts +82 -14
package/src/agents/tools/patch.ts +240 -27
package/src/processors/CustomVariables.ts +56 -2
package/tests/patching/regression-2026.test.ts +283 -0
package/ts_build/package.json +1 -1
package/ts_build/src/agents/tools/patch.js +235 -16
package/ts_build/src/agents/tools/patch.js.map +1 -1
package/ts_build/src/processors/CustomVariables.d.ts +3 -0
package/ts_build/src/processors/CustomVariables.js +34 -2
package/ts_build/src/processors/CustomVariables.js.map +1 -1
package/ts_build/tests/patching/regression-2026.test.d.ts +1 -0
package/ts_build/tests/patching/regression-2026.test.js +163 -0
package/ts_build/tests/patching/regression-2026.test.js.map +1 -0

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@tyvm/knowhow",
-  "version": "0.0.114",
+  "version": "0.0.116",
   "description": "ai cli with plugins and agents",
   "main": "ts_build/src/index.js",
   "bin": {

package/scripts/test-repetition-hint.ts CHANGED

@@ -1,7 +1,7 @@
 #!/usr/bin/env ts-node
 /**
  * Test script: runs the repetition hint processor logic against a real agent metadata file
- * and prints whether the hint would fire and why/why not.
+ * and prints whether the hint would fire and why/why not, with token savings estimates.
  *
  * Usage:
  *   npx ts-node scripts/test-repetition-hint.ts [path-to-metadata.json]
@@ -76,7 +76,7 @@ function longestCommonSubstring(a: string, b: string, minLength: number): string
   for (let i = 0; i < a.length - minLength + 1; i++) {
     for (let j = a.length; j > i + minLength - 1; j--) {
       const sub = a.slice(i, j);
-      if (sub.length <= best.length) break; // already found longer, skip shorter
+      if (sub.length <= best.length) break;
       if (b.includes(sub)) {
         best = sub;
         break;
@@ -86,12 +86,18 @@ function longestCommonSubstring(a: string, b: string, minLength: number): string
   return best.length >= minLength ? best : null;
 }
+interface ProcessorResult {
+  wouldHint: boolean;
+  repeatedTools: string[];
+  details: Map<string, { count: number; tools: Set<string> }>;
+}
 function runProcessor(
   messages: Message[],
   minLength = 50,
   minRepetitions = 2,
   minSubstringLength = 50
-): { wouldHint: boolean; repeatedTools: string[]; details: Map<string, { count: number; tools: Set<string> }> } {
+): ProcessorResult {
   const stringCounts = new Map<string, { count: number; tools: Set<string> }>();
   const toolStrings = collectToolCallStrings(messages, minLength);
@@ -107,13 +113,12 @@ function runProcessor(
   }
   // Step 2: repeated substrings across different full strings
-  // e.g. the same JWT embedded in many different commands
   const substringCounts = new Map<string, { count: number; tools: Set<string> }>();
   for (let i = 0; i < toolStrings.length; i++) {
     for (let j = i + 1; j < toolStrings.length; j++) {
       const a = toolStrings[i];
       const b = toolStrings[j];
-      if (a.value === b.value) continue; // already handled by exact match
+      if (a.value === b.value) continue;
       const common = longestCommonSubstring(a.value, b.value, minSubstringLength);
       if (common) {
         const existing = substringCounts.get(common);
@@ -128,7 +133,7 @@ function runProcessor(
     }
   }
-  // Merge substring counts: count = number of unique pairs, count+1 = number of occurrences
+  // Merge substring counts
   for (const [sub, info] of substringCounts.entries()) {
     if (info.count + 1 >= minRepetitions && !stringCounts.has(sub)) {
       stringCounts.set(sub, { count: info.count + 1, tools: info.tools });
@@ -148,6 +153,26 @@ function runProcessor(
   return { wouldHint: repeatedTools.length > 0, repeatedTools, details: stringCounts };
 }
+/**
+ * Estimate tokens saved by using variables for repeated strings.
+ * Savings = (repetitions - 1) * str.length chars / 4 chars-per-token
+ * Minus the cost of the reminder message itself (estimated tokens in hint message).
+ */
+function estimateNetTokenSavings(
+  details: Map<string, { count: number; tools: Set<string> }>,
+  hintMessageTokens: number
+): { gross: number; net: number } {
+  let totalCharsSaved = 0;
+  for (const [str, info] of details.entries()) {
+    if (info.count >= 2) {
+      totalCharsSaved += (info.count - 1) * str.length;
+    }
+  }
+  const gross = Math.round(totalCharsSaved / 4);
+  const net = gross - hintMessageTokens;
+  return { gross, net };
+}
 // ---- Main ----
 const raw = fs.readFileSync(metadataPath, "utf-8");
@@ -158,6 +183,13 @@ console.log(`\n=== Repetition Hint Processor Test ===`);
 console.log(`File: ${metadataPath}`);
 console.log(`Threads: ${threads.length}`);
+// Approximate tokens in the hint message itself (the reminder we send to the agent)
+// ~100 tokens for the base message + ~30 per example
+const HINT_BASE_TOKENS = 100;
+const HINT_TOKENS_PER_EXAMPLE = 30;
+const MAX_EXAMPLES = 3;
+const HINT_MESSAGE_TOKENS = HINT_BASE_TOKENS + MAX_EXAMPLES * HINT_TOKENS_PER_EXAMPLE;
 for (let ti = 0; ti < threads.length; ti++) {
   const thread = threads[ti];
   const toolCallMsgs = thread.filter((m) => m.tool_calls && m.tool_calls.length > 0);
@@ -177,19 +209,55 @@ for (let ti = 0; ti < threads.length; ti++) {
   const newResult = runProcessor(thread, 50, 2, 50);
   if (newResult.wouldHint) {
     console.log(`✅ Would hint! Repeated tools: ${newResult.repeatedTools.join(", ")}`);
-    // Show top repeated substrings
+    const { gross, net } = estimateNetTokenSavings(newResult.details, HINT_MESSAGE_TOKENS);
+    console.log(`\n  💰 Token savings estimate:`);
+    console.log(`     Gross savings (repeated chars ÷ 4)  : ~${gross} tokens`);
+    console.log(`     Cost of reminder message            : ~${HINT_MESSAGE_TOKENS} tokens`);
+    console.log(`     Net savings                         : ~${net} tokens`);
+    // Sort by impact (count * length) descending
     const repeated = Array.from(newResult.details.entries())
       .filter(([, info]) => info.count >= 2)
-      .sort((a, b) => b[1].count - a[1].count)
+      .sort((a, b) => (b[1].count * b[0].length) - (a[1].count * a[0].length))
       .slice(0, 5);
-    console.log(`\n  Top repeated values (count, tools, preview):`);
-    for (const [str, info] of repeated) {
-      console.log(`    count=${info.count}, tools=${[...info.tools].join(",")}`);
-      console.log(`    value=${JSON.stringify(str.slice(0, 120))}`);
-    }
+    console.log(`\n  Top repeated values to store as variables (sorted by token impact):`);
+    repeated.forEach(([str, info], i) => {
+      const charsSaved = (info.count - 1) * str.length;
+      const toksSaved = Math.round(charsSaved / 4);
+      const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
+      const ellipsis = str.length > 80 ? "…" : "";
+      console.log(`\n  [var${i + 1}]`);
+      console.log(`    count    : ${info.count}x`);
+      console.log(`    tools    : ${[...info.tools].join(", ")}`);
+      console.log(`    ~savings : ${toksSaved} tokens (${charsSaved} chars)`);
+      console.log(`    value    : "${preview}${ellipsis}"`);
+      if (str.length > 80) {
+        console.log(`    (full len: ${str.length} chars)`);
+      }
+    });
+    // Show what the actual hint message would look like
+    const examples = repeated.slice(0, MAX_EXAMPLES).map(([str, info], i) => {
+      const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
+      const ellipsis = str.length > 80 ? "…" : "";
+      const toksSaved = Math.round(((info.count - 1) * str.length) / 4);
+      return `  • \`var${i + 1}\` (used ${info.count}x in ${[...info.tools].join(", ")}, ~${toksSaved} tokens saveable): "${preview}${ellipsis}"`;
+    });
+    console.log(`\n  Example hint message that would be shown to the agent:`);
+    console.log(`  ---`);
+    console.log(
+      `  ⚠️ Tool inputs have large repetitions detected in: ${newResult.repeatedTools.join(", ")} ` +
+      `(~${gross} output tokens could be saved, ~${net} net after this reminder).\n` +
+      `  Consider storing repeated values with \`setVariable\` or \`storeToolCallToVariable\`,\n` +
+      `  then reference them via {{variableName}} in future tool calls.\n` +
+      `  Top repeated values to consider storing as variables:\n` +
+      examples.join("\n")
+    );
+    console.log(`  ---`);
   } else {
     console.log(`❌ Would NOT hint.`);
-    // Show top large strings for diagnosis
     const toolStrings = collectToolCallStrings(thread, 50);
     console.log(`\n  Total large strings in tool calls: ${toolStrings.length}`);
     const top = toolStrings.slice(0, 3);

package/src/agents/tools/patch.ts CHANGED

@@ -250,6 +250,11 @@ const CONTEXT_LINES = 3; // Standard number of context lines
 function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
   const originalLines = splitByNewLines(originalContent);
+  // Special case: pure creation hunk on empty file (@@ -0,0 +1,N @@)
+  if (originalContent === "" && hunk.originalStartLine === 0 && hunk.originalLineCount === 0) {
+    return hunk; // Already valid, pass through as-is
+  }
   const deletionLinesContent = hunk.subtractions.map((l) => l.slice(1));
   const additionLinesContent = hunk.additions.map((l) => l.slice(1));
@@ -267,6 +272,16 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
         `Anchor found via deletion sequence at line ${actualOriginalStartLine}`
       );
     }
+    // 1b. If full sequence not found (non-contiguous deletions), anchor on first deletion alone
+    if (actualOriginalStartLine === -1) {
+      const firstDeletionLines = findAllLineNumbers(originalContent, deletionLinesContent[0]);
+      const closest = findClosestNumber(firstDeletionLines, hunk.originalStartLine);
+      if (closest !== undefined) {
+        actualOriginalStartLine = closest;
+        console.log(`Anchor found via first deletion line at line ${actualOriginalStartLine}`);
+      }
+    }
   }
   // 2. If deletions didn't anchor, try anchoring using context *before* the first change
@@ -351,30 +366,228 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
   // Ensure start line is at least 1
   actualOriginalStartLine = Math.max(1, actualOriginalStartLine);
-  // 4. Reconstruct the hunk with correct context
-  const contextBeforeStartLine = Math.max(
-    0,
-    actualOriginalStartLine - CONTEXT_LINES - 1
-  ); // 0-based index
-  const contextBeforeEndLine = Math.max(0, actualOriginalStartLine - 1); // 0-based index
-  const contextBefore = originalLines
-    .slice(contextBeforeStartLine, contextBeforeEndLine)
-    .map((l) => ` ${l}`);
-  // End line of original content affected by deletions (1-based)
-  const originalContentEndLine =
-    actualOriginalStartLine + deletionLinesContent.length;
-  const contextAfterStartLine = originalContentEndLine - 1; // 0-based index
-  const contextAfterEndLine = Math.min(
-    originalLines.length,
-    contextAfterStartLine + CONTEXT_LINES
-  ); // 0-based index
-  const contextAfter = originalLines
-    .slice(contextAfterStartLine, contextAfterEndLine)
-    .map((l) => ` ${l}`);
+  // 4a. Detect interleaved hunks (context lines between change blocks)
+  // If so, preserve original body order, just filter ghost context lines and fix header
+  let hasInterleavedChanges = false;
+  let seenChange = false;
+  let seenContextAfterChange = false;
+  for (const line of hunk.lines) {
+    if (line.startsWith("+") || line.startsWith("-")) {
+      if (seenContextAfterChange) { hasInterleavedChanges = true; break; }
+      seenChange = true;
+    } else if (line.startsWith(" ") && seenChange) {
+      seenContextAfterChange = true;
+    }
+  }
+  // Check if there are more changes after a context block
+  if (seenContextAfterChange) {
+    for (const line of hunk.lines.slice(hunk.lines.findIndex((l, i) => {
+      let sc = false;
+      for (let j = 0; j <= i; j++) {
+        if (hunk.lines[j].startsWith("+") || hunk.lines[j].startsWith("-")) sc = true;
+        if (sc && hunk.lines[j].startsWith(" ") && j === i) return true;
+      }
+      return false;
+    }))) {
+      if (line.startsWith("+") || line.startsWith("-")) { hasInterleavedChanges = true; break; }
+    }
+  }
+  if (hasInterleavedChanges) {
+    // Group hunk lines into change-blocks separated by context
+    // Each block: { deletions, additions }
+    type ChangeBlock = { deletions: string[]; additions: string[]; };
+    const blocks: ChangeBlock[] = [];
+    let curBlock: ChangeBlock = { deletions: [], additions: [] };
+    let inBlock = false;
+    for (const line of hunk.lines) {
+      if (line.startsWith("-")) { curBlock.deletions.push(line.slice(1)); inBlock = true; }
+      else if (line.startsWith("+")) { curBlock.additions.push(line.slice(1)); inBlock = true; }
+      else if (inBlock) {
+        blocks.push(curBlock);
+        curBlock = { deletions: [], additions: [] };
+        inBlock = false;
+      }
+    }
+    if (inBlock || curBlock.deletions.length > 0 || curBlock.additions.length > 0) blocks.push(curBlock);
+    // For each block, try to apply as line-level or substring replacement
+    let resultLines = [...originalLines];
+    let lineOffset = 0;
+    let anyApplied = false;
+    for (const block of blocks) {
+      if (block.deletions.length === 0) continue;
+      // Try exact line match first
+      const seqIdx = findSequenceIndex(resultLines, block.deletions);
+      if (seqIdx !== -1) {
+        resultLines = [
+          ...resultLines.slice(0, seqIdx),
+          ...block.additions,
+          ...resultLines.slice(seqIdx + block.deletions.length),
+        ];
+        lineOffset += block.additions.length - block.deletions.length;
+        anyApplied = true;
+        continue;
+      }
+      // Try substring replacement: find a line containing all deletion content
+      const delContent = block.deletions.join(" ").trim();
+      const addContent = block.additions.join(" ").trim();
+      const matchIdx = resultLines.findIndex((l) => l.includes(delContent.split(" ")[0]) && block.deletions.every((d) => l.includes(d.trim())));
+      if (matchIdx !== -1) {
+        let newLine = resultLines[matchIdx];
+        for (let i = 0; i < block.deletions.length; i++) {
+          newLine = newLine.replace(block.deletions[i].trim(), block.additions[i]?.trim() ?? "");
+        }
+        resultLines = [...resultLines.slice(0, matchIdx), newLine, ...resultLines.slice(matchIdx + 1)];
+        anyApplied = true;
+      }
+    }
+    if (anyApplied) {
+      // Build a replacement patch from original -> result
+      const origStr = originalLines.join("\n");
+      const newStr = resultLines.join("\n");
+      // Find first differing line
+      let firstDiff = 0;
+      while (firstDiff < originalLines.length && firstDiff < resultLines.length && originalLines[firstDiff] === resultLines[firstDiff]) firstDiff++;
+      let lastDiffOrig = originalLines.length - 1;
+      let lastDiffNew = resultLines.length - 1;
+      while (lastDiffOrig > firstDiff && lastDiffNew > firstDiff && originalLines[lastDiffOrig] === resultLines[lastDiffNew]) { lastDiffOrig--; lastDiffNew--; }
+      const ctxStart = Math.max(0, firstDiff - 1);
+      const ctxEndOrig = Math.min(originalLines.length - 1, lastDiffOrig + 1);
+      const ctxEndNew = Math.min(resultLines.length - 1, lastDiffNew + 1);
+      const patchLines: string[] = [];
+      for (let i = ctxStart; i <= ctxEndOrig; i++) {
+        if (i >= firstDiff && i <= lastDiffOrig) patchLines.push(`-${originalLines[i]}`);
+        else patchLines.push(` ${originalLines[i]}`);
+      }
+      // Insert additions at right position
+      const finalLines: string[] = [];
+      for (let i = ctxStart; i <= ctxEndOrig; i++) {
+        if (i >= firstDiff && i <= lastDiffOrig) { finalLines.push(`-${originalLines[i]}`); }
+        else finalLines.push(` ${originalLines[i]}`);
+      }
+      // Add additions after last deletion
+      for (let i = firstDiff; i <= lastDiffNew; i++) {
+        if (i >= firstDiff && i <= lastDiffNew && (i > lastDiffOrig || originalLines[i] !== resultLines[i])) {
+          if (!finalLines.some((l) => l === `+${resultLines[i]}`)) finalLines.push(`+${resultLines[i]}`);
+        }
+      }
+      const origCount2 = finalLines.filter((l) => !l.startsWith("+")).length;
+      const newCount2 = finalLines.filter((l) => !l.startsWith("-")).length;
+      const newHeader2 = `@@ -${ctxStart + 1},${origCount2} +${ctxStart + 1},${newCount2} @@`;
+      return {
+        header: newHeader2,
+        originalStartLine: ctxStart + 1,
+        originalLineCount: origCount2,
+        newStartLine: ctxStart + 1,
+        newLineCount: newCount2,
+        lines: finalLines,
+        additions: finalLines.filter((l) => l.startsWith("+")),
+        subtractions: finalLines.filter((l) => l.startsWith("-")),
+        contextLines: finalLines.filter((l) => l.startsWith(" ")),
+      };
+    }
+    // Fallback: filter valid lines and return
+    const validLines = hunk.lines.filter((l) => {
+      if (l.startsWith("+") || l.startsWith("-")) return true;
+      if (!l.startsWith(" ") && l.trim() !== "") return false;
+      const content = l.startsWith(" ") ? l.slice(1) : l;
+      if (content.trim() === "") return originalLines.includes(content);
+      return originalLines.some((fl) => fl.trim() === content.trim());
+    }).map((l) => (!l.startsWith("+") && !l.startsWith("-") && !l.startsWith(" ")) ? ` ${l}` : l);
+    const origCount = validLines.filter((l) => !l.startsWith("+")).length;
+    const newCount = validLines.filter((l) => !l.startsWith("-")).length;
+    const newHeader = `@@ -${actualOriginalStartLine},${origCount} +${actualOriginalStartLine},${newCount} @@`;
+    return {
+      header: newHeader,
+      originalStartLine: actualOriginalStartLine,
+      originalLineCount: origCount,
+      newStartLine: actualOriginalStartLine,
+      newLineCount: newCount,
+      lines: validLines,
+      additions: hunk.additions,
+      subtractions: hunk.subtractions,
+      contextLines: validLines.filter((l) => !l.startsWith("+") && !l.startsWith("-")),
+    };
+  }
+  // Pure insertion: output minimal -N,0 format required by unified diff spec
+  if (deletionLinesContent.length === 0 && additionLinesContent.length > 0) {
+    const pureHeader = `@@ -${actualOriginalStartLine},0 +${actualOriginalStartLine},${hunk.additions.length} @@`;
+    return {
+      header: pureHeader,
+      originalStartLine: actualOriginalStartLine,
+      originalLineCount: 0,
+      newStartLine: actualOriginalStartLine,
+      newLineCount: hunk.additions.length,
+      lines: hunk.additions,
+      additions: hunk.additions,
+      subtractions: [],
+      contextLines: [],
+    };
+  }
+  // 4. Extract context lines from the original hunk body
+  const hunkContextBefore: string[] = [];
+  const hunkContextAfter: string[] = [];
+  let pastChanges = false;
+  for (const line of hunk.lines) {
+    if (line.startsWith("+") || line.startsWith("-")) {
+      pastChanges = true;
+    } else if (line.startsWith(" ")) {
+      if (!pastChanges) hunkContextBefore.push(line);
+      else hunkContextAfter.push(line);
+    }
+  }
+  // Validate context lines against the file (reject ghost lines not present in file)
+  // Replace context lines with the actual line from the file to fix indentation divergence
+  const validContextBefore = hunkContextBefore
+    .map((l) => {
+      const match = originalLines.find((fl) => fl.trim() === l.slice(1).trim() && l.slice(1).trim() !== "");
+      return match !== undefined ? ` ${match}` : null;
+    })
+    .filter((l): l is string => l !== null);
+  const validContextAfter = hunkContextAfter
+    .map((l) => {
+      const match = originalLines.find((fl) => fl.trim() === l.slice(1).trim() && l.slice(1).trim() !== "");
+      return match !== undefined ? ` ${match}` : null;
+    })
+    .filter((l): l is string => l !== null);
+  // Supplement: add 1 extra line before the valid context for better anchoring
+  const supplementBeforeIdx = actualOriginalStartLine - 1 - validContextBefore.length - 1; // 0-based
+  const supplementBefore: string[] =
+    supplementBeforeIdx >= 0
+      ? [` ${originalLines[supplementBeforeIdx]}`]
+      : [];
+  const contextBefore = [...supplementBefore, ...validContextBefore];
+  // For context after: use valid context from hunk; if none, take 1 line from file
+  const originalContentEndLine = actualOriginalStartLine + deletionLinesContent.length;
+  let contextAfter: string[];
+  if (deletionLinesContent.length === 0) {
+    // Pure insertion: always take the line at the insertion point from file (don't trust hunk context position)
+    const afterIdx = actualOriginalStartLine - 1; // 0-based index of line at insertion point
+    contextAfter = afterIdx < originalLines.length ? [` ${originalLines[afterIdx]}`] : [];
+  } else {
+    contextAfter = validContextAfter;
+    if (contextAfter.length === 0) {
+      const afterIdx = originalContentEndLine - 1; // 0-based index after deletions
+      if (afterIdx < originalLines.length) {
+        contextAfter = [` ${originalLines[afterIdx]}`];
+      }
+    }
+  }
+  // For pure-insertion hunks (no deletions), don't supplement before - keep only hunk context
+  const finalContextBefore = deletionLinesContent.length === 0 ? validContextBefore : contextBefore;
   const newHunkLines = [
-    ...contextBefore,
+    ...finalContextBefore,
     ...hunk.subtractions, // Use the original subtraction lines from the input hunk
     ...hunk.additions, // Use the original addition lines from the input hunk
     ...contextAfter,
@@ -382,11 +595,11 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
   // 5. Recalculate the header
   const newOriginalStart =
-    contextBefore.length > 0
-      ? actualOriginalStartLine - contextBefore.length
+    finalContextBefore.length > 0
+      ? actualOriginalStartLine - finalContextBefore.length
       : actualOriginalStartLine;
   const newOriginalCount =
-    contextBefore.length + hunk.subtractions.length + contextAfter.length;
+    finalContextBefore.length + hunk.subtractions.length + contextAfter.length;
   // The new start line depends on how many lines were added/removed *before* this hunk.
   // For an isolated hunk fix, we often just base it on the original start.
@@ -394,7 +607,7 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
   // Let's keep it simple and relative to the original start for now.
   const newNewStart = newOriginalStart; // Simplification: Assume start line number matches original unless offset by prior hunks (which we don't know here)
   const newNewCount =
-    contextBefore.length + hunk.additions.length + contextAfter.length;
+    finalContextBefore.length + hunk.additions.length + contextAfter.length;
   // Handle edge case where count is 0 (e.g., adding to an empty file) - header format needs >= 1
   const finalOriginalStart = Math.max(1, newOriginalStart);
@@ -421,7 +634,7 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
     lines: newHunkLines,
     additions: hunk.additions, // Keep original intended changes
     subtractions: hunk.subtractions, // Keep original intended changes
-    contextLines: [...contextBefore, ...contextAfter], // Store the newly generated context
+    contextLines: [...finalContextBefore, ...contextAfter], // Store the newly generated context
   };
   // 6. Filter out empty hunks

package/src/processors/CustomVariables.ts CHANGED

@@ -325,13 +325,30 @@ export class CustomVariables {
     minRepetitions?: number;  // Minimum occurrences to trigger hint (default: 2)
     minSubstringLength?: number; // Minimum repeated substring length (default: 50)
     recentMessagesWindow?: number; // Only scan the last N messages (default: 10)
+    throttleMessages?: number; // Only emit hint once per N new messages (default: 5)
+    maxExamples?: number;     // Max number of example variables to show (default: 3)
+    hintMessageTokens?: number; // Estimated tokens in the hint message itself for net savings calc (default: 190)
   } = {}): MessageProcessorFunction {
     const minLength = options.minLength ?? 50;
     const minRepetitions = options.minRepetitions ?? 2;
     const minSubstringLength = options.minSubstringLength ?? 50;
     const recentMessagesWindow = options.recentMessagesWindow ?? 10;
+    const throttleMessages = options.throttleMessages ?? 5;
+    const maxExamples = options.maxExamples ?? 3;
+    // ~100 base + 30 per example = ~190 tokens for the hint message itself
+    const hintMessageTokens = options.hintMessageTokens ?? (100 + maxExamples * 30);
+    // Throttle state: track message count at last hint emission
+    let lastHintAtMessageCount = -Infinity;
     return async (originalMessages: Message[], modifiedMessages: Message[]) => {
+      // Throttle: only emit hint if enough new messages have been added since last hint
+      const currentMessageCount = modifiedMessages.length;
+      if (currentMessageCount - lastHintAtMessageCount < throttleMessages) {
+        return;
+      }
       // Count occurrences of each string value across all tool call arguments
       const stringCounts = new Map<string, { count: number; toolNames: Set<string> }>();
@@ -391,8 +408,11 @@ export class CustomVariables {
       // Find entries that exceed the repetition threshold
       const repeatedTools: string[] = [];
+      const repeatedEntries: Array<{ str: string; count: number; toolNames: Set<string> }> = [];
       for (const [str, info] of stringCounts.entries()) {
         if (info.count >= minRepetitions) {
+          repeatedEntries.push({ str, count: info.count, toolNames: info.toolNames });
           for (const toolName of info.toolNames) {
             if (!repeatedTools.includes(toolName)) {
               repeatedTools.push(toolName);
@@ -402,12 +422,46 @@ export class CustomVariables {
       }
       if (repeatedTools.length > 0) {
+        lastHintAtMessageCount = currentMessageCount;
+        // Sort by (count * str.length) desc to surface highest-savings items first
+        repeatedEntries.sort((a, b) => b.count * b.str.length - a.count * a.str.length);
+        // Estimate token savings: chars_saved ÷ 4 (rough tokens-per-char estimate)
+        // Savings = (repetitions - 1) * str.length chars saved by using a short variable ref
+        let totalCharsSaved = 0;
+        for (const { str, count } of repeatedEntries) {
+          totalCharsSaved += (count - 1) * str.length;
+        }
+        const grossTokensSaved = Math.round(totalCharsSaved / 4);
+        const netTokensSaved = grossTokensSaved - hintMessageTokens;
+        // Skip the hint if the net savings are negative — the reminder costs more than it saves
+        if (netTokensSaved <= 0) {
+          return;
+        }
+        // Build example variable suggestions
+        const examples = repeatedEntries.slice(0, maxExamples).map(({ str, count, toolNames }, i) => {
+          const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
+          const ellipsis = str.length > 80 ? "…" : "";
+          const varName = `var${i + 1}`;
+          const charsSaved = (count - 1) * str.length;
+          const tokensSaved = Math.round(charsSaved / 4);
+          return (
+            `  • \`${varName}\` (used ${count}x in ${[...toolNames].join(", ")}, ~${tokensSaved} tokens saveable): "${preview}${ellipsis}"`
+          );
+        });
         modifiedMessages.push({
           role: "user",
           content:
-            `⚠️ Tool inputs have large repetitions detected in: ${repeatedTools.join(", ")}. ` +
+            `⚠️ Tool inputs have large repetitions detected in: ${repeatedTools.join(", ")} ` +
+            `(~${grossTokensSaved} tokens saveable, ~${netTokensSaved} net after this reminder). ` +
             `Consider storing repeated values with \`setVariable\` or \`storeToolCallToVariable\`, ` +
-            `then reference them via {{variableName}} in future tool calls to avoid re-outputting large strings.`,
+            `then reference them via {{variableName}} in future tool calls.\n` +
+            `Top repeated values to consider storing as variables:\n` +
+            examples.join("\n"),
         });
       }
     };