@tyvm/knowhow 0.0.114 → 0.0.116

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tyvm/knowhow",
3
- "version": "0.0.114",
3
+ "version": "0.0.116",
4
4
  "description": "ai cli with plugins and agents",
5
5
  "main": "ts_build/src/index.js",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ts-node
2
2
  /**
3
3
  * Test script: runs the repetition hint processor logic against a real agent metadata file
4
- * and prints whether the hint would fire and why/why not.
4
+ * and prints whether the hint would fire and why/why not, with token savings estimates.
5
5
  *
6
6
  * Usage:
7
7
  * npx ts-node scripts/test-repetition-hint.ts [path-to-metadata.json]
@@ -76,7 +76,7 @@ function longestCommonSubstring(a: string, b: string, minLength: number): string
76
76
  for (let i = 0; i < a.length - minLength + 1; i++) {
77
77
  for (let j = a.length; j > i + minLength - 1; j--) {
78
78
  const sub = a.slice(i, j);
79
- if (sub.length <= best.length) break; // already found longer, skip shorter
79
+ if (sub.length <= best.length) break;
80
80
  if (b.includes(sub)) {
81
81
  best = sub;
82
82
  break;
@@ -86,12 +86,18 @@ function longestCommonSubstring(a: string, b: string, minLength: number): string
86
86
  return best.length >= minLength ? best : null;
87
87
  }
88
88
 
89
+ interface ProcessorResult {
90
+ wouldHint: boolean;
91
+ repeatedTools: string[];
92
+ details: Map<string, { count: number; tools: Set<string> }>;
93
+ }
94
+
89
95
  function runProcessor(
90
96
  messages: Message[],
91
97
  minLength = 50,
92
98
  minRepetitions = 2,
93
99
  minSubstringLength = 50
94
- ): { wouldHint: boolean; repeatedTools: string[]; details: Map<string, { count: number; tools: Set<string> }> } {
100
+ ): ProcessorResult {
95
101
  const stringCounts = new Map<string, { count: number; tools: Set<string> }>();
96
102
  const toolStrings = collectToolCallStrings(messages, minLength);
97
103
 
@@ -107,13 +113,12 @@ function runProcessor(
107
113
  }
108
114
 
109
115
  // Step 2: repeated substrings across different full strings
110
- // e.g. the same JWT embedded in many different commands
111
116
  const substringCounts = new Map<string, { count: number; tools: Set<string> }>();
112
117
  for (let i = 0; i < toolStrings.length; i++) {
113
118
  for (let j = i + 1; j < toolStrings.length; j++) {
114
119
  const a = toolStrings[i];
115
120
  const b = toolStrings[j];
116
- if (a.value === b.value) continue; // already handled by exact match
121
+ if (a.value === b.value) continue;
117
122
  const common = longestCommonSubstring(a.value, b.value, minSubstringLength);
118
123
  if (common) {
119
124
  const existing = substringCounts.get(common);
@@ -128,7 +133,7 @@ function runProcessor(
128
133
  }
129
134
  }
130
135
 
131
- // Merge substring counts: count = number of unique pairs, count+1 = number of occurrences
136
+ // Merge substring counts
132
137
  for (const [sub, info] of substringCounts.entries()) {
133
138
  if (info.count + 1 >= minRepetitions && !stringCounts.has(sub)) {
134
139
  stringCounts.set(sub, { count: info.count + 1, tools: info.tools });
@@ -148,6 +153,26 @@ function runProcessor(
148
153
  return { wouldHint: repeatedTools.length > 0, repeatedTools, details: stringCounts };
149
154
  }
150
155
 
156
+ /**
157
+ * Estimate tokens saved by using variables for repeated strings.
158
+ * Savings = (repetitions - 1) * str.length chars / 4 chars-per-token
159
+ * Minus the cost of the reminder message itself (estimated tokens in hint message).
160
+ */
161
+ function estimateNetTokenSavings(
162
+ details: Map<string, { count: number; tools: Set<string> }>,
163
+ hintMessageTokens: number
164
+ ): { gross: number; net: number } {
165
+ let totalCharsSaved = 0;
166
+ for (const [str, info] of details.entries()) {
167
+ if (info.count >= 2) {
168
+ totalCharsSaved += (info.count - 1) * str.length;
169
+ }
170
+ }
171
+ const gross = Math.round(totalCharsSaved / 4);
172
+ const net = gross - hintMessageTokens;
173
+ return { gross, net };
174
+ }
175
+
151
176
  // ---- Main ----
152
177
 
153
178
  const raw = fs.readFileSync(metadataPath, "utf-8");
@@ -158,6 +183,13 @@ console.log(`\n=== Repetition Hint Processor Test ===`);
158
183
  console.log(`File: ${metadataPath}`);
159
184
  console.log(`Threads: ${threads.length}`);
160
185
 
186
+ // Approximate tokens in the hint message itself (the reminder we send to the agent)
187
+ // ~100 tokens for the base message + ~30 per example
188
+ const HINT_BASE_TOKENS = 100;
189
+ const HINT_TOKENS_PER_EXAMPLE = 30;
190
+ const MAX_EXAMPLES = 3;
191
+ const HINT_MESSAGE_TOKENS = HINT_BASE_TOKENS + MAX_EXAMPLES * HINT_TOKENS_PER_EXAMPLE;
192
+
161
193
  for (let ti = 0; ti < threads.length; ti++) {
162
194
  const thread = threads[ti];
163
195
  const toolCallMsgs = thread.filter((m) => m.tool_calls && m.tool_calls.length > 0);
@@ -177,19 +209,55 @@ for (let ti = 0; ti < threads.length; ti++) {
177
209
  const newResult = runProcessor(thread, 50, 2, 50);
178
210
  if (newResult.wouldHint) {
179
211
  console.log(`✅ Would hint! Repeated tools: ${newResult.repeatedTools.join(", ")}`);
180
- // Show top repeated substrings
212
+
213
+ const { gross, net } = estimateNetTokenSavings(newResult.details, HINT_MESSAGE_TOKENS);
214
+ console.log(`\n 💰 Token savings estimate:`);
215
+ console.log(` Gross savings (repeated chars ÷ 4) : ~${gross} tokens`);
216
+ console.log(` Cost of reminder message : ~${HINT_MESSAGE_TOKENS} tokens`);
217
+ console.log(` Net savings : ~${net} tokens`);
218
+
219
+ // Sort by impact (count * length) descending
181
220
  const repeated = Array.from(newResult.details.entries())
182
221
  .filter(([, info]) => info.count >= 2)
183
- .sort((a, b) => b[1].count - a[1].count)
222
+ .sort((a, b) => (b[1].count * b[0].length) - (a[1].count * a[0].length))
184
223
  .slice(0, 5);
185
- console.log(`\n Top repeated values (count, tools, preview):`);
186
- for (const [str, info] of repeated) {
187
- console.log(` count=${info.count}, tools=${[...info.tools].join(",")}`);
188
- console.log(` value=${JSON.stringify(str.slice(0, 120))}`);
189
- }
224
+
225
+ console.log(`\n Top repeated values to store as variables (sorted by token impact):`);
226
+ repeated.forEach(([str, info], i) => {
227
+ const charsSaved = (info.count - 1) * str.length;
228
+ const toksSaved = Math.round(charsSaved / 4);
229
+ const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
230
+ const ellipsis = str.length > 80 ? "…" : "";
231
+ console.log(`\n [var${i + 1}]`);
232
+ console.log(` count : ${info.count}x`);
233
+ console.log(` tools : ${[...info.tools].join(", ")}`);
234
+ console.log(` ~savings : ${toksSaved} tokens (${charsSaved} chars)`);
235
+ console.log(` value : "${preview}${ellipsis}"`);
236
+ if (str.length > 80) {
237
+ console.log(` (full len: ${str.length} chars)`);
238
+ }
239
+ });
240
+
241
+ // Show what the actual hint message would look like
242
+ const examples = repeated.slice(0, MAX_EXAMPLES).map(([str, info], i) => {
243
+ const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
244
+ const ellipsis = str.length > 80 ? "…" : "";
245
+ const toksSaved = Math.round(((info.count - 1) * str.length) / 4);
246
+ return ` • \`var${i + 1}\` (used ${info.count}x in ${[...info.tools].join(", ")}, ~${toksSaved} tokens saveable): "${preview}${ellipsis}"`;
247
+ });
248
+ console.log(`\n Example hint message that would be shown to the agent:`);
249
+ console.log(` ---`);
250
+ console.log(
251
+ ` ⚠️ Tool inputs have large repetitions detected in: ${newResult.repeatedTools.join(", ")} ` +
252
+ `(~${gross} output tokens could be saved, ~${net} net after this reminder).\n` +
253
+ ` Consider storing repeated values with \`setVariable\` or \`storeToolCallToVariable\`,\n` +
254
+ ` then reference them via {{variableName}} in future tool calls.\n` +
255
+ ` Top repeated values to consider storing as variables:\n` +
256
+ examples.join("\n")
257
+ );
258
+ console.log(` ---`);
190
259
  } else {
191
260
  console.log(`❌ Would NOT hint.`);
192
- // Show top large strings for diagnosis
193
261
  const toolStrings = collectToolCallStrings(thread, 50);
194
262
  console.log(`\n Total large strings in tool calls: ${toolStrings.length}`);
195
263
  const top = toolStrings.slice(0, 3);
@@ -250,6 +250,11 @@ const CONTEXT_LINES = 3; // Standard number of context lines
250
250
  function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
251
251
  const originalLines = splitByNewLines(originalContent);
252
252
 
253
+ // Special case: pure creation hunk on empty file (@@ -0,0 +1,N @@)
254
+ if (originalContent === "" && hunk.originalStartLine === 0 && hunk.originalLineCount === 0) {
255
+ return hunk; // Already valid, pass through as-is
256
+ }
257
+
253
258
  const deletionLinesContent = hunk.subtractions.map((l) => l.slice(1));
254
259
  const additionLinesContent = hunk.additions.map((l) => l.slice(1));
255
260
 
@@ -267,6 +272,16 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
267
272
  `Anchor found via deletion sequence at line ${actualOriginalStartLine}`
268
273
  );
269
274
  }
275
+
276
+ // 1b. If full sequence not found (non-contiguous deletions), anchor on first deletion alone
277
+ if (actualOriginalStartLine === -1) {
278
+ const firstDeletionLines = findAllLineNumbers(originalContent, deletionLinesContent[0]);
279
+ const closest = findClosestNumber(firstDeletionLines, hunk.originalStartLine);
280
+ if (closest !== undefined) {
281
+ actualOriginalStartLine = closest;
282
+ console.log(`Anchor found via first deletion line at line ${actualOriginalStartLine}`);
283
+ }
284
+ }
270
285
  }
271
286
 
272
287
  // 2. If deletions didn't anchor, try anchoring using context *before* the first change
@@ -351,30 +366,228 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
351
366
  // Ensure start line is at least 1
352
367
  actualOriginalStartLine = Math.max(1, actualOriginalStartLine);
353
368
 
354
- // 4. Reconstruct the hunk with correct context
355
- const contextBeforeStartLine = Math.max(
356
- 0,
357
- actualOriginalStartLine - CONTEXT_LINES - 1
358
- ); // 0-based index
359
- const contextBeforeEndLine = Math.max(0, actualOriginalStartLine - 1); // 0-based index
360
- const contextBefore = originalLines
361
- .slice(contextBeforeStartLine, contextBeforeEndLine)
362
- .map((l) => ` ${l}`);
363
-
364
- // End line of original content affected by deletions (1-based)
365
- const originalContentEndLine =
366
- actualOriginalStartLine + deletionLinesContent.length;
367
- const contextAfterStartLine = originalContentEndLine - 1; // 0-based index
368
- const contextAfterEndLine = Math.min(
369
- originalLines.length,
370
- contextAfterStartLine + CONTEXT_LINES
371
- ); // 0-based index
372
- const contextAfter = originalLines
373
- .slice(contextAfterStartLine, contextAfterEndLine)
374
- .map((l) => ` ${l}`);
369
+ // 4a. Detect interleaved hunks (context lines between change blocks)
370
+ // If so, preserve original body order, just filter ghost context lines and fix header
371
+ let hasInterleavedChanges = false;
372
+ let seenChange = false;
373
+ let seenContextAfterChange = false;
374
+ for (const line of hunk.lines) {
375
+ if (line.startsWith("+") || line.startsWith("-")) {
376
+ if (seenContextAfterChange) { hasInterleavedChanges = true; break; }
377
+ seenChange = true;
378
+ } else if (line.startsWith(" ") && seenChange) {
379
+ seenContextAfterChange = true;
380
+ }
381
+ }
382
+ // Check if there are more changes after a context block
383
+ if (seenContextAfterChange) {
384
+ for (const line of hunk.lines.slice(hunk.lines.findIndex((l, i) => {
385
+ let sc = false;
386
+ for (let j = 0; j <= i; j++) {
387
+ if (hunk.lines[j].startsWith("+") || hunk.lines[j].startsWith("-")) sc = true;
388
+ if (sc && hunk.lines[j].startsWith(" ") && j === i) return true;
389
+ }
390
+ return false;
391
+ }))) {
392
+ if (line.startsWith("+") || line.startsWith("-")) { hasInterleavedChanges = true; break; }
393
+ }
394
+ }
395
+
396
+ if (hasInterleavedChanges) {
397
+ // Group hunk lines into change-blocks separated by context
398
+ // Each block: { deletions, additions }
399
+ type ChangeBlock = { deletions: string[]; additions: string[]; };
400
+ const blocks: ChangeBlock[] = [];
401
+ let curBlock: ChangeBlock = { deletions: [], additions: [] };
402
+ let inBlock = false;
403
+ for (const line of hunk.lines) {
404
+ if (line.startsWith("-")) { curBlock.deletions.push(line.slice(1)); inBlock = true; }
405
+ else if (line.startsWith("+")) { curBlock.additions.push(line.slice(1)); inBlock = true; }
406
+ else if (inBlock) {
407
+ blocks.push(curBlock);
408
+ curBlock = { deletions: [], additions: [] };
409
+ inBlock = false;
410
+ }
411
+ }
412
+ if (inBlock || curBlock.deletions.length > 0 || curBlock.additions.length > 0) blocks.push(curBlock);
413
+
414
+ // For each block, try to apply as line-level or substring replacement
415
+ let resultLines = [...originalLines];
416
+ let lineOffset = 0;
417
+ let anyApplied = false;
418
+ for (const block of blocks) {
419
+ if (block.deletions.length === 0) continue;
420
+ // Try exact line match first
421
+ const seqIdx = findSequenceIndex(resultLines, block.deletions);
422
+ if (seqIdx !== -1) {
423
+ resultLines = [
424
+ ...resultLines.slice(0, seqIdx),
425
+ ...block.additions,
426
+ ...resultLines.slice(seqIdx + block.deletions.length),
427
+ ];
428
+ lineOffset += block.additions.length - block.deletions.length;
429
+ anyApplied = true;
430
+ continue;
431
+ }
432
+ // Try substring replacement: find a line containing all deletion content
433
+ const delContent = block.deletions.join(" ").trim();
434
+ const addContent = block.additions.join(" ").trim();
435
+ const matchIdx = resultLines.findIndex((l) => l.includes(delContent.split(" ")[0]) && block.deletions.every((d) => l.includes(d.trim())));
436
+ if (matchIdx !== -1) {
437
+ let newLine = resultLines[matchIdx];
438
+ for (let i = 0; i < block.deletions.length; i++) {
439
+ newLine = newLine.replace(block.deletions[i].trim(), block.additions[i]?.trim() ?? "");
440
+ }
441
+ resultLines = [...resultLines.slice(0, matchIdx), newLine, ...resultLines.slice(matchIdx + 1)];
442
+ anyApplied = true;
443
+ }
444
+ }
445
+
446
+ if (anyApplied) {
447
+ // Build a replacement patch from original -> result
448
+ const origStr = originalLines.join("\n");
449
+ const newStr = resultLines.join("\n");
450
+ // Find first differing line
451
+ let firstDiff = 0;
452
+ while (firstDiff < originalLines.length && firstDiff < resultLines.length && originalLines[firstDiff] === resultLines[firstDiff]) firstDiff++;
453
+ let lastDiffOrig = originalLines.length - 1;
454
+ let lastDiffNew = resultLines.length - 1;
455
+ while (lastDiffOrig > firstDiff && lastDiffNew > firstDiff && originalLines[lastDiffOrig] === resultLines[lastDiffNew]) { lastDiffOrig--; lastDiffNew--; }
456
+ const ctxStart = Math.max(0, firstDiff - 1);
457
+ const ctxEndOrig = Math.min(originalLines.length - 1, lastDiffOrig + 1);
458
+ const ctxEndNew = Math.min(resultLines.length - 1, lastDiffNew + 1);
459
+ const patchLines: string[] = [];
460
+ for (let i = ctxStart; i <= ctxEndOrig; i++) {
461
+ if (i >= firstDiff && i <= lastDiffOrig) patchLines.push(`-${originalLines[i]}`);
462
+ else patchLines.push(` ${originalLines[i]}`);
463
+ }
464
+ // Insert additions at right position
465
+ const finalLines: string[] = [];
466
+ for (let i = ctxStart; i <= ctxEndOrig; i++) {
467
+ if (i >= firstDiff && i <= lastDiffOrig) { finalLines.push(`-${originalLines[i]}`); }
468
+ else finalLines.push(` ${originalLines[i]}`);
469
+ }
470
+ // Add additions after last deletion
471
+ for (let i = firstDiff; i <= lastDiffNew; i++) {
472
+ if (i >= firstDiff && i <= lastDiffNew && (i > lastDiffOrig || originalLines[i] !== resultLines[i])) {
473
+ if (!finalLines.some((l) => l === `+${resultLines[i]}`)) finalLines.push(`+${resultLines[i]}`);
474
+ }
475
+ }
476
+ const origCount2 = finalLines.filter((l) => !l.startsWith("+")).length;
477
+ const newCount2 = finalLines.filter((l) => !l.startsWith("-")).length;
478
+ const newHeader2 = `@@ -${ctxStart + 1},${origCount2} +${ctxStart + 1},${newCount2} @@`;
479
+ return {
480
+ header: newHeader2,
481
+ originalStartLine: ctxStart + 1,
482
+ originalLineCount: origCount2,
483
+ newStartLine: ctxStart + 1,
484
+ newLineCount: newCount2,
485
+ lines: finalLines,
486
+ additions: finalLines.filter((l) => l.startsWith("+")),
487
+ subtractions: finalLines.filter((l) => l.startsWith("-")),
488
+ contextLines: finalLines.filter((l) => l.startsWith(" ")),
489
+ };
490
+ }
491
+
492
+ // Fallback: filter valid lines and return
493
+ const validLines = hunk.lines.filter((l) => {
494
+ if (l.startsWith("+") || l.startsWith("-")) return true;
495
+ if (!l.startsWith(" ") && l.trim() !== "") return false;
496
+ const content = l.startsWith(" ") ? l.slice(1) : l;
497
+ if (content.trim() === "") return originalLines.includes(content);
498
+ return originalLines.some((fl) => fl.trim() === content.trim());
499
+ }).map((l) => (!l.startsWith("+") && !l.startsWith("-") && !l.startsWith(" ")) ? ` ${l}` : l);
500
+ const origCount = validLines.filter((l) => !l.startsWith("+")).length;
501
+ const newCount = validLines.filter((l) => !l.startsWith("-")).length;
502
+ const newHeader = `@@ -${actualOriginalStartLine},${origCount} +${actualOriginalStartLine},${newCount} @@`;
503
+ return {
504
+ header: newHeader,
505
+ originalStartLine: actualOriginalStartLine,
506
+ originalLineCount: origCount,
507
+ newStartLine: actualOriginalStartLine,
508
+ newLineCount: newCount,
509
+ lines: validLines,
510
+ additions: hunk.additions,
511
+ subtractions: hunk.subtractions,
512
+ contextLines: validLines.filter((l) => !l.startsWith("+") && !l.startsWith("-")),
513
+ };
514
+ }
515
+
516
+ // Pure insertion: output minimal -N,0 format required by unified diff spec
517
+ if (deletionLinesContent.length === 0 && additionLinesContent.length > 0) {
518
+ const pureHeader = `@@ -${actualOriginalStartLine},0 +${actualOriginalStartLine},${hunk.additions.length} @@`;
519
+ return {
520
+ header: pureHeader,
521
+ originalStartLine: actualOriginalStartLine,
522
+ originalLineCount: 0,
523
+ newStartLine: actualOriginalStartLine,
524
+ newLineCount: hunk.additions.length,
525
+ lines: hunk.additions,
526
+ additions: hunk.additions,
527
+ subtractions: [],
528
+ contextLines: [],
529
+ };
530
+ }
531
+
532
+ // 4. Extract context lines from the original hunk body
533
+ const hunkContextBefore: string[] = [];
534
+ const hunkContextAfter: string[] = [];
535
+ let pastChanges = false;
536
+ for (const line of hunk.lines) {
537
+ if (line.startsWith("+") || line.startsWith("-")) {
538
+ pastChanges = true;
539
+ } else if (line.startsWith(" ")) {
540
+ if (!pastChanges) hunkContextBefore.push(line);
541
+ else hunkContextAfter.push(line);
542
+ }
543
+ }
544
+
545
+ // Validate context lines against the file (reject ghost lines not present in file)
546
+ // Replace context lines with the actual line from the file to fix indentation divergence
547
+ const validContextBefore = hunkContextBefore
548
+ .map((l) => {
549
+ const match = originalLines.find((fl) => fl.trim() === l.slice(1).trim() && l.slice(1).trim() !== "");
550
+ return match !== undefined ? ` ${match}` : null;
551
+ })
552
+ .filter((l): l is string => l !== null);
553
+ const validContextAfter = hunkContextAfter
554
+ .map((l) => {
555
+ const match = originalLines.find((fl) => fl.trim() === l.slice(1).trim() && l.slice(1).trim() !== "");
556
+ return match !== undefined ? ` ${match}` : null;
557
+ })
558
+ .filter((l): l is string => l !== null);
559
+
560
+ // Supplement: add 1 extra line before the valid context for better anchoring
561
+ const supplementBeforeIdx = actualOriginalStartLine - 1 - validContextBefore.length - 1; // 0-based
562
+ const supplementBefore: string[] =
563
+ supplementBeforeIdx >= 0
564
+ ? [` ${originalLines[supplementBeforeIdx]}`]
565
+ : [];
566
+
567
+ const contextBefore = [...supplementBefore, ...validContextBefore];
568
+
569
+ // For context after: use valid context from hunk; if none, take 1 line from file
570
+ const originalContentEndLine = actualOriginalStartLine + deletionLinesContent.length;
571
+ let contextAfter: string[];
572
+ if (deletionLinesContent.length === 0) {
573
+ // Pure insertion: always take the line at the insertion point from file (don't trust hunk context position)
574
+ const afterIdx = actualOriginalStartLine - 1; // 0-based index of line at insertion point
575
+ contextAfter = afterIdx < originalLines.length ? [` ${originalLines[afterIdx]}`] : [];
576
+ } else {
577
+ contextAfter = validContextAfter;
578
+ if (contextAfter.length === 0) {
579
+ const afterIdx = originalContentEndLine - 1; // 0-based index after deletions
580
+ if (afterIdx < originalLines.length) {
581
+ contextAfter = [` ${originalLines[afterIdx]}`];
582
+ }
583
+ }
584
+ }
585
+
586
+ // For pure-insertion hunks (no deletions), don't supplement before - keep only hunk context
587
+ const finalContextBefore = deletionLinesContent.length === 0 ? validContextBefore : contextBefore;
375
588
 
376
589
  const newHunkLines = [
377
- ...contextBefore,
590
+ ...finalContextBefore,
378
591
  ...hunk.subtractions, // Use the original subtraction lines from the input hunk
379
592
  ...hunk.additions, // Use the original addition lines from the input hunk
380
593
  ...contextAfter,
@@ -382,11 +595,11 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
382
595
 
383
596
  // 5. Recalculate the header
384
597
  const newOriginalStart =
385
- contextBefore.length > 0
386
- ? actualOriginalStartLine - contextBefore.length
598
+ finalContextBefore.length > 0
599
+ ? actualOriginalStartLine - finalContextBefore.length
387
600
  : actualOriginalStartLine;
388
601
  const newOriginalCount =
389
- contextBefore.length + hunk.subtractions.length + contextAfter.length;
602
+ finalContextBefore.length + hunk.subtractions.length + contextAfter.length;
390
603
 
391
604
  // The new start line depends on how many lines were added/removed *before* this hunk.
392
605
  // For an isolated hunk fix, we often just base it on the original start.
@@ -394,7 +607,7 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
394
607
  // Let's keep it simple and relative to the original start for now.
395
608
  const newNewStart = newOriginalStart; // Simplification: Assume start line number matches original unless offset by prior hunks (which we don't know here)
396
609
  const newNewCount =
397
- contextBefore.length + hunk.additions.length + contextAfter.length;
610
+ finalContextBefore.length + hunk.additions.length + contextAfter.length;
398
611
 
399
612
  // Handle edge case where count is 0 (e.g., adding to an empty file) - header format needs >= 1
400
613
  const finalOriginalStart = Math.max(1, newOriginalStart);
@@ -421,7 +634,7 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
421
634
  lines: newHunkLines,
422
635
  additions: hunk.additions, // Keep original intended changes
423
636
  subtractions: hunk.subtractions, // Keep original intended changes
424
- contextLines: [...contextBefore, ...contextAfter], // Store the newly generated context
637
+ contextLines: [...finalContextBefore, ...contextAfter], // Store the newly generated context
425
638
  };
426
639
 
427
640
  // 6. Filter out empty hunks
@@ -325,13 +325,30 @@ export class CustomVariables {
325
325
  minRepetitions?: number; // Minimum occurrences to trigger hint (default: 2)
326
326
  minSubstringLength?: number; // Minimum repeated substring length (default: 50)
327
327
  recentMessagesWindow?: number; // Only scan the last N messages (default: 10)
328
+ throttleMessages?: number; // Only emit hint once per N new messages (default: 5)
329
+ maxExamples?: number; // Max number of example variables to show (default: 3)
330
+ hintMessageTokens?: number; // Estimated tokens in the hint message itself for net savings calc (default: 100 + 30 per example, i.e. 190 with the default maxExamples of 3)
328
331
  } = {}): MessageProcessorFunction {
329
332
  const minLength = options.minLength ?? 50;
330
333
  const minRepetitions = options.minRepetitions ?? 2;
331
334
  const minSubstringLength = options.minSubstringLength ?? 50;
332
335
  const recentMessagesWindow = options.recentMessagesWindow ?? 10;
336
+ const throttleMessages = options.throttleMessages ?? 5;
337
+ const maxExamples = options.maxExamples ?? 3;
338
+
339
+ // ~100 base + 30 per example = ~190 tokens for the hint message itself (with the default 3 examples)
340
+ const hintMessageTokens = options.hintMessageTokens ?? (100 + maxExamples * 30);
341
+
342
+ // Throttle state: track message count at last hint emission
343
+ let lastHintAtMessageCount = -Infinity;
333
344
 
334
345
  return async (originalMessages: Message[], modifiedMessages: Message[]) => {
346
+ // Throttle: only emit hint if enough new messages have been added since last hint
347
+ const currentMessageCount = modifiedMessages.length;
348
+ if (currentMessageCount - lastHintAtMessageCount < throttleMessages) {
349
+ return;
350
+ }
351
+
335
352
  // Count occurrences of each string value across all tool call arguments
336
353
  const stringCounts = new Map<string, { count: number; toolNames: Set<string> }>();
337
354
 
@@ -391,8 +408,11 @@ export class CustomVariables {
391
408
 
392
409
  // Find entries that exceed the repetition threshold
393
410
  const repeatedTools: string[] = [];
411
+ const repeatedEntries: Array<{ str: string; count: number; toolNames: Set<string> }> = [];
412
+
394
413
  for (const [str, info] of stringCounts.entries()) {
395
414
  if (info.count >= minRepetitions) {
415
+ repeatedEntries.push({ str, count: info.count, toolNames: info.toolNames });
396
416
  for (const toolName of info.toolNames) {
397
417
  if (!repeatedTools.includes(toolName)) {
398
418
  repeatedTools.push(toolName);
@@ -402,12 +422,46 @@ export class CustomVariables {
402
422
  }
403
423
 
404
424
  if (repeatedTools.length > 0) {
425
+ lastHintAtMessageCount = currentMessageCount;
426
+
427
+ // Sort by (count * str.length) desc to surface highest-savings items first
428
+ repeatedEntries.sort((a, b) => b.count * b.str.length - a.count * a.str.length);
429
+
430
+ // Estimate token savings: chars_saved ÷ 4 (rough estimate of ~4 characters per token)
431
+ // Savings = (repetitions - 1) * str.length chars saved by using a short variable ref
432
+ let totalCharsSaved = 0;
433
+ for (const { str, count } of repeatedEntries) {
434
+ totalCharsSaved += (count - 1) * str.length;
435
+ }
436
+ const grossTokensSaved = Math.round(totalCharsSaved / 4);
437
+ const netTokensSaved = grossTokensSaved - hintMessageTokens;
438
+
439
+ // Skip the hint unless the net savings are positive — otherwise the reminder costs at least as much as it saves
440
+ if (netTokensSaved <= 0) {
441
+ return;
442
+ }
443
+
444
+ // Build example variable suggestions
445
+ const examples = repeatedEntries.slice(0, maxExamples).map(({ str, count, toolNames }, i) => {
446
+ const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
447
+ const ellipsis = str.length > 80 ? "…" : "";
448
+ const varName = `var${i + 1}`;
449
+ const charsSaved = (count - 1) * str.length;
450
+ const tokensSaved = Math.round(charsSaved / 4);
451
+ return (
452
+ ` • \`${varName}\` (used ${count}x in ${[...toolNames].join(", ")}, ~${tokensSaved} tokens saveable): "${preview}${ellipsis}"`
453
+ );
454
+ });
455
+
405
456
  modifiedMessages.push({
406
457
  role: "user",
407
458
  content:
408
- `⚠️ Tool inputs have large repetitions detected in: ${repeatedTools.join(", ")}. ` +
459
+ `⚠️ Tool inputs have large repetitions detected in: ${repeatedTools.join(", ")} ` +
460
+ `(~${grossTokensSaved} tokens saveable, ~${netTokensSaved} net after this reminder). ` +
409
461
  `Consider storing repeated values with \`setVariable\` or \`storeToolCallToVariable\`, ` +
410
- `then reference them via {{variableName}} in future tool calls to avoid re-outputting large strings.`,
462
+ `then reference them via {{variableName}} in future tool calls.\n` +
463
+ `Top repeated values to consider storing as variables:\n` +
464
+ examples.join("\n"),
411
465
  });
412
466
  }
413
467
  };