@tyvm/knowhow 0.0.114 → 0.0.115
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/test-repetition-hint.ts +82 -14
- package/src/agents/tools/patch.ts +240 -27
- package/src/processors/CustomVariables.ts +51 -2
- package/tests/patching/regression-2026.test.ts +283 -0
- package/ts_build/package.json +1 -1
- package/ts_build/src/agents/tools/patch.js +235 -16
- package/ts_build/src/agents/tools/patch.js.map +1 -1
- package/ts_build/src/processors/CustomVariables.d.ts +3 -0
- package/ts_build/src/processors/CustomVariables.js +31 -2
- package/ts_build/src/processors/CustomVariables.js.map +1 -1
- package/ts_build/tests/patching/regression-2026.test.d.ts +1 -0
- package/ts_build/tests/patching/regression-2026.test.js +163 -0
- package/ts_build/tests/patching/regression-2026.test.js.map +1 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env ts-node
|
|
2
2
|
/**
|
|
3
3
|
* Test script: runs the repetition hint processor logic against a real agent metadata file
|
|
4
|
-
* and prints whether the hint would fire and why/why not.
|
|
4
|
+
* and prints whether the hint would fire and why/why not, with token savings estimates.
|
|
5
5
|
*
|
|
6
6
|
* Usage:
|
|
7
7
|
* npx ts-node scripts/test-repetition-hint.ts [path-to-metadata.json]
|
|
@@ -76,7 +76,7 @@ function longestCommonSubstring(a: string, b: string, minLength: number): string
|
|
|
76
76
|
for (let i = 0; i < a.length - minLength + 1; i++) {
|
|
77
77
|
for (let j = a.length; j > i + minLength - 1; j--) {
|
|
78
78
|
const sub = a.slice(i, j);
|
|
79
|
-
if (sub.length <= best.length) break;
|
|
79
|
+
if (sub.length <= best.length) break;
|
|
80
80
|
if (b.includes(sub)) {
|
|
81
81
|
best = sub;
|
|
82
82
|
break;
|
|
@@ -86,12 +86,18 @@ function longestCommonSubstring(a: string, b: string, minLength: number): string
|
|
|
86
86
|
return best.length >= minLength ? best : null;
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
+
interface ProcessorResult {
|
|
90
|
+
wouldHint: boolean;
|
|
91
|
+
repeatedTools: string[];
|
|
92
|
+
details: Map<string, { count: number; tools: Set<string> }>;
|
|
93
|
+
}
|
|
94
|
+
|
|
89
95
|
function runProcessor(
|
|
90
96
|
messages: Message[],
|
|
91
97
|
minLength = 50,
|
|
92
98
|
minRepetitions = 2,
|
|
93
99
|
minSubstringLength = 50
|
|
94
|
-
):
|
|
100
|
+
): ProcessorResult {
|
|
95
101
|
const stringCounts = new Map<string, { count: number; tools: Set<string> }>();
|
|
96
102
|
const toolStrings = collectToolCallStrings(messages, minLength);
|
|
97
103
|
|
|
@@ -107,13 +113,12 @@ function runProcessor(
|
|
|
107
113
|
}
|
|
108
114
|
|
|
109
115
|
// Step 2: repeated substrings across different full strings
|
|
110
|
-
// e.g. the same JWT embedded in many different commands
|
|
111
116
|
const substringCounts = new Map<string, { count: number; tools: Set<string> }>();
|
|
112
117
|
for (let i = 0; i < toolStrings.length; i++) {
|
|
113
118
|
for (let j = i + 1; j < toolStrings.length; j++) {
|
|
114
119
|
const a = toolStrings[i];
|
|
115
120
|
const b = toolStrings[j];
|
|
116
|
-
if (a.value === b.value) continue;
|
|
121
|
+
if (a.value === b.value) continue;
|
|
117
122
|
const common = longestCommonSubstring(a.value, b.value, minSubstringLength);
|
|
118
123
|
if (common) {
|
|
119
124
|
const existing = substringCounts.get(common);
|
|
@@ -128,7 +133,7 @@ function runProcessor(
|
|
|
128
133
|
}
|
|
129
134
|
}
|
|
130
135
|
|
|
131
|
-
// Merge substring counts
|
|
136
|
+
// Merge substring counts
|
|
132
137
|
for (const [sub, info] of substringCounts.entries()) {
|
|
133
138
|
if (info.count + 1 >= minRepetitions && !stringCounts.has(sub)) {
|
|
134
139
|
stringCounts.set(sub, { count: info.count + 1, tools: info.tools });
|
|
@@ -148,6 +153,26 @@ function runProcessor(
|
|
|
148
153
|
return { wouldHint: repeatedTools.length > 0, repeatedTools, details: stringCounts };
|
|
149
154
|
}
|
|
150
155
|
|
|
156
|
+
/**
|
|
157
|
+
* Estimate tokens saved by using variables for repeated strings.
|
|
158
|
+
* Savings = (repetitions - 1) * str.length chars / 4 chars-per-token
|
|
159
|
+
* Minus the cost of the reminder message itself (estimated tokens in hint message).
|
|
160
|
+
*/
|
|
161
|
+
function estimateNetTokenSavings(
|
|
162
|
+
details: Map<string, { count: number; tools: Set<string> }>,
|
|
163
|
+
hintMessageTokens: number
|
|
164
|
+
): { gross: number; net: number } {
|
|
165
|
+
let totalCharsSaved = 0;
|
|
166
|
+
for (const [str, info] of details.entries()) {
|
|
167
|
+
if (info.count >= 2) {
|
|
168
|
+
totalCharsSaved += (info.count - 1) * str.length;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
const gross = Math.round(totalCharsSaved / 4);
|
|
172
|
+
const net = gross - hintMessageTokens;
|
|
173
|
+
return { gross, net };
|
|
174
|
+
}
|
|
175
|
+
|
|
151
176
|
// ---- Main ----
|
|
152
177
|
|
|
153
178
|
const raw = fs.readFileSync(metadataPath, "utf-8");
|
|
@@ -158,6 +183,13 @@ console.log(`\n=== Repetition Hint Processor Test ===`);
|
|
|
158
183
|
console.log(`File: ${metadataPath}`);
|
|
159
184
|
console.log(`Threads: ${threads.length}`);
|
|
160
185
|
|
|
186
|
+
// Approximate tokens in the hint message itself (the reminder we send to the agent)
|
|
187
|
+
// ~100 tokens for the base message + ~30 per example
|
|
188
|
+
const HINT_BASE_TOKENS = 100;
|
|
189
|
+
const HINT_TOKENS_PER_EXAMPLE = 30;
|
|
190
|
+
const MAX_EXAMPLES = 3;
|
|
191
|
+
const HINT_MESSAGE_TOKENS = HINT_BASE_TOKENS + MAX_EXAMPLES * HINT_TOKENS_PER_EXAMPLE;
|
|
192
|
+
|
|
161
193
|
for (let ti = 0; ti < threads.length; ti++) {
|
|
162
194
|
const thread = threads[ti];
|
|
163
195
|
const toolCallMsgs = thread.filter((m) => m.tool_calls && m.tool_calls.length > 0);
|
|
@@ -177,19 +209,55 @@ for (let ti = 0; ti < threads.length; ti++) {
|
|
|
177
209
|
const newResult = runProcessor(thread, 50, 2, 50);
|
|
178
210
|
if (newResult.wouldHint) {
|
|
179
211
|
console.log(`✅ Would hint! Repeated tools: ${newResult.repeatedTools.join(", ")}`);
|
|
180
|
-
|
|
212
|
+
|
|
213
|
+
const { gross, net } = estimateNetTokenSavings(newResult.details, HINT_MESSAGE_TOKENS);
|
|
214
|
+
console.log(`\n 💰 Token savings estimate:`);
|
|
215
|
+
console.log(` Gross savings (repeated chars ÷ 4) : ~${gross} tokens`);
|
|
216
|
+
console.log(` Cost of reminder message : ~${HINT_MESSAGE_TOKENS} tokens`);
|
|
217
|
+
console.log(` Net savings : ~${net} tokens`);
|
|
218
|
+
|
|
219
|
+
// Sort by impact (count * length) descending
|
|
181
220
|
const repeated = Array.from(newResult.details.entries())
|
|
182
221
|
.filter(([, info]) => info.count >= 2)
|
|
183
|
-
.sort((a, b) => b[1].count - a[1].count)
|
|
222
|
+
.sort((a, b) => (b[1].count * b[0].length) - (a[1].count * a[0].length))
|
|
184
223
|
.slice(0, 5);
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
224
|
+
|
|
225
|
+
console.log(`\n Top repeated values to store as variables (sorted by token impact):`);
|
|
226
|
+
repeated.forEach(([str, info], i) => {
|
|
227
|
+
const charsSaved = (info.count - 1) * str.length;
|
|
228
|
+
const toksSaved = Math.round(charsSaved / 4);
|
|
229
|
+
const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
|
|
230
|
+
const ellipsis = str.length > 80 ? "…" : "";
|
|
231
|
+
console.log(`\n [var${i + 1}]`);
|
|
232
|
+
console.log(` count : ${info.count}x`);
|
|
233
|
+
console.log(` tools : ${[...info.tools].join(", ")}`);
|
|
234
|
+
console.log(` ~savings : ${toksSaved} tokens (${charsSaved} chars)`);
|
|
235
|
+
console.log(` value : "${preview}${ellipsis}"`);
|
|
236
|
+
if (str.length > 80) {
|
|
237
|
+
console.log(` (full len: ${str.length} chars)`);
|
|
238
|
+
}
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
// Show what the actual hint message would look like
|
|
242
|
+
const examples = repeated.slice(0, MAX_EXAMPLES).map(([str, info], i) => {
|
|
243
|
+
const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
|
|
244
|
+
const ellipsis = str.length > 80 ? "…" : "";
|
|
245
|
+
const toksSaved = Math.round(((info.count - 1) * str.length) / 4);
|
|
246
|
+
return ` • \`var${i + 1}\` (used ${info.count}x in ${[...info.tools].join(", ")}, ~${toksSaved} tokens saveable): "${preview}${ellipsis}"`;
|
|
247
|
+
});
|
|
248
|
+
console.log(`\n Example hint message that would be shown to the agent:`);
|
|
249
|
+
console.log(` ---`);
|
|
250
|
+
console.log(
|
|
251
|
+
` ⚠️ Tool inputs have large repetitions detected in: ${newResult.repeatedTools.join(", ")} ` +
|
|
252
|
+
`(~${gross} output tokens could be saved, ~${net} net after this reminder).\n` +
|
|
253
|
+
` Consider storing repeated values with \`setVariable\` or \`storeToolCallToVariable\`,\n` +
|
|
254
|
+
` then reference them via {{variableName}} in future tool calls.\n` +
|
|
255
|
+
` Top repeated values to consider storing as variables:\n` +
|
|
256
|
+
examples.join("\n")
|
|
257
|
+
);
|
|
258
|
+
console.log(` ---`);
|
|
190
259
|
} else {
|
|
191
260
|
console.log(`❌ Would NOT hint.`);
|
|
192
|
-
// Show top large strings for diagnosis
|
|
193
261
|
const toolStrings = collectToolCallStrings(thread, 50);
|
|
194
262
|
console.log(`\n Total large strings in tool calls: ${toolStrings.length}`);
|
|
195
263
|
const top = toolStrings.slice(0, 3);
|
|
@@ -250,6 +250,11 @@ const CONTEXT_LINES = 3; // Standard number of context lines
|
|
|
250
250
|
function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
|
|
251
251
|
const originalLines = splitByNewLines(originalContent);
|
|
252
252
|
|
|
253
|
+
// Special case: pure creation hunk on empty file (@@ -0,0 +1,N @@)
|
|
254
|
+
if (originalContent === "" && hunk.originalStartLine === 0 && hunk.originalLineCount === 0) {
|
|
255
|
+
return hunk; // Already valid, pass through as-is
|
|
256
|
+
}
|
|
257
|
+
|
|
253
258
|
const deletionLinesContent = hunk.subtractions.map((l) => l.slice(1));
|
|
254
259
|
const additionLinesContent = hunk.additions.map((l) => l.slice(1));
|
|
255
260
|
|
|
@@ -267,6 +272,16 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
|
|
|
267
272
|
`Anchor found via deletion sequence at line ${actualOriginalStartLine}`
|
|
268
273
|
);
|
|
269
274
|
}
|
|
275
|
+
|
|
276
|
+
// 1b. If full sequence not found (non-contiguous deletions), anchor on first deletion alone
|
|
277
|
+
if (actualOriginalStartLine === -1) {
|
|
278
|
+
const firstDeletionLines = findAllLineNumbers(originalContent, deletionLinesContent[0]);
|
|
279
|
+
const closest = findClosestNumber(firstDeletionLines, hunk.originalStartLine);
|
|
280
|
+
if (closest !== undefined) {
|
|
281
|
+
actualOriginalStartLine = closest;
|
|
282
|
+
console.log(`Anchor found via first deletion line at line ${actualOriginalStartLine}`);
|
|
283
|
+
}
|
|
284
|
+
}
|
|
270
285
|
}
|
|
271
286
|
|
|
272
287
|
// 2. If deletions didn't anchor, try anchoring using context *before* the first change
|
|
@@ -351,30 +366,228 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
|
|
|
351
366
|
// Ensure start line is at least 1
|
|
352
367
|
actualOriginalStartLine = Math.max(1, actualOriginalStartLine);
|
|
353
368
|
|
|
354
|
-
//
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
const
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
369
|
+
// 4a. Detect interleaved hunks (context lines between change blocks)
|
|
370
|
+
// If so, preserve original body order, just filter ghost context lines and fix header
|
|
371
|
+
let hasInterleavedChanges = false;
|
|
372
|
+
let seenChange = false;
|
|
373
|
+
let seenContextAfterChange = false;
|
|
374
|
+
for (const line of hunk.lines) {
|
|
375
|
+
if (line.startsWith("+") || line.startsWith("-")) {
|
|
376
|
+
if (seenContextAfterChange) { hasInterleavedChanges = true; break; }
|
|
377
|
+
seenChange = true;
|
|
378
|
+
} else if (line.startsWith(" ") && seenChange) {
|
|
379
|
+
seenContextAfterChange = true;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
// Check if there are more changes after a context block
|
|
383
|
+
if (seenContextAfterChange) {
|
|
384
|
+
for (const line of hunk.lines.slice(hunk.lines.findIndex((l, i) => {
|
|
385
|
+
let sc = false;
|
|
386
|
+
for (let j = 0; j <= i; j++) {
|
|
387
|
+
if (hunk.lines[j].startsWith("+") || hunk.lines[j].startsWith("-")) sc = true;
|
|
388
|
+
if (sc && hunk.lines[j].startsWith(" ") && j === i) return true;
|
|
389
|
+
}
|
|
390
|
+
return false;
|
|
391
|
+
}))) {
|
|
392
|
+
if (line.startsWith("+") || line.startsWith("-")) { hasInterleavedChanges = true; break; }
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
if (hasInterleavedChanges) {
|
|
397
|
+
// Group hunk lines into change-blocks separated by context
|
|
398
|
+
// Each block: { deletions, additions }
|
|
399
|
+
type ChangeBlock = { deletions: string[]; additions: string[]; };
|
|
400
|
+
const blocks: ChangeBlock[] = [];
|
|
401
|
+
let curBlock: ChangeBlock = { deletions: [], additions: [] };
|
|
402
|
+
let inBlock = false;
|
|
403
|
+
for (const line of hunk.lines) {
|
|
404
|
+
if (line.startsWith("-")) { curBlock.deletions.push(line.slice(1)); inBlock = true; }
|
|
405
|
+
else if (line.startsWith("+")) { curBlock.additions.push(line.slice(1)); inBlock = true; }
|
|
406
|
+
else if (inBlock) {
|
|
407
|
+
blocks.push(curBlock);
|
|
408
|
+
curBlock = { deletions: [], additions: [] };
|
|
409
|
+
inBlock = false;
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
if (inBlock || curBlock.deletions.length > 0 || curBlock.additions.length > 0) blocks.push(curBlock);
|
|
413
|
+
|
|
414
|
+
// For each block, try to apply as line-level or substring replacement
|
|
415
|
+
let resultLines = [...originalLines];
|
|
416
|
+
let lineOffset = 0;
|
|
417
|
+
let anyApplied = false;
|
|
418
|
+
for (const block of blocks) {
|
|
419
|
+
if (block.deletions.length === 0) continue;
|
|
420
|
+
// Try exact line match first
|
|
421
|
+
const seqIdx = findSequenceIndex(resultLines, block.deletions);
|
|
422
|
+
if (seqIdx !== -1) {
|
|
423
|
+
resultLines = [
|
|
424
|
+
...resultLines.slice(0, seqIdx),
|
|
425
|
+
...block.additions,
|
|
426
|
+
...resultLines.slice(seqIdx + block.deletions.length),
|
|
427
|
+
];
|
|
428
|
+
lineOffset += block.additions.length - block.deletions.length;
|
|
429
|
+
anyApplied = true;
|
|
430
|
+
continue;
|
|
431
|
+
}
|
|
432
|
+
// Try substring replacement: find a line containing all deletion content
|
|
433
|
+
const delContent = block.deletions.join(" ").trim();
|
|
434
|
+
const addContent = block.additions.join(" ").trim();
|
|
435
|
+
const matchIdx = resultLines.findIndex((l) => l.includes(delContent.split(" ")[0]) && block.deletions.every((d) => l.includes(d.trim())));
|
|
436
|
+
if (matchIdx !== -1) {
|
|
437
|
+
let newLine = resultLines[matchIdx];
|
|
438
|
+
for (let i = 0; i < block.deletions.length; i++) {
|
|
439
|
+
newLine = newLine.replace(block.deletions[i].trim(), block.additions[i]?.trim() ?? "");
|
|
440
|
+
}
|
|
441
|
+
resultLines = [...resultLines.slice(0, matchIdx), newLine, ...resultLines.slice(matchIdx + 1)];
|
|
442
|
+
anyApplied = true;
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
if (anyApplied) {
|
|
447
|
+
// Build a replacement patch from original -> result
|
|
448
|
+
const origStr = originalLines.join("\n");
|
|
449
|
+
const newStr = resultLines.join("\n");
|
|
450
|
+
// Find first differing line
|
|
451
|
+
let firstDiff = 0;
|
|
452
|
+
while (firstDiff < originalLines.length && firstDiff < resultLines.length && originalLines[firstDiff] === resultLines[firstDiff]) firstDiff++;
|
|
453
|
+
let lastDiffOrig = originalLines.length - 1;
|
|
454
|
+
let lastDiffNew = resultLines.length - 1;
|
|
455
|
+
while (lastDiffOrig > firstDiff && lastDiffNew > firstDiff && originalLines[lastDiffOrig] === resultLines[lastDiffNew]) { lastDiffOrig--; lastDiffNew--; }
|
|
456
|
+
const ctxStart = Math.max(0, firstDiff - 1);
|
|
457
|
+
const ctxEndOrig = Math.min(originalLines.length - 1, lastDiffOrig + 1);
|
|
458
|
+
const ctxEndNew = Math.min(resultLines.length - 1, lastDiffNew + 1);
|
|
459
|
+
const patchLines: string[] = [];
|
|
460
|
+
for (let i = ctxStart; i <= ctxEndOrig; i++) {
|
|
461
|
+
if (i >= firstDiff && i <= lastDiffOrig) patchLines.push(`-${originalLines[i]}`);
|
|
462
|
+
else patchLines.push(` ${originalLines[i]}`);
|
|
463
|
+
}
|
|
464
|
+
// Insert additions at right position
|
|
465
|
+
const finalLines: string[] = [];
|
|
466
|
+
for (let i = ctxStart; i <= ctxEndOrig; i++) {
|
|
467
|
+
if (i >= firstDiff && i <= lastDiffOrig) { finalLines.push(`-${originalLines[i]}`); }
|
|
468
|
+
else finalLines.push(` ${originalLines[i]}`);
|
|
469
|
+
}
|
|
470
|
+
// Add additions after last deletion
|
|
471
|
+
for (let i = firstDiff; i <= lastDiffNew; i++) {
|
|
472
|
+
if (i >= firstDiff && i <= lastDiffNew && (i > lastDiffOrig || originalLines[i] !== resultLines[i])) {
|
|
473
|
+
if (!finalLines.some((l) => l === `+${resultLines[i]}`)) finalLines.push(`+${resultLines[i]}`);
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
const origCount2 = finalLines.filter((l) => !l.startsWith("+")).length;
|
|
477
|
+
const newCount2 = finalLines.filter((l) => !l.startsWith("-")).length;
|
|
478
|
+
const newHeader2 = `@@ -${ctxStart + 1},${origCount2} +${ctxStart + 1},${newCount2} @@`;
|
|
479
|
+
return {
|
|
480
|
+
header: newHeader2,
|
|
481
|
+
originalStartLine: ctxStart + 1,
|
|
482
|
+
originalLineCount: origCount2,
|
|
483
|
+
newStartLine: ctxStart + 1,
|
|
484
|
+
newLineCount: newCount2,
|
|
485
|
+
lines: finalLines,
|
|
486
|
+
additions: finalLines.filter((l) => l.startsWith("+")),
|
|
487
|
+
subtractions: finalLines.filter((l) => l.startsWith("-")),
|
|
488
|
+
contextLines: finalLines.filter((l) => l.startsWith(" ")),
|
|
489
|
+
};
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
// Fallback: filter valid lines and return
|
|
493
|
+
const validLines = hunk.lines.filter((l) => {
|
|
494
|
+
if (l.startsWith("+") || l.startsWith("-")) return true;
|
|
495
|
+
if (!l.startsWith(" ") && l.trim() !== "") return false;
|
|
496
|
+
const content = l.startsWith(" ") ? l.slice(1) : l;
|
|
497
|
+
if (content.trim() === "") return originalLines.includes(content);
|
|
498
|
+
return originalLines.some((fl) => fl.trim() === content.trim());
|
|
499
|
+
}).map((l) => (!l.startsWith("+") && !l.startsWith("-") && !l.startsWith(" ")) ? ` ${l}` : l);
|
|
500
|
+
const origCount = validLines.filter((l) => !l.startsWith("+")).length;
|
|
501
|
+
const newCount = validLines.filter((l) => !l.startsWith("-")).length;
|
|
502
|
+
const newHeader = `@@ -${actualOriginalStartLine},${origCount} +${actualOriginalStartLine},${newCount} @@`;
|
|
503
|
+
return {
|
|
504
|
+
header: newHeader,
|
|
505
|
+
originalStartLine: actualOriginalStartLine,
|
|
506
|
+
originalLineCount: origCount,
|
|
507
|
+
newStartLine: actualOriginalStartLine,
|
|
508
|
+
newLineCount: newCount,
|
|
509
|
+
lines: validLines,
|
|
510
|
+
additions: hunk.additions,
|
|
511
|
+
subtractions: hunk.subtractions,
|
|
512
|
+
contextLines: validLines.filter((l) => !l.startsWith("+") && !l.startsWith("-")),
|
|
513
|
+
};
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
// Pure insertion: output minimal -N,0 format required by unified diff spec
|
|
517
|
+
if (deletionLinesContent.length === 0 && additionLinesContent.length > 0) {
|
|
518
|
+
const pureHeader = `@@ -${actualOriginalStartLine},0 +${actualOriginalStartLine},${hunk.additions.length} @@`;
|
|
519
|
+
return {
|
|
520
|
+
header: pureHeader,
|
|
521
|
+
originalStartLine: actualOriginalStartLine,
|
|
522
|
+
originalLineCount: 0,
|
|
523
|
+
newStartLine: actualOriginalStartLine,
|
|
524
|
+
newLineCount: hunk.additions.length,
|
|
525
|
+
lines: hunk.additions,
|
|
526
|
+
additions: hunk.additions,
|
|
527
|
+
subtractions: [],
|
|
528
|
+
contextLines: [],
|
|
529
|
+
};
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
// 4. Extract context lines from the original hunk body
|
|
533
|
+
const hunkContextBefore: string[] = [];
|
|
534
|
+
const hunkContextAfter: string[] = [];
|
|
535
|
+
let pastChanges = false;
|
|
536
|
+
for (const line of hunk.lines) {
|
|
537
|
+
if (line.startsWith("+") || line.startsWith("-")) {
|
|
538
|
+
pastChanges = true;
|
|
539
|
+
} else if (line.startsWith(" ")) {
|
|
540
|
+
if (!pastChanges) hunkContextBefore.push(line);
|
|
541
|
+
else hunkContextAfter.push(line);
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
// Validate context lines against the file (reject ghost lines not present in file)
|
|
546
|
+
// Replace context lines with the actual line from the file to fix indentation divergence
|
|
547
|
+
const validContextBefore = hunkContextBefore
|
|
548
|
+
.map((l) => {
|
|
549
|
+
const match = originalLines.find((fl) => fl.trim() === l.slice(1).trim() && l.slice(1).trim() !== "");
|
|
550
|
+
return match !== undefined ? ` ${match}` : null;
|
|
551
|
+
})
|
|
552
|
+
.filter((l): l is string => l !== null);
|
|
553
|
+
const validContextAfter = hunkContextAfter
|
|
554
|
+
.map((l) => {
|
|
555
|
+
const match = originalLines.find((fl) => fl.trim() === l.slice(1).trim() && l.slice(1).trim() !== "");
|
|
556
|
+
return match !== undefined ? ` ${match}` : null;
|
|
557
|
+
})
|
|
558
|
+
.filter((l): l is string => l !== null);
|
|
559
|
+
|
|
560
|
+
// Supplement: add 1 extra line before the valid context for better anchoring
|
|
561
|
+
const supplementBeforeIdx = actualOriginalStartLine - 1 - validContextBefore.length - 1; // 0-based
|
|
562
|
+
const supplementBefore: string[] =
|
|
563
|
+
supplementBeforeIdx >= 0
|
|
564
|
+
? [` ${originalLines[supplementBeforeIdx]}`]
|
|
565
|
+
: [];
|
|
566
|
+
|
|
567
|
+
const contextBefore = [...supplementBefore, ...validContextBefore];
|
|
568
|
+
|
|
569
|
+
// For context after: use valid context from hunk; if none, take 1 line from file
|
|
570
|
+
const originalContentEndLine = actualOriginalStartLine + deletionLinesContent.length;
|
|
571
|
+
let contextAfter: string[];
|
|
572
|
+
if (deletionLinesContent.length === 0) {
|
|
573
|
+
// Pure insertion: always take the line at the insertion point from file (don't trust hunk context position)
|
|
574
|
+
const afterIdx = actualOriginalStartLine - 1; // 0-based index of line at insertion point
|
|
575
|
+
contextAfter = afterIdx < originalLines.length ? [` ${originalLines[afterIdx]}`] : [];
|
|
576
|
+
} else {
|
|
577
|
+
contextAfter = validContextAfter;
|
|
578
|
+
if (contextAfter.length === 0) {
|
|
579
|
+
const afterIdx = originalContentEndLine - 1; // 0-based index after deletions
|
|
580
|
+
if (afterIdx < originalLines.length) {
|
|
581
|
+
contextAfter = [` ${originalLines[afterIdx]}`];
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// For pure-insertion hunks (no deletions), don't supplement before - keep only hunk context
|
|
587
|
+
const finalContextBefore = deletionLinesContent.length === 0 ? validContextBefore : contextBefore;
|
|
375
588
|
|
|
376
589
|
const newHunkLines = [
|
|
377
|
-
...
|
|
590
|
+
...finalContextBefore,
|
|
378
591
|
...hunk.subtractions, // Use the original subtraction lines from the input hunk
|
|
379
592
|
...hunk.additions, // Use the original addition lines from the input hunk
|
|
380
593
|
...contextAfter,
|
|
@@ -382,11 +595,11 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
|
|
|
382
595
|
|
|
383
596
|
// 5. Recalculate the header
|
|
384
597
|
const newOriginalStart =
|
|
385
|
-
|
|
386
|
-
? actualOriginalStartLine -
|
|
598
|
+
finalContextBefore.length > 0
|
|
599
|
+
? actualOriginalStartLine - finalContextBefore.length
|
|
387
600
|
: actualOriginalStartLine;
|
|
388
601
|
const newOriginalCount =
|
|
389
|
-
|
|
602
|
+
finalContextBefore.length + hunk.subtractions.length + contextAfter.length;
|
|
390
603
|
|
|
391
604
|
// The new start line depends on how many lines were added/removed *before* this hunk.
|
|
392
605
|
// For an isolated hunk fix, we often just base it on the original start.
|
|
@@ -394,7 +607,7 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
|
|
|
394
607
|
// Let's keep it simple and relative to the original start for now.
|
|
395
608
|
const newNewStart = newOriginalStart; // Simplification: Assume start line number matches original unless offset by prior hunks (which we don't know here)
|
|
396
609
|
const newNewCount =
|
|
397
|
-
|
|
610
|
+
finalContextBefore.length + hunk.additions.length + contextAfter.length;
|
|
398
611
|
|
|
399
612
|
// Handle edge case where count is 0 (e.g., adding to an empty file) - header format needs >= 1
|
|
400
613
|
const finalOriginalStart = Math.max(1, newOriginalStart);
|
|
@@ -421,7 +634,7 @@ function fixSingleHunk(hunk: Hunk, originalContent: string): Hunk | null {
|
|
|
421
634
|
lines: newHunkLines,
|
|
422
635
|
additions: hunk.additions, // Keep original intended changes
|
|
423
636
|
subtractions: hunk.subtractions, // Keep original intended changes
|
|
424
|
-
contextLines: [...
|
|
637
|
+
contextLines: [...finalContextBefore, ...contextAfter], // Store the newly generated context
|
|
425
638
|
};
|
|
426
639
|
|
|
427
640
|
// 6. Filter out empty hunks
|
|
@@ -325,13 +325,30 @@ export class CustomVariables {
|
|
|
325
325
|
minRepetitions?: number; // Minimum occurrences to trigger hint (default: 2)
|
|
326
326
|
minSubstringLength?: number; // Minimum repeated substring length (default: 50)
|
|
327
327
|
recentMessagesWindow?: number; // Only scan the last N messages (default: 10)
|
|
328
|
+
throttleMessages?: number; // Only emit hint once per N new messages (default: 5)
|
|
329
|
+
maxExamples?: number; // Max number of example variables to show (default: 3)
|
|
330
|
+
hintMessageTokens?: number; // Estimated tokens in the hint message itself for net savings calc (default: 100 + 30 * maxExamples, i.e. 190 with the default 3 examples)
|
|
328
331
|
} = {}): MessageProcessorFunction {
|
|
329
332
|
const minLength = options.minLength ?? 50;
|
|
330
333
|
const minRepetitions = options.minRepetitions ?? 2;
|
|
331
334
|
const minSubstringLength = options.minSubstringLength ?? 50;
|
|
332
335
|
const recentMessagesWindow = options.recentMessagesWindow ?? 10;
|
|
336
|
+
const throttleMessages = options.throttleMessages ?? 5;
|
|
337
|
+
const maxExamples = options.maxExamples ?? 3;
|
|
338
|
+
|
|
339
|
+
// ~100 base + 30 per example for the hint message itself (= ~190 tokens with the default 3 examples)
|
|
340
|
+
const hintMessageTokens = options.hintMessageTokens ?? (100 + maxExamples * 30);
|
|
341
|
+
|
|
342
|
+
// Throttle state: track message count at last hint emission
|
|
343
|
+
let lastHintAtMessageCount = -Infinity;
|
|
333
344
|
|
|
334
345
|
return async (originalMessages: Message[], modifiedMessages: Message[]) => {
|
|
346
|
+
// Throttle: only emit hint if enough new messages have been added since last hint
|
|
347
|
+
const currentMessageCount = modifiedMessages.length;
|
|
348
|
+
if (currentMessageCount - lastHintAtMessageCount < throttleMessages) {
|
|
349
|
+
return;
|
|
350
|
+
}
|
|
351
|
+
|
|
335
352
|
// Count occurrences of each string value across all tool call arguments
|
|
336
353
|
const stringCounts = new Map<string, { count: number; toolNames: Set<string> }>();
|
|
337
354
|
|
|
@@ -391,8 +408,11 @@ export class CustomVariables {
|
|
|
391
408
|
|
|
392
409
|
// Find entries that exceed the repetition threshold
|
|
393
410
|
const repeatedTools: string[] = [];
|
|
411
|
+
const repeatedEntries: Array<{ str: string; count: number; toolNames: Set<string> }> = [];
|
|
412
|
+
|
|
394
413
|
for (const [str, info] of stringCounts.entries()) {
|
|
395
414
|
if (info.count >= minRepetitions) {
|
|
415
|
+
repeatedEntries.push({ str, count: info.count, toolNames: info.toolNames });
|
|
396
416
|
for (const toolName of info.toolNames) {
|
|
397
417
|
if (!repeatedTools.includes(toolName)) {
|
|
398
418
|
repeatedTools.push(toolName);
|
|
@@ -402,12 +422,41 @@ export class CustomVariables {
|
|
|
402
422
|
}
|
|
403
423
|
|
|
404
424
|
if (repeatedTools.length > 0) {
|
|
425
|
+
lastHintAtMessageCount = currentMessageCount;
|
|
426
|
+
|
|
427
|
+
// Sort by (count * str.length) desc to surface highest-savings items first
|
|
428
|
+
repeatedEntries.sort((a, b) => b.count * b.str.length - a.count * a.str.length);
|
|
429
|
+
|
|
430
|
+
// Estimate token savings: chars_saved ÷ 4 (rough estimate of ~4 chars per token)
|
|
431
|
+
// Savings = (repetitions - 1) * str.length chars saved by using a short variable ref
|
|
432
|
+
let totalCharsSaved = 0;
|
|
433
|
+
for (const { str, count } of repeatedEntries) {
|
|
434
|
+
totalCharsSaved += (count - 1) * str.length;
|
|
435
|
+
}
|
|
436
|
+
const grossTokensSaved = Math.round(totalCharsSaved / 4);
|
|
437
|
+
const netTokensSaved = grossTokensSaved - hintMessageTokens;
|
|
438
|
+
|
|
439
|
+
// Build example variable suggestions
|
|
440
|
+
const examples = repeatedEntries.slice(0, maxExamples).map(({ str, count, toolNames }, i) => {
|
|
441
|
+
const preview = str.trim().slice(0, 80).replace(/\s+/g, " ");
|
|
442
|
+
const ellipsis = str.length > 80 ? "…" : "";
|
|
443
|
+
const varName = `var${i + 1}`;
|
|
444
|
+
const charsSaved = (count - 1) * str.length;
|
|
445
|
+
const tokensSaved = Math.round(charsSaved / 4);
|
|
446
|
+
return (
|
|
447
|
+
` • \`${varName}\` (used ${count}x in ${[...toolNames].join(", ")}, ~${tokensSaved} tokens saveable): "${preview}${ellipsis}"`
|
|
448
|
+
);
|
|
449
|
+
});
|
|
450
|
+
|
|
405
451
|
modifiedMessages.push({
|
|
406
452
|
role: "user",
|
|
407
453
|
content:
|
|
408
|
-
`⚠️ Tool inputs have large repetitions detected in: ${repeatedTools.join(", ")}
|
|
454
|
+
`⚠️ Tool inputs have large repetitions detected in: ${repeatedTools.join(", ")} ` +
|
|
455
|
+
`(~${grossTokensSaved} tokens saveable, ~${netTokensSaved} net after this reminder). ` +
|
|
409
456
|
`Consider storing repeated values with \`setVariable\` or \`storeToolCallToVariable\`, ` +
|
|
410
|
-
`then reference them via {{variableName}} in future tool calls
|
|
457
|
+
`then reference them via {{variableName}} in future tool calls.\n` +
|
|
458
|
+
`Top repeated values to consider storing as variables:\n` +
|
|
459
|
+
examples.join("\n"),
|
|
411
460
|
});
|
|
412
461
|
}
|
|
413
462
|
};
|