git-coco 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +30 -0
- package/dist/index.esm.mjs +293 -37
- package/dist/index.js +293 -37
- package/package.json +5 -5
package/dist/index.d.ts
CHANGED
|
@@ -38,6 +38,21 @@ type BaseLLMService = {
|
|
|
38
38
|
* @default 6
|
|
39
39
|
*/
|
|
40
40
|
maxConcurrent?: number;
|
|
41
|
+
/**
|
|
42
|
+
* Minimum token count for a directory/file group to be eligible for summarization.
|
|
43
|
+
* Groups below this threshold preserve raw diffs to maintain detail.
|
|
44
|
+
*
|
|
45
|
+
* @default 400
|
|
46
|
+
*/
|
|
47
|
+
minTokensForSummary?: number;
|
|
48
|
+
/**
|
|
49
|
+
* Maximum tokens allowed for a single file diff before it gets pre-summarized.
|
|
50
|
+
* Prevents large files from biasing the overall summary.
|
|
51
|
+
* If not set, defaults to 25% of tokenLimit.
|
|
52
|
+
*
|
|
53
|
+
* @default undefined (uses 0.25 * tokenLimit)
|
|
54
|
+
*/
|
|
55
|
+
maxFileTokens?: number;
|
|
41
56
|
authentication: Authentication;
|
|
42
57
|
requestOptions?: {
|
|
43
58
|
timeout?: number;
|
|
@@ -339,6 +354,21 @@ interface BaseParserOptions {
|
|
|
339
354
|
git: SimpleGit;
|
|
340
355
|
logger: Logger;
|
|
341
356
|
maxTokens?: number;
|
|
357
|
+
/**
|
|
358
|
+
* Minimum token count for a directory/file group to be eligible for summarization.
|
|
359
|
+
* @default 400
|
|
360
|
+
*/
|
|
361
|
+
minTokensForSummary?: number;
|
|
362
|
+
/**
|
|
363
|
+
* Maximum tokens allowed for a single file diff before it gets pre-summarized.
|
|
364
|
+
* Defaults to 25% of maxTokens if not specified.
|
|
365
|
+
*/
|
|
366
|
+
maxFileTokens?: number;
|
|
367
|
+
/**
|
|
368
|
+
* Maximum number of concurrent summarization requests.
|
|
369
|
+
* @default 6
|
|
370
|
+
*/
|
|
371
|
+
maxConcurrent?: number;
|
|
342
372
|
}
|
|
343
373
|
interface BaseParserInput {
|
|
344
374
|
options: BaseParserOptions;
|
package/dist/index.esm.mjs
CHANGED
|
@@ -27,7 +27,6 @@ import { RUN_KEY } from '@langchain/core/outputs';
|
|
|
27
27
|
import { CallbackManager, parseCallbackConfigArg } from '@langchain/core/callbacks/manager';
|
|
28
28
|
import '@langchain/core/utils/json_patch';
|
|
29
29
|
import { simpleGit } from 'simple-git';
|
|
30
|
-
import pQueue from 'p-queue';
|
|
31
30
|
import { Document, BaseDocumentTransformer } from '@langchain/core/documents';
|
|
32
31
|
import { createTwoFilesPatch } from 'diff';
|
|
33
32
|
import '@langchain/core/messages';
|
|
@@ -47,7 +46,7 @@ import { pathToFileURL } from 'url';
|
|
|
47
46
|
/**
|
|
48
47
|
* Current build version from package.json
|
|
49
48
|
*/
|
|
50
|
-
const BUILD_VERSION = "0.24.0";
|
|
49
|
+
const BUILD_VERSION = "0.25.0";
|
|
51
50
|
|
|
52
51
|
const isInteractive = (config) => {
|
|
53
52
|
return config?.mode === 'interactive' || !!config?.interactive;
|
|
@@ -162,6 +161,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
|
|
|
162
161
|
fs__default.writeFileSync(filePath, newLines.join('\n'));
|
|
163
162
|
}
|
|
164
163
|
|
|
164
|
+
/**
|
|
165
|
+
* Prompt template for summarizing code diffs.
|
|
166
|
+
*
|
|
167
|
+
* TODO: Future improvements to consider:
|
|
168
|
+
* - Separate prompts for file-level vs directory-level summarization
|
|
169
|
+
* - Include file type context (e.g., "This is a React component", "This is a test file")
|
|
170
|
+
* - Add guidance for preserving semantic meaning of changes
|
|
171
|
+
* - Consider change type (added/modified/deleted) in prompt for better context
|
|
172
|
+
* - Include hints about the programming language for more idiomatic summaries
|
|
173
|
+
* - Add support for custom user-provided summarization prompts via config
|
|
174
|
+
*/
|
|
165
175
|
const template$5 = `GOAL: Use functional abstractions to summarize the following text
|
|
166
176
|
|
|
167
177
|
RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
|
|
@@ -1039,6 +1049,16 @@ const schema$1 = {
|
|
|
1039
1049
|
"description": "The maximum number of requests to make concurrently.",
|
|
1040
1050
|
"default": 6
|
|
1041
1051
|
},
|
|
1052
|
+
"minTokensForSummary": {
|
|
1053
|
+
"type": "number",
|
|
1054
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1055
|
+
"default": 400
|
|
1056
|
+
},
|
|
1057
|
+
"maxFileTokens": {
|
|
1058
|
+
"type": "number",
|
|
1059
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1060
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1061
|
+
},
|
|
1042
1062
|
"authentication": {
|
|
1043
1063
|
"anyOf": [
|
|
1044
1064
|
{
|
|
@@ -1799,6 +1819,16 @@ const schema$1 = {
|
|
|
1799
1819
|
"description": "The maximum number of requests to make concurrently.",
|
|
1800
1820
|
"default": 6
|
|
1801
1821
|
},
|
|
1822
|
+
"minTokensForSummary": {
|
|
1823
|
+
"type": "number",
|
|
1824
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1825
|
+
"default": 400
|
|
1826
|
+
},
|
|
1827
|
+
"maxFileTokens": {
|
|
1828
|
+
"type": "number",
|
|
1829
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1830
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1831
|
+
},
|
|
1802
1832
|
"authentication": {
|
|
1803
1833
|
"anyOf": [
|
|
1804
1834
|
{
|
|
@@ -1950,6 +1980,16 @@ const schema$1 = {
|
|
|
1950
1980
|
"description": "The maximum number of requests to make concurrently.",
|
|
1951
1981
|
"default": 6
|
|
1952
1982
|
},
|
|
1983
|
+
"minTokensForSummary": {
|
|
1984
|
+
"type": "number",
|
|
1985
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1986
|
+
"default": 400
|
|
1987
|
+
},
|
|
1988
|
+
"maxFileTokens": {
|
|
1989
|
+
"type": "number",
|
|
1990
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1991
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1992
|
+
},
|
|
1953
1993
|
"authentication": {
|
|
1954
1994
|
"anyOf": [
|
|
1955
1995
|
{
|
|
@@ -8217,6 +8257,114 @@ async function summarize(documents, { chain, textSplitter, options }) {
|
|
|
8217
8257
|
return res.text && res.text.trim();
|
|
8218
8258
|
}
|
|
8219
8259
|
|
|
8260
|
+
/**
|
|
8261
|
+
* Summarize a single file diff that exceeds the token threshold.
|
|
8262
|
+
*/
|
|
8263
|
+
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
|
|
8264
|
+
try {
|
|
8265
|
+
const fileSummary = await summarize([
|
|
8266
|
+
{
|
|
8267
|
+
pageContent: fileDiff.diff,
|
|
8268
|
+
metadata: {
|
|
8269
|
+
file: fileDiff.file,
|
|
8270
|
+
summary: fileDiff.summary,
|
|
8271
|
+
},
|
|
8272
|
+
},
|
|
8273
|
+
], {
|
|
8274
|
+
chain,
|
|
8275
|
+
textSplitter,
|
|
8276
|
+
options: {
|
|
8277
|
+
returnIntermediateSteps: false,
|
|
8278
|
+
},
|
|
8279
|
+
});
|
|
8280
|
+
const newTokenCount = tokenizer(fileSummary);
|
|
8281
|
+
return {
|
|
8282
|
+
...fileDiff,
|
|
8283
|
+
diff: fileSummary,
|
|
8284
|
+
tokenCount: newTokenCount,
|
|
8285
|
+
};
|
|
8286
|
+
}
|
|
8287
|
+
catch (error) {
|
|
8288
|
+
// On error, return original diff unchanged
|
|
8289
|
+
console.error(`Failed to summarize file ${fileDiff.file}:`, error);
|
|
8290
|
+
return fileDiff;
|
|
8291
|
+
}
|
|
8292
|
+
}
|
|
8293
|
+
/**
|
|
8294
|
+
* Process files in waves to respect concurrency limits.
|
|
8295
|
+
*/
|
|
8296
|
+
async function processInWaves(items, processor, maxConcurrent) {
|
|
8297
|
+
const results = [];
|
|
8298
|
+
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
8299
|
+
const wave = items.slice(i, i + maxConcurrent);
|
|
8300
|
+
const waveResults = await Promise.all(wave.map(processor));
|
|
8301
|
+
results.push(...waveResults);
|
|
8302
|
+
}
|
|
8303
|
+
return results;
|
|
8304
|
+
}
|
|
8305
|
+
/**
|
|
8306
|
+
* Pre-summarize individual files that exceed the maxFileTokens threshold.
|
|
8307
|
+
* This prevents large files from dominating the token budget and biasing
|
|
8308
|
+
* the final commit message toward a single file's changes.
|
|
8309
|
+
*
|
|
8310
|
+
* @param diffs - Array of file diffs to process
|
|
8311
|
+
* @param options - Configuration options for summarization
|
|
8312
|
+
* @returns Array of file diffs with large files summarized
|
|
8313
|
+
*/
|
|
8314
|
+
async function summarizeLargeFiles(diffs, options) {
|
|
8315
|
+
const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
|
|
8316
|
+
// Identify files that need summarization
|
|
8317
|
+
const filesToSummarize = [];
|
|
8318
|
+
const results = [...diffs];
|
|
8319
|
+
diffs.forEach((diff, index) => {
|
|
8320
|
+
if (diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary) {
|
|
8321
|
+
filesToSummarize.push({ index, diff });
|
|
8322
|
+
}
|
|
8323
|
+
});
|
|
8324
|
+
if (filesToSummarize.length === 0) {
|
|
8325
|
+
return results;
|
|
8326
|
+
}
|
|
8327
|
+
logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
|
|
8328
|
+
// Process large files in waves
|
|
8329
|
+
const summarizedFiles = await processInWaves(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer }), maxConcurrent);
|
|
8330
|
+
// Update results with summarized files
|
|
8331
|
+
summarizedFiles.forEach((summarizedDiff, i) => {
|
|
8332
|
+
const originalIndex = filesToSummarize[i].index;
|
|
8333
|
+
const originalTokens = results[originalIndex].tokenCount;
|
|
8334
|
+
const newTokens = summarizedDiff.tokenCount;
|
|
8335
|
+
logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
|
|
8336
|
+
results[originalIndex] = summarizedDiff;
|
|
8337
|
+
});
|
|
8338
|
+
return results;
|
|
8339
|
+
}
|
|
8340
|
+
/**
|
|
8341
|
+
* Pre-process a DiffNode tree, summarizing large files at the leaf level.
|
|
8342
|
+
* Returns a new DiffNode with updated token counts.
|
|
8343
|
+
*/
|
|
8344
|
+
async function preprocessLargeFiles(rootNode, options) {
|
|
8345
|
+
// Collect all diffs from the tree
|
|
8346
|
+
const allDiffs = [];
|
|
8347
|
+
function collectDiffs(node) {
|
|
8348
|
+
allDiffs.push(...node.diffs);
|
|
8349
|
+
node.children.forEach(collectDiffs);
|
|
8350
|
+
}
|
|
8351
|
+
collectDiffs(rootNode);
|
|
8352
|
+
// Summarize large files
|
|
8353
|
+
const processedDiffs = await summarizeLargeFiles(allDiffs, options);
|
|
8354
|
+
// Create a map for quick lookup
|
|
8355
|
+
const diffMap = new Map();
|
|
8356
|
+
processedDiffs.forEach((diff) => diffMap.set(diff.file, diff));
|
|
8357
|
+
// Rebuild tree with processed diffs
|
|
8358
|
+
function rebuildNode(node) {
|
|
8359
|
+
return {
|
|
8360
|
+
path: node.path,
|
|
8361
|
+
diffs: node.diffs.map((diff) => diffMap.get(diff.file) || diff),
|
|
8362
|
+
children: node.children.map(rebuildNode),
|
|
8363
|
+
};
|
|
8364
|
+
}
|
|
8365
|
+
return rebuildNode(rootNode);
|
|
8366
|
+
}
|
|
8367
|
+
|
|
8220
8368
|
/**
|
|
8221
8369
|
* Create groups from a given node info.
|
|
8222
8370
|
* @param {DiffNode} node - The node info to start grouping.
|
|
@@ -8269,6 +8417,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
|
|
|
8269
8417
|
return directory;
|
|
8270
8418
|
}
|
|
8271
8419
|
}
|
|
8420
|
+
/**
|
|
8421
|
+
* Default output formatter for directory diffs.
|
|
8422
|
+
*
|
|
8423
|
+
* TODO: Future improvements to consider:
|
|
8424
|
+
* - Hierarchical output showing file -> directory -> overall summary
|
|
8425
|
+
* - Configurable verbosity levels (compact, standard, detailed)
|
|
8426
|
+
* - Machine-readable format option (JSON) for programmatic use
|
|
8427
|
+
* - Semantic grouping by change type (added/modified/deleted) or feature area
|
|
8428
|
+
* - Visual diff indicators showing magnitude of changes
|
|
8429
|
+
*/
|
|
8272
8430
|
const defaultOutputCallback = (group) => {
|
|
8273
8431
|
let output = `
|
|
8274
8432
|
-------\n* changes in "/${group.path}"\n\n`;
|
|
@@ -8280,41 +8438,124 @@ const defaultOutputCallback = (group) => {
|
|
|
8280
8438
|
}
|
|
8281
8439
|
return output;
|
|
8282
8440
|
};
|
|
8283
|
-
|
|
8284
|
-
|
|
8441
|
+
/**
|
|
8442
|
+
* Process directory summarization in waves to respect concurrency limits
|
|
8443
|
+
* while maintaining predictable behavior.
|
|
8444
|
+
*/
|
|
8445
|
+
async function summarizeInWaves(directories, options) {
|
|
8446
|
+
const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
|
|
8447
|
+
let totalTokenCount = initialTotal;
|
|
8448
|
+
const results = [...directories];
|
|
8449
|
+
// Create sorted indices by token count (descending) for prioritized processing
|
|
8450
|
+
const sortedIndices = directories
|
|
8451
|
+
.map((d, i) => ({ index: i, tokens: d.tokenCount }))
|
|
8452
|
+
.sort((a, b) => b.tokens - a.tokens);
|
|
8453
|
+
let cursor = 0;
|
|
8454
|
+
while (totalTokenCount > maxTokens && cursor < sortedIndices.length) {
|
|
8455
|
+
// Select wave candidates: directories that exceed minTokensForSummary
|
|
8456
|
+
const wave = [];
|
|
8457
|
+
for (let i = cursor; i < sortedIndices.length && wave.length < maxConcurrent; i++) {
|
|
8458
|
+
const { index, tokens } = sortedIndices[i];
|
|
8459
|
+
// Skip directories below the minimum threshold
|
|
8460
|
+
if (tokens < minTokensForSummary) {
|
|
8461
|
+
cursor = i + 1;
|
|
8462
|
+
continue;
|
|
8463
|
+
}
|
|
8464
|
+
// Skip directories that have already been summarized
|
|
8465
|
+
if (results[index].summary) {
|
|
8466
|
+
cursor = i + 1;
|
|
8467
|
+
continue;
|
|
8468
|
+
}
|
|
8469
|
+
wave.push(index);
|
|
8470
|
+
cursor = i + 1;
|
|
8471
|
+
}
|
|
8472
|
+
// No more eligible candidates
|
|
8473
|
+
if (wave.length === 0) {
|
|
8474
|
+
break;
|
|
8475
|
+
}
|
|
8476
|
+
logger.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
|
|
8477
|
+
// Process wave in parallel
|
|
8478
|
+
const waveResults = await Promise.all(wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer })));
|
|
8479
|
+
// Update results and recalculate total
|
|
8480
|
+
waveResults.forEach((result, i) => {
|
|
8481
|
+
const idx = wave[i];
|
|
8482
|
+
const originalTokens = results[idx].tokenCount;
|
|
8483
|
+
const newTokens = result.tokenCount;
|
|
8484
|
+
const reduction = originalTokens - newTokens;
|
|
8485
|
+
totalTokenCount -= reduction;
|
|
8486
|
+
results[idx] = result;
|
|
8487
|
+
logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
|
|
8488
|
+
color: 'magenta',
|
|
8489
|
+
});
|
|
8490
|
+
});
|
|
8491
|
+
logger.verbose(`Total token count: ${totalTokenCount}`, {
|
|
8492
|
+
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8493
|
+
});
|
|
8494
|
+
// Check if we're now under budget
|
|
8495
|
+
if (totalTokenCount <= maxTokens) {
|
|
8496
|
+
logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
|
|
8497
|
+
break;
|
|
8498
|
+
}
|
|
8499
|
+
}
|
|
8500
|
+
return { directories: results, totalTokenCount };
|
|
8501
|
+
}
|
|
8502
|
+
/**
|
|
8503
|
+
* Summarize diffs using a three-phase approach:
|
|
8504
|
+
*
|
|
8505
|
+
* Phase 1: Pre-process large files to prevent any single file from dominating
|
|
8506
|
+
* Phase 2: Group diffs by directory and assess total token count
|
|
8507
|
+
* Phase 3: Wave-based parallel summarization until under budget
|
|
8508
|
+
*
|
|
8509
|
+
* This approach ensures:
|
|
8510
|
+
* - Large files don't bias the summary
|
|
8511
|
+
* - Small changes preserve their detail (minTokensForSummary threshold)
|
|
8512
|
+
* - Efficient parallel processing with predictable behavior
|
|
8513
|
+
* - Early exit when under token budget
|
|
8514
|
+
*/
|
|
8515
|
+
async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
|
|
8516
|
+
// Calculate maxFileTokens as 25% of maxTokens if not specified
|
|
8517
|
+
const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
|
|
8518
|
+
// PHASE 1: Pre-process large files
|
|
8519
|
+
logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
|
|
8520
|
+
const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
|
|
8521
|
+
maxFileTokens: effectiveMaxFileTokens,
|
|
8522
|
+
minTokensForSummary,
|
|
8523
|
+
maxConcurrent,
|
|
8524
|
+
tokenizer,
|
|
8525
|
+
logger,
|
|
8526
|
+
chain,
|
|
8527
|
+
textSplitter,
|
|
8528
|
+
});
|
|
8529
|
+
logger.stopSpinner('Files pre-processed').stopTimer();
|
|
8530
|
+
// PHASE 2: Directory grouping & assessment
|
|
8285
8531
|
logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
|
|
8286
|
-
const directoryDiffs = createDirectoryDiffs(
|
|
8287
|
-
// Sort by token count descending
|
|
8532
|
+
const directoryDiffs = createDirectoryDiffs(preprocessedNode);
|
|
8533
|
+
// Sort by token count descending for consistent output ordering
|
|
8288
8534
|
directoryDiffs.sort((a, b) => b.tokenCount - a.tokenCount);
|
|
8289
|
-
|
|
8535
|
+
const totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
|
|
8290
8536
|
logger.stopSpinner('Diffs Organized').stopTimer();
|
|
8291
|
-
logger.
|
|
8292
|
-
|
|
8293
|
-
|
|
8294
|
-
|
|
8295
|
-
|
|
8296
|
-
|
|
8297
|
-
|
|
8298
|
-
|
|
8299
|
-
|
|
8300
|
-
|
|
8301
|
-
|
|
8302
|
-
|
|
8303
|
-
|
|
8304
|
-
|
|
8305
|
-
|
|
8306
|
-
|
|
8307
|
-
|
|
8308
|
-
|
|
8309
|
-
|
|
8310
|
-
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8311
|
-
});
|
|
8312
|
-
return group;
|
|
8313
|
-
}, { priority: group.tokenCount });
|
|
8537
|
+
logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
|
|
8538
|
+
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8539
|
+
});
|
|
8540
|
+
// Early exit if already under budget
|
|
8541
|
+
if (totalTokenCount <= maxTokens) {
|
|
8542
|
+
logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
|
|
8543
|
+
return directoryDiffs.map(handleOutput).join('');
|
|
8544
|
+
}
|
|
8545
|
+
// PHASE 3: Wave-based summarization
|
|
8546
|
+
logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
|
|
8547
|
+
const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
|
|
8548
|
+
totalTokenCount,
|
|
8549
|
+
maxTokens,
|
|
8550
|
+
minTokensForSummary,
|
|
8551
|
+
maxConcurrent,
|
|
8552
|
+
logger,
|
|
8553
|
+
chain,
|
|
8554
|
+
textSplitter,
|
|
8555
|
+
tokenizer,
|
|
8314
8556
|
});
|
|
8315
|
-
|
|
8316
|
-
|
|
8317
|
-
return directoryDiffs.map(handleOutput).join('');
|
|
8557
|
+
logger.stopSpinner(`Diffs Consolidated`).stopTimer();
|
|
8558
|
+
return summarizedDiffs.map(handleOutput).join('');
|
|
8318
8559
|
}
|
|
8319
8560
|
|
|
8320
8561
|
/**
|
|
@@ -11314,7 +11555,7 @@ for (var i = 0; i < 256; i++) {
|
|
|
11314
11555
|
simpleEscapeMap[i] = simpleEscapeSequence(i);
|
|
11315
11556
|
}
|
|
11316
11557
|
|
|
11317
|
-
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
|
|
11558
|
+
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
|
|
11318
11559
|
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
|
|
11319
11560
|
const summarizationChain = loadSummarizationChain(model, {
|
|
11320
11561
|
type: 'map_reduce',
|
|
@@ -11328,11 +11569,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
|
|
|
11328
11569
|
logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
|
|
11329
11570
|
const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
|
|
11330
11571
|
logger.stopSpinner('Diffs Collected').stopTimer();
|
|
11331
|
-
// Summarize diffs
|
|
11572
|
+
// Summarize diffs using three-phase approach:
|
|
11573
|
+
// 1. Pre-process large files to prevent bias
|
|
11574
|
+
// 2. Group by directory and assess token count
|
|
11575
|
+
// 3. Wave-based parallel summarization until under budget
|
|
11332
11576
|
logger.startTimer();
|
|
11333
11577
|
const summary = await summarizeDiffs(diffs, {
|
|
11334
11578
|
tokenizer,
|
|
11335
|
-
maxTokens: maxTokens ||
|
|
11579
|
+
maxTokens: maxTokens || 2048,
|
|
11580
|
+
minTokensForSummary,
|
|
11581
|
+
maxFileTokens,
|
|
11582
|
+
maxConcurrent,
|
|
11336
11583
|
textSplitter,
|
|
11337
11584
|
chain: summarizationChain,
|
|
11338
11585
|
logger,
|
|
@@ -11635,7 +11882,16 @@ const handler$3 = async (argv, logger) => {
|
|
|
11635
11882
|
return await fileChangeParser({
|
|
11636
11883
|
changes,
|
|
11637
11884
|
commit: '--staged',
|
|
11638
|
-
options: {
|
|
11885
|
+
options: {
|
|
11886
|
+
tokenizer,
|
|
11887
|
+
git,
|
|
11888
|
+
llm,
|
|
11889
|
+
logger,
|
|
11890
|
+
maxTokens: config.service.tokenLimit,
|
|
11891
|
+
minTokensForSummary: config.service.minTokensForSummary,
|
|
11892
|
+
maxFileTokens: config.service.maxFileTokens,
|
|
11893
|
+
maxConcurrent: config.service.maxConcurrent,
|
|
11894
|
+
},
|
|
11639
11895
|
});
|
|
11640
11896
|
}
|
|
11641
11897
|
const commitMsg = await generateAndReviewLoop({
|
package/dist/index.js
CHANGED
|
@@ -26,7 +26,6 @@ var outputs = require('@langchain/core/outputs');
|
|
|
26
26
|
var manager = require('@langchain/core/callbacks/manager');
|
|
27
27
|
require('@langchain/core/utils/json_patch');
|
|
28
28
|
var simpleGit = require('simple-git');
|
|
29
|
-
var pQueue = require('p-queue');
|
|
30
29
|
var documents = require('@langchain/core/documents');
|
|
31
30
|
var diff = require('diff');
|
|
32
31
|
require('@langchain/core/messages');
|
|
@@ -69,7 +68,7 @@ var readline__namespace = /*#__PURE__*/_interopNamespaceDefault(readline);
|
|
|
69
68
|
/**
|
|
70
69
|
* Current build version from package.json
|
|
71
70
|
*/
|
|
72
|
-
const BUILD_VERSION = "0.24.0";
|
|
71
|
+
const BUILD_VERSION = "0.25.0";
|
|
73
72
|
|
|
74
73
|
const isInteractive = (config) => {
|
|
75
74
|
return config?.mode === 'interactive' || !!config?.interactive;
|
|
@@ -184,6 +183,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
|
|
|
184
183
|
fs.writeFileSync(filePath, newLines.join('\n'));
|
|
185
184
|
}
|
|
186
185
|
|
|
186
|
+
/**
|
|
187
|
+
* Prompt template for summarizing code diffs.
|
|
188
|
+
*
|
|
189
|
+
* TODO: Future improvements to consider:
|
|
190
|
+
* - Separate prompts for file-level vs directory-level summarization
|
|
191
|
+
* - Include file type context (e.g., "This is a React component", "This is a test file")
|
|
192
|
+
* - Add guidance for preserving semantic meaning of changes
|
|
193
|
+
* - Consider change type (added/modified/deleted) in prompt for better context
|
|
194
|
+
* - Include hints about the programming language for more idiomatic summaries
|
|
195
|
+
* - Add support for custom user-provided summarization prompts via config
|
|
196
|
+
*/
|
|
187
197
|
const template$5 = `GOAL: Use functional abstractions to summarize the following text
|
|
188
198
|
|
|
189
199
|
RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
|
|
@@ -1061,6 +1071,16 @@ const schema$1 = {
|
|
|
1061
1071
|
"description": "The maximum number of requests to make concurrently.",
|
|
1062
1072
|
"default": 6
|
|
1063
1073
|
},
|
|
1074
|
+
"minTokensForSummary": {
|
|
1075
|
+
"type": "number",
|
|
1076
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1077
|
+
"default": 400
|
|
1078
|
+
},
|
|
1079
|
+
"maxFileTokens": {
|
|
1080
|
+
"type": "number",
|
|
1081
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1082
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1083
|
+
},
|
|
1064
1084
|
"authentication": {
|
|
1065
1085
|
"anyOf": [
|
|
1066
1086
|
{
|
|
@@ -1821,6 +1841,16 @@ const schema$1 = {
|
|
|
1821
1841
|
"description": "The maximum number of requests to make concurrently.",
|
|
1822
1842
|
"default": 6
|
|
1823
1843
|
},
|
|
1844
|
+
"minTokensForSummary": {
|
|
1845
|
+
"type": "number",
|
|
1846
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1847
|
+
"default": 400
|
|
1848
|
+
},
|
|
1849
|
+
"maxFileTokens": {
|
|
1850
|
+
"type": "number",
|
|
1851
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1852
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1853
|
+
},
|
|
1824
1854
|
"authentication": {
|
|
1825
1855
|
"anyOf": [
|
|
1826
1856
|
{
|
|
@@ -1972,6 +2002,16 @@ const schema$1 = {
|
|
|
1972
2002
|
"description": "The maximum number of requests to make concurrently.",
|
|
1973
2003
|
"default": 6
|
|
1974
2004
|
},
|
|
2005
|
+
"minTokensForSummary": {
|
|
2006
|
+
"type": "number",
|
|
2007
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
2008
|
+
"default": 400
|
|
2009
|
+
},
|
|
2010
|
+
"maxFileTokens": {
|
|
2011
|
+
"type": "number",
|
|
2012
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
2013
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
2014
|
+
},
|
|
1975
2015
|
"authentication": {
|
|
1976
2016
|
"anyOf": [
|
|
1977
2017
|
{
|
|
@@ -8239,6 +8279,114 @@ async function summarize(documents$1, { chain, textSplitter, options }) {
|
|
|
8239
8279
|
return res.text && res.text.trim();
|
|
8240
8280
|
}
|
|
8241
8281
|
|
|
8282
|
+
/**
|
|
8283
|
+
* Summarize a single file diff that exceeds the token threshold.
|
|
8284
|
+
*/
|
|
8285
|
+
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
|
|
8286
|
+
try {
|
|
8287
|
+
const fileSummary = await summarize([
|
|
8288
|
+
{
|
|
8289
|
+
pageContent: fileDiff.diff,
|
|
8290
|
+
metadata: {
|
|
8291
|
+
file: fileDiff.file,
|
|
8292
|
+
summary: fileDiff.summary,
|
|
8293
|
+
},
|
|
8294
|
+
},
|
|
8295
|
+
], {
|
|
8296
|
+
chain,
|
|
8297
|
+
textSplitter,
|
|
8298
|
+
options: {
|
|
8299
|
+
returnIntermediateSteps: false,
|
|
8300
|
+
},
|
|
8301
|
+
});
|
|
8302
|
+
const newTokenCount = tokenizer(fileSummary);
|
|
8303
|
+
return {
|
|
8304
|
+
...fileDiff,
|
|
8305
|
+
diff: fileSummary,
|
|
8306
|
+
tokenCount: newTokenCount,
|
|
8307
|
+
};
|
|
8308
|
+
}
|
|
8309
|
+
catch (error) {
|
|
8310
|
+
// On error, return original diff unchanged
|
|
8311
|
+
console.error(`Failed to summarize file ${fileDiff.file}:`, error);
|
|
8312
|
+
return fileDiff;
|
|
8313
|
+
}
|
|
8314
|
+
}
|
|
8315
|
+
/**
|
|
8316
|
+
* Process files in waves to respect concurrency limits.
|
|
8317
|
+
*/
|
|
8318
|
+
async function processInWaves(items, processor, maxConcurrent) {
|
|
8319
|
+
const results = [];
|
|
8320
|
+
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
8321
|
+
const wave = items.slice(i, i + maxConcurrent);
|
|
8322
|
+
const waveResults = await Promise.all(wave.map(processor));
|
|
8323
|
+
results.push(...waveResults);
|
|
8324
|
+
}
|
|
8325
|
+
return results;
|
|
8326
|
+
}
|
|
8327
|
+
/**
|
|
8328
|
+
* Pre-summarize individual files that exceed the maxFileTokens threshold.
|
|
8329
|
+
* This prevents large files from dominating the token budget and biasing
|
|
8330
|
+
* the final commit message toward a single file's changes.
|
|
8331
|
+
*
|
|
8332
|
+
* @param diffs - Array of file diffs to process
|
|
8333
|
+
* @param options - Configuration options for summarization
|
|
8334
|
+
* @returns Array of file diffs with large files summarized
|
|
8335
|
+
*/
|
|
8336
|
+
async function summarizeLargeFiles(diffs, options) {
|
|
8337
|
+
const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
|
|
8338
|
+
// Identify files that need summarization
|
|
8339
|
+
const filesToSummarize = [];
|
|
8340
|
+
const results = [...diffs];
|
|
8341
|
+
diffs.forEach((diff, index) => {
|
|
8342
|
+
if (diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary) {
|
|
8343
|
+
filesToSummarize.push({ index, diff });
|
|
8344
|
+
}
|
|
8345
|
+
});
|
|
8346
|
+
if (filesToSummarize.length === 0) {
|
|
8347
|
+
return results;
|
|
8348
|
+
}
|
|
8349
|
+
logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
|
|
8350
|
+
// Process large files in waves
|
|
8351
|
+
const summarizedFiles = await processInWaves(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer }), maxConcurrent);
|
|
8352
|
+
// Update results with summarized files
|
|
8353
|
+
summarizedFiles.forEach((summarizedDiff, i) => {
|
|
8354
|
+
const originalIndex = filesToSummarize[i].index;
|
|
8355
|
+
const originalTokens = results[originalIndex].tokenCount;
|
|
8356
|
+
const newTokens = summarizedDiff.tokenCount;
|
|
8357
|
+
logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
|
|
8358
|
+
results[originalIndex] = summarizedDiff;
|
|
8359
|
+
});
|
|
8360
|
+
return results;
|
|
8361
|
+
}
|
|
8362
|
+
/**
 * Run every file diff contained in a DiffNode tree through
 * `summarizeLargeFiles` and return a copy of the tree in which each diff has
 * been swapped for its processed counterpart.
 *
 * The input tree is left untouched: every node is rebuilt from its `path`,
 * `diffs` and `children`.
 *
 * @param {DiffNode} rootNode - Root of the diff tree to pre-process.
 * @param {object} options - Passed through unchanged to `summarizeLargeFiles`.
 * @returns {Promise<DiffNode>} A rebuilt tree holding the processed diffs.
 */
async function preprocessLargeFiles(rootNode, options) {
    // Flatten the tree depth-first: a node's own diffs come before those of
    // its children, which fixes the order the diffs are handed over in.
    const flattened = [];
    const gather = (node) => {
        for (const diff of node.diffs) {
            flattened.push(diff);
        }
        node.children.forEach((child) => gather(child));
    };
    gather(rootNode);
    const processed = await summarizeLargeFiles(flattened, options);
    // Index the processed diffs by file path so each tree node can look up
    // its replacement; a later entry for the same path overwrites an earlier one.
    const byFile = new Map();
    for (const diff of processed) {
        byFile.set(diff.file, diff);
    }
    // Rebuild the tree top-down, keeping the original diff whenever no
    // processed version exists for its file.
    const rebuild = (node) => {
        const diffs = node.diffs.map((diff) => byFile.get(diff.file) || diff);
        const children = node.children.map((child) => rebuild(child));
        return { path: node.path, diffs, children };
    };
    return rebuild(rootNode);
}
|
|
8389
|
+
|
|
8242
8390
|
/**
|
|
8243
8391
|
* Create groups from a given node info.
|
|
8244
8392
|
* @param {DiffNode} node - The node info to start grouping.
|
|
@@ -8291,6 +8439,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
|
|
|
8291
8439
|
return directory;
|
|
8292
8440
|
}
|
|
8293
8441
|
}
|
|
8442
|
+
/**
|
|
8443
|
+
* Default output formatter for directory diffs.
|
|
8444
|
+
*
|
|
8445
|
+
* TODO: Future improvements to consider:
|
|
8446
|
+
* - Hierarchical output showing file -> directory -> overall summary
|
|
8447
|
+
* - Configurable verbosity levels (compact, standard, detailed)
|
|
8448
|
+
* - Machine-readable format option (JSON) for programmatic use
|
|
8449
|
+
* - Semantic grouping by change type (added/modified/deleted) or feature area
|
|
8450
|
+
* - Visual diff indicators showing magnitude of changes
|
|
8451
|
+
*/
|
|
8294
8452
|
const defaultOutputCallback = (group) => {
|
|
8295
8453
|
let output = `
|
|
8296
8454
|
-------\n* changes in "/${group.path}"\n\n`;
|
|
@@ -8302,41 +8460,124 @@ const defaultOutputCallback = (group) => {
|
|
|
8302
8460
|
}
|
|
8303
8461
|
return output;
|
|
8304
8462
|
};
|
|
8305
|
-
|
|
8306
|
-
|
|
8463
|
+
/**
|
|
8464
|
+
* Process directory summarization in waves to respect concurrency limits
|
|
8465
|
+
* while maintaining predictable behavior.
|
|
8466
|
+
*/
|
|
8467
|
+
async function summarizeInWaves(directories, options) {
|
|
8468
|
+
const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
|
|
8469
|
+
let totalTokenCount = initialTotal;
|
|
8470
|
+
const results = [...directories];
|
|
8471
|
+
// Create sorted indices by token count (descending) for prioritized processing
|
|
8472
|
+
const sortedIndices = directories
|
|
8473
|
+
.map((d, i) => ({ index: i, tokens: d.tokenCount }))
|
|
8474
|
+
.sort((a, b) => b.tokens - a.tokens);
|
|
8475
|
+
let cursor = 0;
|
|
8476
|
+
while (totalTokenCount > maxTokens && cursor < sortedIndices.length) {
|
|
8477
|
+
// Select wave candidates: directories that exceed minTokensForSummary
|
|
8478
|
+
const wave = [];
|
|
8479
|
+
for (let i = cursor; i < sortedIndices.length && wave.length < maxConcurrent; i++) {
|
|
8480
|
+
const { index, tokens } = sortedIndices[i];
|
|
8481
|
+
// Skip directories below the minimum threshold
|
|
8482
|
+
if (tokens < minTokensForSummary) {
|
|
8483
|
+
cursor = i + 1;
|
|
8484
|
+
continue;
|
|
8485
|
+
}
|
|
8486
|
+
// Skip directories that have already been summarized
|
|
8487
|
+
if (results[index].summary) {
|
|
8488
|
+
cursor = i + 1;
|
|
8489
|
+
continue;
|
|
8490
|
+
}
|
|
8491
|
+
wave.push(index);
|
|
8492
|
+
cursor = i + 1;
|
|
8493
|
+
}
|
|
8494
|
+
// No more eligible candidates
|
|
8495
|
+
if (wave.length === 0) {
|
|
8496
|
+
break;
|
|
8497
|
+
}
|
|
8498
|
+
logger.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
|
|
8499
|
+
// Process wave in parallel
|
|
8500
|
+
const waveResults = await Promise.all(wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer })));
|
|
8501
|
+
// Update results and recalculate total
|
|
8502
|
+
waveResults.forEach((result, i) => {
|
|
8503
|
+
const idx = wave[i];
|
|
8504
|
+
const originalTokens = results[idx].tokenCount;
|
|
8505
|
+
const newTokens = result.tokenCount;
|
|
8506
|
+
const reduction = originalTokens - newTokens;
|
|
8507
|
+
totalTokenCount -= reduction;
|
|
8508
|
+
results[idx] = result;
|
|
8509
|
+
logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
|
|
8510
|
+
color: 'magenta',
|
|
8511
|
+
});
|
|
8512
|
+
});
|
|
8513
|
+
logger.verbose(`Total token count: ${totalTokenCount}`, {
|
|
8514
|
+
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8515
|
+
});
|
|
8516
|
+
// Check if we're now under budget
|
|
8517
|
+
if (totalTokenCount <= maxTokens) {
|
|
8518
|
+
logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
|
|
8519
|
+
break;
|
|
8520
|
+
}
|
|
8521
|
+
}
|
|
8522
|
+
return { directories: results, totalTokenCount };
|
|
8523
|
+
}
|
|
8524
|
+
/**
 * Turn a DiffNode tree into the final summary text in three phases:
 *
 * Phase 1: pre-summarize individual files whose diff is larger than the
 *          per-file cap, so no single file dominates the result.
 * Phase 2: group the diffs by directory, order the groups largest-first and
 *          add up their token counts.
 * Phase 3: if the total is over `maxTokens`, summarize directories in
 *          parallel waves until it fits.
 *
 * When the total is already within budget after phase 2, phase 3 is skipped
 * and the groups are rendered as they are.
 *
 * @param {DiffNode} rootDiffNode - Root of the collected diff tree.
 * @param {object} options
 * @param {number} [options.maxTokens=2048] - Overall token budget.
 * @param {number} [options.minTokensForSummary=400] - Groups below this size keep their raw diffs.
 * @param {number} [options.maxFileTokens] - Per-file cap; defaults to 25% of `maxTokens` (rounded down).
 * @param {number} [options.maxConcurrent=6] - Parallel summarization requests per wave.
 * @param {Function} [options.handleOutput=defaultOutputCallback] - Renders one directory group to text.
 * @returns {Promise<string>} The rendered groups concatenated in order.
 */
async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
    // Every exit path renders the groups the same way.
    const render = (groups) => groups.map(handleOutput).join('');
    // PHASE 1: cap oversized files. Without an explicit cap, use a quarter of the budget.
    const perFileCap = maxFileTokens ?? Math.floor(maxTokens * 0.25);
    logger.startTimer().startSpinner('Pre-processing large files...', { color: 'blue' });
    const trimmedTree = await preprocessLargeFiles(rootDiffNode, {
        maxFileTokens: perFileCap,
        minTokensForSummary,
        maxConcurrent,
        tokenizer,
        logger,
        chain,
        textSplitter,
    });
    logger.stopSpinner('Files pre-processed').stopTimer();
    // PHASE 2: group by directory, largest first, and measure the total.
    logger.startTimer().startSpinner('Organizing Diffs...', { color: 'blue' });
    const groups = createDirectoryDiffs(trimmedTree);
    groups.sort((left, right) => right.tokenCount - left.tokenCount);
    let budgetUsed = 0;
    for (const group of groups) {
        budgetUsed += group.tokenCount;
    }
    logger.stopSpinner('Diffs Organized').stopTimer();
    logger.verbose(`Total token count: ${budgetUsed}, max allowed: ${maxTokens}`, {
        color: budgetUsed > maxTokens ? 'yellow' : 'green',
    });
    // Nothing left to do when the grouped diffs already fit.
    if (budgetUsed <= maxTokens) {
        logger.verbose('Already under token budget, skipping summarization.', { color: 'green' });
        return render(groups);
    }
    // PHASE 3: summarize directories wave by wave until under budget.
    logger.startTimer().startSpinner('Consolidating Diffs...', { color: 'blue' });
    const waveOutcome = await summarizeInWaves(groups, {
        totalTokenCount: budgetUsed,
        maxTokens,
        minTokensForSummary,
        maxConcurrent,
        logger,
        chain,
        textSplitter,
        tokenizer,
    });
    logger.stopSpinner('Diffs Consolidated').stopTimer();
    return render(waveOutcome.directories);
}
|
|
8341
8582
|
|
|
8342
8583
|
/**
|
|
@@ -11336,7 +11577,7 @@ for (var i = 0; i < 256; i++) {
|
|
|
11336
11577
|
simpleEscapeMap[i] = simpleEscapeSequence(i);
|
|
11337
11578
|
}
|
|
11338
11579
|
|
|
11339
|
-
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
|
|
11580
|
+
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
|
|
11340
11581
|
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
|
|
11341
11582
|
const summarizationChain = loadSummarizationChain(model, {
|
|
11342
11583
|
type: 'map_reduce',
|
|
@@ -11350,11 +11591,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
|
|
|
11350
11591
|
logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
|
|
11351
11592
|
const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
|
|
11352
11593
|
logger.stopSpinner('Diffs Collected').stopTimer();
|
|
11353
|
-
// Summarize diffs
|
|
11594
|
+
// Summarize diffs using three-phase approach:
|
|
11595
|
+
// 1. Pre-process large files to prevent bias
|
|
11596
|
+
// 2. Group by directory and assess token count
|
|
11597
|
+
// 3. Wave-based parallel summarization until under budget
|
|
11354
11598
|
logger.startTimer();
|
|
11355
11599
|
const summary = await summarizeDiffs(diffs, {
|
|
11356
11600
|
tokenizer,
|
|
11357
|
-
maxTokens: maxTokens ||
|
|
11601
|
+
maxTokens: maxTokens || 2048,
|
|
11602
|
+
minTokensForSummary,
|
|
11603
|
+
maxFileTokens,
|
|
11604
|
+
maxConcurrent,
|
|
11358
11605
|
textSplitter,
|
|
11359
11606
|
chain: summarizationChain,
|
|
11360
11607
|
logger,
|
|
@@ -11657,7 +11904,16 @@ const handler$3 = async (argv, logger) => {
|
|
|
11657
11904
|
return await fileChangeParser({
|
|
11658
11905
|
changes,
|
|
11659
11906
|
commit: '--staged',
|
|
11660
|
-
options: {
|
|
11907
|
+
options: {
|
|
11908
|
+
tokenizer,
|
|
11909
|
+
git,
|
|
11910
|
+
llm,
|
|
11911
|
+
logger,
|
|
11912
|
+
maxTokens: config.service.tokenLimit,
|
|
11913
|
+
minTokensForSummary: config.service.minTokensForSummary,
|
|
11914
|
+
maxFileTokens: config.service.maxFileTokens,
|
|
11915
|
+
maxConcurrent: config.service.maxConcurrent,
|
|
11916
|
+
},
|
|
11661
11917
|
});
|
|
11662
11918
|
}
|
|
11663
11919
|
const commitMsg = await generateAndReviewLoop({
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "git-coco",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"description": "zero-effort git commits with coco.",
|
|
5
5
|
"author": "gfargo <ghfargo@gmail.com>",
|
|
6
6
|
"license": "MIT",
|
|
@@ -77,20 +77,20 @@
|
|
|
77
77
|
"@commitlint/core": "^19.8.0",
|
|
78
78
|
"@inquirer/prompts": "3.3.0",
|
|
79
79
|
"@langchain/anthropic": "^0.3.14",
|
|
80
|
-
"@langchain/community": "^0.3.
|
|
81
|
-
"@langchain/core": "^0.3.
|
|
80
|
+
"@langchain/community": "^0.3.58",
|
|
81
|
+
"@langchain/core": "^0.3.80",
|
|
82
82
|
"@langchain/ollama": "^0.2.0",
|
|
83
83
|
"@langchain/openai": "^0.6.7",
|
|
84
84
|
"ajv": "^8.16.0",
|
|
85
85
|
"chalk": "4.1.2",
|
|
86
|
-
"diff": "8.0.
|
|
86
|
+
"diff": "8.0.3",
|
|
87
87
|
"ini": "5.0.0",
|
|
88
88
|
"minimatch": "^9.0.5",
|
|
89
89
|
"ora": "5.4.1",
|
|
90
90
|
"p-queue": "5.0.0",
|
|
91
91
|
"performance-now": "2.1.0",
|
|
92
92
|
"pretty-ms": "7.0.1",
|
|
93
|
-
"simple-git": "3.
|
|
93
|
+
"simple-git": "3.30.0",
|
|
94
94
|
"tiktoken": "^1.0.21",
|
|
95
95
|
"yargs": "17.7.2"
|
|
96
96
|
},
|