git-coco 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +30 -0
- package/dist/index.esm.mjs +340 -39
- package/dist/index.js +340 -39
- package/package.json +6 -6
package/dist/index.d.ts
CHANGED
|
@@ -38,6 +38,21 @@ type BaseLLMService = {
|
|
|
38
38
|
* @default 6
|
|
39
39
|
*/
|
|
40
40
|
maxConcurrent?: number;
|
|
41
|
+
/**
|
|
42
|
+
* Minimum token count for a directory/file group to be eligible for summarization.
|
|
43
|
+
* Groups below this threshold preserve raw diffs to maintain detail.
|
|
44
|
+
*
|
|
45
|
+
* @default 400
|
|
46
|
+
*/
|
|
47
|
+
minTokensForSummary?: number;
|
|
48
|
+
/**
|
|
49
|
+
* Maximum tokens allowed for a single file diff before it gets pre-summarized.
|
|
50
|
+
* Prevents large files from biasing the overall summary.
|
|
51
|
+
* If not set, defaults to 25% of tokenLimit.
|
|
52
|
+
*
|
|
53
|
+
* @default undefined (uses 0.25 * tokenLimit)
|
|
54
|
+
*/
|
|
55
|
+
maxFileTokens?: number;
|
|
41
56
|
authentication: Authentication;
|
|
42
57
|
requestOptions?: {
|
|
43
58
|
timeout?: number;
|
|
@@ -339,6 +354,21 @@ interface BaseParserOptions {
|
|
|
339
354
|
git: SimpleGit;
|
|
340
355
|
logger: Logger;
|
|
341
356
|
maxTokens?: number;
|
|
357
|
+
/**
|
|
358
|
+
* Minimum token count for a directory/file group to be eligible for summarization.
|
|
359
|
+
* @default 400
|
|
360
|
+
*/
|
|
361
|
+
minTokensForSummary?: number;
|
|
362
|
+
/**
|
|
363
|
+
* Maximum tokens allowed for a single file diff before it gets pre-summarized.
|
|
364
|
+
* Defaults to 25% of maxTokens if not specified.
|
|
365
|
+
*/
|
|
366
|
+
maxFileTokens?: number;
|
|
367
|
+
/**
|
|
368
|
+
* Maximum number of concurrent summarization requests.
|
|
369
|
+
* @default 6
|
|
370
|
+
*/
|
|
371
|
+
maxConcurrent?: number;
|
|
342
372
|
}
|
|
343
373
|
interface BaseParserInput {
|
|
344
374
|
options: BaseParserOptions;
|
package/dist/index.esm.mjs
CHANGED
|
@@ -27,7 +27,6 @@ import { RUN_KEY } from '@langchain/core/outputs';
|
|
|
27
27
|
import { CallbackManager, parseCallbackConfigArg } from '@langchain/core/callbacks/manager';
|
|
28
28
|
import '@langchain/core/utils/json_patch';
|
|
29
29
|
import { simpleGit } from 'simple-git';
|
|
30
|
-
import pQueue from 'p-queue';
|
|
31
30
|
import { Document, BaseDocumentTransformer } from '@langchain/core/documents';
|
|
32
31
|
import { createTwoFilesPatch } from 'diff';
|
|
33
32
|
import '@langchain/core/messages';
|
|
@@ -47,7 +46,7 @@ import { pathToFileURL } from 'url';
|
|
|
47
46
|
/**
|
|
48
47
|
* Current build version from package.json
|
|
49
48
|
*/
|
|
50
|
-
const BUILD_VERSION = "0.
|
|
49
|
+
const BUILD_VERSION = "0.26.0";
|
|
51
50
|
|
|
52
51
|
const isInteractive = (config) => {
|
|
53
52
|
return config?.mode === 'interactive' || !!config?.interactive;
|
|
@@ -162,6 +161,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
|
|
|
162
161
|
fs__default.writeFileSync(filePath, newLines.join('\n'));
|
|
163
162
|
}
|
|
164
163
|
|
|
164
|
+
/**
|
|
165
|
+
* Prompt template for summarizing code diffs.
|
|
166
|
+
*
|
|
167
|
+
* TODO: Future improvements to consider:
|
|
168
|
+
* - Separate prompts for file-level vs directory-level summarization
|
|
169
|
+
* - Include file type context (e.g., "This is a React component", "This is a test file")
|
|
170
|
+
* - Add guidance for preserving semantic meaning of changes
|
|
171
|
+
* - Consider change type (added/modified/deleted) in prompt for better context
|
|
172
|
+
* - Include hints about the programming language for more idiomatic summaries
|
|
173
|
+
* - Add support for custom user-provided summarization prompts via config
|
|
174
|
+
*/
|
|
165
175
|
const template$5 = `GOAL: Use functional abstractions to summarize the following text
|
|
166
176
|
|
|
167
177
|
RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
|
|
@@ -347,8 +357,11 @@ function getDefaultServiceApiKey(config) {
|
|
|
347
357
|
const DEFAULT_OPENAI_LLM_SERVICE = {
|
|
348
358
|
provider: 'openai',
|
|
349
359
|
model: 'gpt-4o-mini',
|
|
350
|
-
tokenLimit:
|
|
360
|
+
tokenLimit: 4096,
|
|
351
361
|
temperature: 0.32,
|
|
362
|
+
maxConcurrent: 12,
|
|
363
|
+
minTokensForSummary: 800,
|
|
364
|
+
maxFileTokens: 2000,
|
|
352
365
|
authentication: {
|
|
353
366
|
type: 'APIKey',
|
|
354
367
|
credentials: {
|
|
@@ -360,6 +373,10 @@ const DEFAULT_ANTHROPIC_LLM_SERVICE = {
|
|
|
360
373
|
provider: 'anthropic',
|
|
361
374
|
model: 'claude-3-5-sonnet-20240620',
|
|
362
375
|
temperature: 0.32,
|
|
376
|
+
tokenLimit: 4096,
|
|
377
|
+
maxConcurrent: 12,
|
|
378
|
+
minTokensForSummary: 800,
|
|
379
|
+
maxFileTokens: 2000,
|
|
363
380
|
authentication: {
|
|
364
381
|
type: 'APIKey',
|
|
365
382
|
credentials: {
|
|
@@ -372,9 +389,11 @@ const DEFAULT_OLLAMA_LLM_SERVICE = {
|
|
|
372
389
|
model: 'llama3',
|
|
373
390
|
endpoint: 'http://localhost:11434',
|
|
374
391
|
maxConcurrent: 1,
|
|
375
|
-
tokenLimit:
|
|
392
|
+
tokenLimit: 4096,
|
|
376
393
|
temperature: 0.4,
|
|
377
394
|
maxParsingAttempts: 3,
|
|
395
|
+
minTokensForSummary: 800,
|
|
396
|
+
maxFileTokens: 2000,
|
|
378
397
|
authentication: {
|
|
379
398
|
type: 'None',
|
|
380
399
|
credentials: undefined,
|
|
@@ -622,6 +641,24 @@ function loadGitConfig(config) {
|
|
|
622
641
|
service = {
|
|
623
642
|
provider: gitConfigParsed.coco?.serviceProvider,
|
|
624
643
|
model: gitConfigParsed.coco?.serviceModel,
|
|
644
|
+
tokenLimit: gitConfigParsed.coco?.serviceTokenLimit
|
|
645
|
+
? Number(gitConfigParsed.coco.serviceTokenLimit)
|
|
646
|
+
: undefined,
|
|
647
|
+
temperature: gitConfigParsed.coco?.serviceTemperature
|
|
648
|
+
? Number(gitConfigParsed.coco.serviceTemperature)
|
|
649
|
+
: undefined,
|
|
650
|
+
maxConcurrent: gitConfigParsed.coco?.serviceMaxConcurrent
|
|
651
|
+
? Number(gitConfigParsed.coco.serviceMaxConcurrent)
|
|
652
|
+
: undefined,
|
|
653
|
+
minTokensForSummary: gitConfigParsed.coco?.serviceMinTokensForSummary
|
|
654
|
+
? Number(gitConfigParsed.coco.serviceMinTokensForSummary)
|
|
655
|
+
: undefined,
|
|
656
|
+
maxFileTokens: gitConfigParsed.coco?.serviceMaxFileTokens
|
|
657
|
+
? Number(gitConfigParsed.coco.serviceMaxFileTokens)
|
|
658
|
+
: undefined,
|
|
659
|
+
maxParsingAttempts: gitConfigParsed.coco?.serviceMaxParsingAttempts
|
|
660
|
+
? Number(gitConfigParsed.coco.serviceMaxParsingAttempts)
|
|
661
|
+
: undefined,
|
|
625
662
|
authentication: {
|
|
626
663
|
type: 'APIKey',
|
|
627
664
|
credentials: {
|
|
@@ -677,6 +714,24 @@ const appendToGitConfig = async (filePath, config) => {
|
|
|
677
714
|
if (service.authentication.type === 'APIKey') {
|
|
678
715
|
contentLines.push(` serviceApiKey = ${service.authentication.credentials.apiKey}`);
|
|
679
716
|
}
|
|
717
|
+
if (service.tokenLimit !== undefined) {
|
|
718
|
+
contentLines.push(` serviceTokenLimit = ${service.tokenLimit}`);
|
|
719
|
+
}
|
|
720
|
+
if (service.temperature !== undefined) {
|
|
721
|
+
contentLines.push(` serviceTemperature = ${service.temperature}`);
|
|
722
|
+
}
|
|
723
|
+
if (service.maxConcurrent !== undefined) {
|
|
724
|
+
contentLines.push(` serviceMaxConcurrent = ${service.maxConcurrent}`);
|
|
725
|
+
}
|
|
726
|
+
if (service.minTokensForSummary !== undefined) {
|
|
727
|
+
contentLines.push(` serviceMinTokensForSummary = ${service.minTokensForSummary}`);
|
|
728
|
+
}
|
|
729
|
+
if (service.maxFileTokens !== undefined) {
|
|
730
|
+
contentLines.push(` serviceMaxFileTokens = ${service.maxFileTokens}`);
|
|
731
|
+
}
|
|
732
|
+
if (service.maxParsingAttempts !== undefined) {
|
|
733
|
+
contentLines.push(` serviceMaxParsingAttempts = ${service.maxParsingAttempts}`);
|
|
734
|
+
}
|
|
680
735
|
if (service.requestOptions?.timeout) {
|
|
681
736
|
contentLines.push(` serviceRequestOptionsTimeout = ${service.requestOptions.timeout}`);
|
|
682
737
|
}
|
|
@@ -1039,6 +1094,16 @@ const schema$1 = {
|
|
|
1039
1094
|
"description": "The maximum number of requests to make concurrently.",
|
|
1040
1095
|
"default": 6
|
|
1041
1096
|
},
|
|
1097
|
+
"minTokensForSummary": {
|
|
1098
|
+
"type": "number",
|
|
1099
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1100
|
+
"default": 400
|
|
1101
|
+
},
|
|
1102
|
+
"maxFileTokens": {
|
|
1103
|
+
"type": "number",
|
|
1104
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1105
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1106
|
+
},
|
|
1042
1107
|
"authentication": {
|
|
1043
1108
|
"anyOf": [
|
|
1044
1109
|
{
|
|
@@ -1799,6 +1864,16 @@ const schema$1 = {
|
|
|
1799
1864
|
"description": "The maximum number of requests to make concurrently.",
|
|
1800
1865
|
"default": 6
|
|
1801
1866
|
},
|
|
1867
|
+
"minTokensForSummary": {
|
|
1868
|
+
"type": "number",
|
|
1869
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1870
|
+
"default": 400
|
|
1871
|
+
},
|
|
1872
|
+
"maxFileTokens": {
|
|
1873
|
+
"type": "number",
|
|
1874
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1875
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1876
|
+
},
|
|
1802
1877
|
"authentication": {
|
|
1803
1878
|
"anyOf": [
|
|
1804
1879
|
{
|
|
@@ -1950,6 +2025,16 @@ const schema$1 = {
|
|
|
1950
2025
|
"description": "The maximum number of requests to make concurrently.",
|
|
1951
2026
|
"default": 6
|
|
1952
2027
|
},
|
|
2028
|
+
"minTokensForSummary": {
|
|
2029
|
+
"type": "number",
|
|
2030
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
2031
|
+
"default": 400
|
|
2032
|
+
},
|
|
2033
|
+
"maxFileTokens": {
|
|
2034
|
+
"type": "number",
|
|
2035
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
2036
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
2037
|
+
},
|
|
1953
2038
|
"authentication": {
|
|
1954
2039
|
"anyOf": [
|
|
1955
2040
|
{
|
|
@@ -8217,6 +8302,114 @@ async function summarize(documents, { chain, textSplitter, options }) {
|
|
|
8217
8302
|
return res.text && res.text.trim();
|
|
8218
8303
|
}
|
|
8219
8304
|
|
|
8305
|
+
/**
|
|
8306
|
+
* Summarize a single file diff that exceeds the token threshold.
|
|
8307
|
+
*/
|
|
8308
|
+
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
|
|
8309
|
+
try {
|
|
8310
|
+
const fileSummary = await summarize([
|
|
8311
|
+
{
|
|
8312
|
+
pageContent: fileDiff.diff,
|
|
8313
|
+
metadata: {
|
|
8314
|
+
file: fileDiff.file,
|
|
8315
|
+
summary: fileDiff.summary,
|
|
8316
|
+
},
|
|
8317
|
+
},
|
|
8318
|
+
], {
|
|
8319
|
+
chain,
|
|
8320
|
+
textSplitter,
|
|
8321
|
+
options: {
|
|
8322
|
+
returnIntermediateSteps: false,
|
|
8323
|
+
},
|
|
8324
|
+
});
|
|
8325
|
+
const newTokenCount = tokenizer(fileSummary);
|
|
8326
|
+
return {
|
|
8327
|
+
...fileDiff,
|
|
8328
|
+
diff: fileSummary,
|
|
8329
|
+
tokenCount: newTokenCount,
|
|
8330
|
+
};
|
|
8331
|
+
}
|
|
8332
|
+
catch (error) {
|
|
8333
|
+
// On error, return original diff unchanged
|
|
8334
|
+
console.error(`Failed to summarize file ${fileDiff.file}:`, error);
|
|
8335
|
+
return fileDiff;
|
|
8336
|
+
}
|
|
8337
|
+
}
|
|
8338
|
+
/**
|
|
8339
|
+
* Process files in waves to respect concurrency limits.
|
|
8340
|
+
*/
|
|
8341
|
+
async function processInWaves(items, processor, maxConcurrent) {
|
|
8342
|
+
const results = [];
|
|
8343
|
+
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
8344
|
+
const wave = items.slice(i, i + maxConcurrent);
|
|
8345
|
+
const waveResults = await Promise.all(wave.map(processor));
|
|
8346
|
+
results.push(...waveResults);
|
|
8347
|
+
}
|
|
8348
|
+
return results;
|
|
8349
|
+
}
|
|
8350
|
+
/**
|
|
8351
|
+
* Pre-summarize individual files that exceed the maxFileTokens threshold.
|
|
8352
|
+
* This prevents large files from dominating the token budget and biasing
|
|
8353
|
+
* the final commit message toward a single file's changes.
|
|
8354
|
+
*
|
|
8355
|
+
* @param diffs - Array of file diffs to process
|
|
8356
|
+
* @param options - Configuration options for summarization
|
|
8357
|
+
* @returns Array of file diffs with large files summarized
|
|
8358
|
+
*/
|
|
8359
|
+
async function summarizeLargeFiles(diffs, options) {
|
|
8360
|
+
const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
|
|
8361
|
+
// Identify files that need summarization
|
|
8362
|
+
const filesToSummarize = [];
|
|
8363
|
+
const results = [...diffs];
|
|
8364
|
+
diffs.forEach((diff, index) => {
|
|
8365
|
+
if (diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary) {
|
|
8366
|
+
filesToSummarize.push({ index, diff });
|
|
8367
|
+
}
|
|
8368
|
+
});
|
|
8369
|
+
if (filesToSummarize.length === 0) {
|
|
8370
|
+
return results;
|
|
8371
|
+
}
|
|
8372
|
+
logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
|
|
8373
|
+
// Process large files in waves
|
|
8374
|
+
const summarizedFiles = await processInWaves(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer }), maxConcurrent);
|
|
8375
|
+
// Update results with summarized files
|
|
8376
|
+
summarizedFiles.forEach((summarizedDiff, i) => {
|
|
8377
|
+
const originalIndex = filesToSummarize[i].index;
|
|
8378
|
+
const originalTokens = results[originalIndex].tokenCount;
|
|
8379
|
+
const newTokens = summarizedDiff.tokenCount;
|
|
8380
|
+
logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
|
|
8381
|
+
results[originalIndex] = summarizedDiff;
|
|
8382
|
+
});
|
|
8383
|
+
return results;
|
|
8384
|
+
}
|
|
8385
|
+
/**
|
|
8386
|
+
* Pre-process a DiffNode tree, summarizing large files at the leaf level.
|
|
8387
|
+
* Returns a new DiffNode with updated token counts.
|
|
8388
|
+
*/
|
|
8389
|
+
async function preprocessLargeFiles(rootNode, options) {
|
|
8390
|
+
// Collect all diffs from the tree
|
|
8391
|
+
const allDiffs = [];
|
|
8392
|
+
function collectDiffs(node) {
|
|
8393
|
+
allDiffs.push(...node.diffs);
|
|
8394
|
+
node.children.forEach(collectDiffs);
|
|
8395
|
+
}
|
|
8396
|
+
collectDiffs(rootNode);
|
|
8397
|
+
// Summarize large files
|
|
8398
|
+
const processedDiffs = await summarizeLargeFiles(allDiffs, options);
|
|
8399
|
+
// Create a map for quick lookup
|
|
8400
|
+
const diffMap = new Map();
|
|
8401
|
+
processedDiffs.forEach((diff) => diffMap.set(diff.file, diff));
|
|
8402
|
+
// Rebuild tree with processed diffs
|
|
8403
|
+
function rebuildNode(node) {
|
|
8404
|
+
return {
|
|
8405
|
+
path: node.path,
|
|
8406
|
+
diffs: node.diffs.map((diff) => diffMap.get(diff.file) || diff),
|
|
8407
|
+
children: node.children.map(rebuildNode),
|
|
8408
|
+
};
|
|
8409
|
+
}
|
|
8410
|
+
return rebuildNode(rootNode);
|
|
8411
|
+
}
|
|
8412
|
+
|
|
8220
8413
|
/**
|
|
8221
8414
|
* Create groups from a given node info.
|
|
8222
8415
|
* @param {DiffNode} node - The node info to start grouping.
|
|
@@ -8269,6 +8462,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
|
|
|
8269
8462
|
return directory;
|
|
8270
8463
|
}
|
|
8271
8464
|
}
|
|
8465
|
+
/**
|
|
8466
|
+
* Default output formatter for directory diffs.
|
|
8467
|
+
*
|
|
8468
|
+
* TODO: Future improvements to consider:
|
|
8469
|
+
* - Hierarchical output showing file -> directory -> overall summary
|
|
8470
|
+
* - Configurable verbosity levels (compact, standard, detailed)
|
|
8471
|
+
* - Machine-readable format option (JSON) for programmatic use
|
|
8472
|
+
* - Semantic grouping by change type (added/modified/deleted) or feature area
|
|
8473
|
+
* - Visual diff indicators showing magnitude of changes
|
|
8474
|
+
*/
|
|
8272
8475
|
const defaultOutputCallback = (group) => {
|
|
8273
8476
|
let output = `
|
|
8274
8477
|
-------\n* changes in "/${group.path}"\n\n`;
|
|
@@ -8280,41 +8483,124 @@ const defaultOutputCallback = (group) => {
|
|
|
8280
8483
|
}
|
|
8281
8484
|
return output;
|
|
8282
8485
|
};
|
|
8283
|
-
|
|
8284
|
-
|
|
8486
|
+
/**
|
|
8487
|
+
* Process directory summarization in waves to respect concurrency limits
|
|
8488
|
+
* while maintaining predictable behavior.
|
|
8489
|
+
*/
|
|
8490
|
+
async function summarizeInWaves(directories, options) {
|
|
8491
|
+
const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
|
|
8492
|
+
let totalTokenCount = initialTotal;
|
|
8493
|
+
const results = [...directories];
|
|
8494
|
+
// Create sorted indices by token count (descending) for prioritized processing
|
|
8495
|
+
const sortedIndices = directories
|
|
8496
|
+
.map((d, i) => ({ index: i, tokens: d.tokenCount }))
|
|
8497
|
+
.sort((a, b) => b.tokens - a.tokens);
|
|
8498
|
+
let cursor = 0;
|
|
8499
|
+
while (totalTokenCount > maxTokens && cursor < sortedIndices.length) {
|
|
8500
|
+
// Select wave candidates: directories that exceed minTokensForSummary
|
|
8501
|
+
const wave = [];
|
|
8502
|
+
for (let i = cursor; i < sortedIndices.length && wave.length < maxConcurrent; i++) {
|
|
8503
|
+
const { index, tokens } = sortedIndices[i];
|
|
8504
|
+
// Skip directories below the minimum threshold
|
|
8505
|
+
if (tokens < minTokensForSummary) {
|
|
8506
|
+
cursor = i + 1;
|
|
8507
|
+
continue;
|
|
8508
|
+
}
|
|
8509
|
+
// Skip directories that have already been summarized
|
|
8510
|
+
if (results[index].summary) {
|
|
8511
|
+
cursor = i + 1;
|
|
8512
|
+
continue;
|
|
8513
|
+
}
|
|
8514
|
+
wave.push(index);
|
|
8515
|
+
cursor = i + 1;
|
|
8516
|
+
}
|
|
8517
|
+
// No more eligible candidates
|
|
8518
|
+
if (wave.length === 0) {
|
|
8519
|
+
break;
|
|
8520
|
+
}
|
|
8521
|
+
logger.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
|
|
8522
|
+
// Process wave in parallel
|
|
8523
|
+
const waveResults = await Promise.all(wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer })));
|
|
8524
|
+
// Update results and recalculate total
|
|
8525
|
+
waveResults.forEach((result, i) => {
|
|
8526
|
+
const idx = wave[i];
|
|
8527
|
+
const originalTokens = results[idx].tokenCount;
|
|
8528
|
+
const newTokens = result.tokenCount;
|
|
8529
|
+
const reduction = originalTokens - newTokens;
|
|
8530
|
+
totalTokenCount -= reduction;
|
|
8531
|
+
results[idx] = result;
|
|
8532
|
+
logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
|
|
8533
|
+
color: 'magenta',
|
|
8534
|
+
});
|
|
8535
|
+
});
|
|
8536
|
+
logger.verbose(`Total token count: ${totalTokenCount}`, {
|
|
8537
|
+
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8538
|
+
});
|
|
8539
|
+
// Check if we're now under budget
|
|
8540
|
+
if (totalTokenCount <= maxTokens) {
|
|
8541
|
+
logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
|
|
8542
|
+
break;
|
|
8543
|
+
}
|
|
8544
|
+
}
|
|
8545
|
+
return { directories: results, totalTokenCount };
|
|
8546
|
+
}
|
|
8547
|
+
/**
|
|
8548
|
+
* Summarize diffs using a three-phase approach:
|
|
8549
|
+
*
|
|
8550
|
+
* Phase 1: Pre-process large files to prevent any single file from dominating
|
|
8551
|
+
* Phase 2: Group diffs by directory and assess total token count
|
|
8552
|
+
* Phase 3: Wave-based parallel summarization until under budget
|
|
8553
|
+
*
|
|
8554
|
+
* This approach ensures:
|
|
8555
|
+
* - Large files don't bias the summary
|
|
8556
|
+
* - Small changes preserve their detail (minTokensForSummary threshold)
|
|
8557
|
+
* - Efficient parallel processing with predictable behavior
|
|
8558
|
+
* - Early exit when under token budget
|
|
8559
|
+
*/
|
|
8560
|
+
async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
|
|
8561
|
+
// Calculate maxFileTokens as 25% of maxTokens if not specified
|
|
8562
|
+
const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
|
|
8563
|
+
// PHASE 1: Pre-process large files
|
|
8564
|
+
logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
|
|
8565
|
+
const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
|
|
8566
|
+
maxFileTokens: effectiveMaxFileTokens,
|
|
8567
|
+
minTokensForSummary,
|
|
8568
|
+
maxConcurrent,
|
|
8569
|
+
tokenizer,
|
|
8570
|
+
logger,
|
|
8571
|
+
chain,
|
|
8572
|
+
textSplitter,
|
|
8573
|
+
});
|
|
8574
|
+
logger.stopSpinner('Files pre-processed').stopTimer();
|
|
8575
|
+
// PHASE 2: Directory grouping & assessment
|
|
8285
8576
|
logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
|
|
8286
|
-
const directoryDiffs = createDirectoryDiffs(
|
|
8287
|
-
// Sort by token count descending
|
|
8577
|
+
const directoryDiffs = createDirectoryDiffs(preprocessedNode);
|
|
8578
|
+
// Sort by token count descending for consistent output ordering
|
|
8288
8579
|
directoryDiffs.sort((a, b) => b.tokenCount - a.tokenCount);
|
|
8289
|
-
|
|
8580
|
+
const totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
|
|
8290
8581
|
logger.stopSpinner('Diffs Organized').stopTimer();
|
|
8291
|
-
logger.
|
|
8292
|
-
|
|
8293
|
-
|
|
8294
|
-
|
|
8295
|
-
|
|
8296
|
-
|
|
8297
|
-
|
|
8298
|
-
|
|
8299
|
-
|
|
8300
|
-
|
|
8301
|
-
|
|
8302
|
-
|
|
8303
|
-
|
|
8304
|
-
|
|
8305
|
-
|
|
8306
|
-
|
|
8307
|
-
|
|
8308
|
-
|
|
8309
|
-
|
|
8310
|
-
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8311
|
-
});
|
|
8312
|
-
return group;
|
|
8313
|
-
}, { priority: group.tokenCount });
|
|
8582
|
+
logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
|
|
8583
|
+
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8584
|
+
});
|
|
8585
|
+
// Early exit if already under budget
|
|
8586
|
+
if (totalTokenCount <= maxTokens) {
|
|
8587
|
+
logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
|
|
8588
|
+
return directoryDiffs.map(handleOutput).join('');
|
|
8589
|
+
}
|
|
8590
|
+
// PHASE 3: Wave-based summarization
|
|
8591
|
+
logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
|
|
8592
|
+
const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
|
|
8593
|
+
totalTokenCount,
|
|
8594
|
+
maxTokens,
|
|
8595
|
+
minTokensForSummary,
|
|
8596
|
+
maxConcurrent,
|
|
8597
|
+
logger,
|
|
8598
|
+
chain,
|
|
8599
|
+
textSplitter,
|
|
8600
|
+
tokenizer,
|
|
8314
8601
|
});
|
|
8315
|
-
|
|
8316
|
-
|
|
8317
|
-
return directoryDiffs.map(handleOutput).join('');
|
|
8602
|
+
logger.stopSpinner(`Diffs Consolidated`).stopTimer();
|
|
8603
|
+
return summarizedDiffs.map(handleOutput).join('');
|
|
8318
8604
|
}
|
|
8319
8605
|
|
|
8320
8606
|
/**
|
|
@@ -11314,7 +11600,7 @@ for (var i = 0; i < 256; i++) {
|
|
|
11314
11600
|
simpleEscapeMap[i] = simpleEscapeSequence(i);
|
|
11315
11601
|
}
|
|
11316
11602
|
|
|
11317
|
-
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
|
|
11603
|
+
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
|
|
11318
11604
|
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
|
|
11319
11605
|
const summarizationChain = loadSummarizationChain(model, {
|
|
11320
11606
|
type: 'map_reduce',
|
|
@@ -11328,11 +11614,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
|
|
|
11328
11614
|
logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
|
|
11329
11615
|
const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
|
|
11330
11616
|
logger.stopSpinner('Diffs Collected').stopTimer();
|
|
11331
|
-
// Summarize diffs
|
|
11617
|
+
// Summarize diffs using three-phase approach:
|
|
11618
|
+
// 1. Pre-process large files to prevent bias
|
|
11619
|
+
// 2. Group by directory and assess token count
|
|
11620
|
+
// 3. Wave-based parallel summarization until under budget
|
|
11332
11621
|
logger.startTimer();
|
|
11333
11622
|
const summary = await summarizeDiffs(diffs, {
|
|
11334
11623
|
tokenizer,
|
|
11335
|
-
maxTokens: maxTokens ||
|
|
11624
|
+
maxTokens: maxTokens || 2048,
|
|
11625
|
+
minTokensForSummary,
|
|
11626
|
+
maxFileTokens,
|
|
11627
|
+
maxConcurrent,
|
|
11336
11628
|
textSplitter,
|
|
11337
11629
|
chain: summarizationChain,
|
|
11338
11630
|
logger,
|
|
@@ -11635,7 +11927,16 @@ const handler$3 = async (argv, logger) => {
|
|
|
11635
11927
|
return await fileChangeParser({
|
|
11636
11928
|
changes,
|
|
11637
11929
|
commit: '--staged',
|
|
11638
|
-
options: {
|
|
11930
|
+
options: {
|
|
11931
|
+
tokenizer,
|
|
11932
|
+
git,
|
|
11933
|
+
llm,
|
|
11934
|
+
logger,
|
|
11935
|
+
maxTokens: config.service.tokenLimit,
|
|
11936
|
+
minTokensForSummary: config.service.minTokensForSummary,
|
|
11937
|
+
maxFileTokens: config.service.maxFileTokens,
|
|
11938
|
+
maxConcurrent: config.service.maxConcurrent,
|
|
11939
|
+
},
|
|
11639
11940
|
});
|
|
11640
11941
|
}
|
|
11641
11942
|
const commitMsg = await generateAndReviewLoop({
|
package/dist/index.js
CHANGED
|
@@ -26,7 +26,6 @@ var outputs = require('@langchain/core/outputs');
|
|
|
26
26
|
var manager = require('@langchain/core/callbacks/manager');
|
|
27
27
|
require('@langchain/core/utils/json_patch');
|
|
28
28
|
var simpleGit = require('simple-git');
|
|
29
|
-
var pQueue = require('p-queue');
|
|
30
29
|
var documents = require('@langchain/core/documents');
|
|
31
30
|
var diff = require('diff');
|
|
32
31
|
require('@langchain/core/messages');
|
|
@@ -69,7 +68,7 @@ var readline__namespace = /*#__PURE__*/_interopNamespaceDefault(readline);
|
|
|
69
68
|
/**
|
|
70
69
|
* Current build version from package.json
|
|
71
70
|
*/
|
|
72
|
-
const BUILD_VERSION = "0.
|
|
71
|
+
const BUILD_VERSION = "0.26.0";
|
|
73
72
|
|
|
74
73
|
const isInteractive = (config) => {
|
|
75
74
|
return config?.mode === 'interactive' || !!config?.interactive;
|
|
@@ -184,6 +183,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
|
|
|
184
183
|
fs.writeFileSync(filePath, newLines.join('\n'));
|
|
185
184
|
}
|
|
186
185
|
|
|
186
|
+
/**
|
|
187
|
+
* Prompt template for summarizing code diffs.
|
|
188
|
+
*
|
|
189
|
+
* TODO: Future improvements to consider:
|
|
190
|
+
* - Separate prompts for file-level vs directory-level summarization
|
|
191
|
+
* - Include file type context (e.g., "This is a React component", "This is a test file")
|
|
192
|
+
* - Add guidance for preserving semantic meaning of changes
|
|
193
|
+
* - Consider change type (added/modified/deleted) in prompt for better context
|
|
194
|
+
* - Include hints about the programming language for more idiomatic summaries
|
|
195
|
+
* - Add support for custom user-provided summarization prompts via config
|
|
196
|
+
*/
|
|
187
197
|
const template$5 = `GOAL: Use functional abstractions to summarize the following text
|
|
188
198
|
|
|
189
199
|
RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
|
|
@@ -369,8 +379,11 @@ function getDefaultServiceApiKey(config) {
|
|
|
369
379
|
const DEFAULT_OPENAI_LLM_SERVICE = {
|
|
370
380
|
provider: 'openai',
|
|
371
381
|
model: 'gpt-4o-mini',
|
|
372
|
-
tokenLimit:
|
|
382
|
+
tokenLimit: 4096,
|
|
373
383
|
temperature: 0.32,
|
|
384
|
+
maxConcurrent: 12,
|
|
385
|
+
minTokensForSummary: 800,
|
|
386
|
+
maxFileTokens: 2000,
|
|
374
387
|
authentication: {
|
|
375
388
|
type: 'APIKey',
|
|
376
389
|
credentials: {
|
|
@@ -382,6 +395,10 @@ const DEFAULT_ANTHROPIC_LLM_SERVICE = {
|
|
|
382
395
|
provider: 'anthropic',
|
|
383
396
|
model: 'claude-3-5-sonnet-20240620',
|
|
384
397
|
temperature: 0.32,
|
|
398
|
+
tokenLimit: 4096,
|
|
399
|
+
maxConcurrent: 12,
|
|
400
|
+
minTokensForSummary: 800,
|
|
401
|
+
maxFileTokens: 2000,
|
|
385
402
|
authentication: {
|
|
386
403
|
type: 'APIKey',
|
|
387
404
|
credentials: {
|
|
@@ -394,9 +411,11 @@ const DEFAULT_OLLAMA_LLM_SERVICE = {
|
|
|
394
411
|
model: 'llama3',
|
|
395
412
|
endpoint: 'http://localhost:11434',
|
|
396
413
|
maxConcurrent: 1,
|
|
397
|
-
tokenLimit:
|
|
414
|
+
tokenLimit: 4096,
|
|
398
415
|
temperature: 0.4,
|
|
399
416
|
maxParsingAttempts: 3,
|
|
417
|
+
minTokensForSummary: 800,
|
|
418
|
+
maxFileTokens: 2000,
|
|
400
419
|
authentication: {
|
|
401
420
|
type: 'None',
|
|
402
421
|
credentials: undefined,
|
|
@@ -644,6 +663,24 @@ function loadGitConfig(config) {
|
|
|
644
663
|
service = {
|
|
645
664
|
provider: gitConfigParsed.coco?.serviceProvider,
|
|
646
665
|
model: gitConfigParsed.coco?.serviceModel,
|
|
666
|
+
tokenLimit: gitConfigParsed.coco?.serviceTokenLimit
|
|
667
|
+
? Number(gitConfigParsed.coco.serviceTokenLimit)
|
|
668
|
+
: undefined,
|
|
669
|
+
temperature: gitConfigParsed.coco?.serviceTemperature
|
|
670
|
+
? Number(gitConfigParsed.coco.serviceTemperature)
|
|
671
|
+
: undefined,
|
|
672
|
+
maxConcurrent: gitConfigParsed.coco?.serviceMaxConcurrent
|
|
673
|
+
? Number(gitConfigParsed.coco.serviceMaxConcurrent)
|
|
674
|
+
: undefined,
|
|
675
|
+
minTokensForSummary: gitConfigParsed.coco?.serviceMinTokensForSummary
|
|
676
|
+
? Number(gitConfigParsed.coco.serviceMinTokensForSummary)
|
|
677
|
+
: undefined,
|
|
678
|
+
maxFileTokens: gitConfigParsed.coco?.serviceMaxFileTokens
|
|
679
|
+
? Number(gitConfigParsed.coco.serviceMaxFileTokens)
|
|
680
|
+
: undefined,
|
|
681
|
+
maxParsingAttempts: gitConfigParsed.coco?.serviceMaxParsingAttempts
|
|
682
|
+
? Number(gitConfigParsed.coco.serviceMaxParsingAttempts)
|
|
683
|
+
: undefined,
|
|
647
684
|
authentication: {
|
|
648
685
|
type: 'APIKey',
|
|
649
686
|
credentials: {
|
|
@@ -699,6 +736,24 @@ const appendToGitConfig = async (filePath, config) => {
|
|
|
699
736
|
if (service.authentication.type === 'APIKey') {
|
|
700
737
|
contentLines.push(` serviceApiKey = ${service.authentication.credentials.apiKey}`);
|
|
701
738
|
}
|
|
739
|
+
if (service.tokenLimit !== undefined) {
|
|
740
|
+
contentLines.push(` serviceTokenLimit = ${service.tokenLimit}`);
|
|
741
|
+
}
|
|
742
|
+
if (service.temperature !== undefined) {
|
|
743
|
+
contentLines.push(` serviceTemperature = ${service.temperature}`);
|
|
744
|
+
}
|
|
745
|
+
if (service.maxConcurrent !== undefined) {
|
|
746
|
+
contentLines.push(` serviceMaxConcurrent = ${service.maxConcurrent}`);
|
|
747
|
+
}
|
|
748
|
+
if (service.minTokensForSummary !== undefined) {
|
|
749
|
+
contentLines.push(` serviceMinTokensForSummary = ${service.minTokensForSummary}`);
|
|
750
|
+
}
|
|
751
|
+
if (service.maxFileTokens !== undefined) {
|
|
752
|
+
contentLines.push(` serviceMaxFileTokens = ${service.maxFileTokens}`);
|
|
753
|
+
}
|
|
754
|
+
if (service.maxParsingAttempts !== undefined) {
|
|
755
|
+
contentLines.push(` serviceMaxParsingAttempts = ${service.maxParsingAttempts}`);
|
|
756
|
+
}
|
|
702
757
|
if (service.requestOptions?.timeout) {
|
|
703
758
|
contentLines.push(` serviceRequestOptionsTimeout = ${service.requestOptions.timeout}`);
|
|
704
759
|
}
|
|
@@ -1061,6 +1116,16 @@ const schema$1 = {
|
|
|
1061
1116
|
"description": "The maximum number of requests to make concurrently.",
|
|
1062
1117
|
"default": 6
|
|
1063
1118
|
},
|
|
1119
|
+
"minTokensForSummary": {
|
|
1120
|
+
"type": "number",
|
|
1121
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1122
|
+
"default": 400
|
|
1123
|
+
},
|
|
1124
|
+
"maxFileTokens": {
|
|
1125
|
+
"type": "number",
|
|
1126
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1127
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1128
|
+
},
|
|
1064
1129
|
"authentication": {
|
|
1065
1130
|
"anyOf": [
|
|
1066
1131
|
{
|
|
@@ -1821,6 +1886,16 @@ const schema$1 = {
|
|
|
1821
1886
|
"description": "The maximum number of requests to make concurrently.",
|
|
1822
1887
|
"default": 6
|
|
1823
1888
|
},
|
|
1889
|
+
"minTokensForSummary": {
|
|
1890
|
+
"type": "number",
|
|
1891
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1892
|
+
"default": 400
|
|
1893
|
+
},
|
|
1894
|
+
"maxFileTokens": {
|
|
1895
|
+
"type": "number",
|
|
1896
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1897
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1898
|
+
},
|
|
1824
1899
|
"authentication": {
|
|
1825
1900
|
"anyOf": [
|
|
1826
1901
|
{
|
|
@@ -1972,6 +2047,16 @@ const schema$1 = {
|
|
|
1972
2047
|
"description": "The maximum number of requests to make concurrently.",
|
|
1973
2048
|
"default": 6
|
|
1974
2049
|
},
|
|
2050
|
+
"minTokensForSummary": {
|
|
2051
|
+
"type": "number",
|
|
2052
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
2053
|
+
"default": 400
|
|
2054
|
+
},
|
|
2055
|
+
"maxFileTokens": {
|
|
2056
|
+
"type": "number",
|
|
2057
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
2058
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
2059
|
+
},
|
|
1975
2060
|
"authentication": {
|
|
1976
2061
|
"anyOf": [
|
|
1977
2062
|
{
|
|
@@ -8239,6 +8324,114 @@ async function summarize(documents$1, { chain, textSplitter, options }) {
|
|
|
8239
8324
|
return res.text && res.text.trim();
|
|
8240
8325
|
}
|
|
8241
8326
|
|
|
8327
|
+
/**
|
|
8328
|
+
* Summarize a single file diff that exceeds the token threshold.
|
|
8329
|
+
*/
|
|
8330
|
+
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
|
|
8331
|
+
try {
|
|
8332
|
+
const fileSummary = await summarize([
|
|
8333
|
+
{
|
|
8334
|
+
pageContent: fileDiff.diff,
|
|
8335
|
+
metadata: {
|
|
8336
|
+
file: fileDiff.file,
|
|
8337
|
+
summary: fileDiff.summary,
|
|
8338
|
+
},
|
|
8339
|
+
},
|
|
8340
|
+
], {
|
|
8341
|
+
chain,
|
|
8342
|
+
textSplitter,
|
|
8343
|
+
options: {
|
|
8344
|
+
returnIntermediateSteps: false,
|
|
8345
|
+
},
|
|
8346
|
+
});
|
|
8347
|
+
const newTokenCount = tokenizer(fileSummary);
|
|
8348
|
+
return {
|
|
8349
|
+
...fileDiff,
|
|
8350
|
+
diff: fileSummary,
|
|
8351
|
+
tokenCount: newTokenCount,
|
|
8352
|
+
};
|
|
8353
|
+
}
|
|
8354
|
+
catch (error) {
|
|
8355
|
+
// On error, return original diff unchanged
|
|
8356
|
+
console.error(`Failed to summarize file ${fileDiff.file}:`, error);
|
|
8357
|
+
return fileDiff;
|
|
8358
|
+
}
|
|
8359
|
+
}
|
|
8360
|
+
/**
|
|
8361
|
+
* Process files in waves to respect concurrency limits.
|
|
8362
|
+
*/
|
|
8363
|
+
async function processInWaves(items, processor, maxConcurrent) {
|
|
8364
|
+
const results = [];
|
|
8365
|
+
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
8366
|
+
const wave = items.slice(i, i + maxConcurrent);
|
|
8367
|
+
const waveResults = await Promise.all(wave.map(processor));
|
|
8368
|
+
results.push(...waveResults);
|
|
8369
|
+
}
|
|
8370
|
+
return results;
|
|
8371
|
+
}
|
|
8372
|
+
/**
 * Swap every oversized file diff for a summarized version of itself.
 *
 * A diff is selected when its `tokenCount` is greater than `maxFileTokens`
 * and also at least `minTokensForSummary`. Selected diffs are summarized in
 * waves of `maxConcurrent`, and each result is written back at the position
 * the original occupied. The input array is not modified.
 *
 * @param diffs - Array of file diffs to process
 * @param options - Thresholds (`maxFileTokens`, `minTokensForSummary`),
 *   `maxConcurrent`, plus the `tokenizer`, `logger`, `chain` and `textSplitter`
 *   used while summarizing
 * @returns A new array of file diffs with the large ones summarized
 */
async function summarizeLargeFiles(diffs, options) {
    const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
    const output = [...diffs];
    // Remember where each oversized diff lives so its summary can be slotted back in.
    const oversized = [];
    for (const [index, diff] of diffs.entries()) {
        const exceedsFileLimit = diff.tokenCount > maxFileTokens;
        if (exceedsFileLimit && diff.tokenCount >= minTokensForSummary) {
            oversized.push({ index, diff });
        }
    }
    if (oversized.length === 0) {
        return output;
    }
    logger.verbose(`Pre-summarizing ${oversized.length} large file(s)...`, { color: 'blue' });
    const condensed = await processInWaves(oversized, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer }), maxConcurrent);
    for (let slot = 0; slot < condensed.length; slot++) {
        const summarizedDiff = condensed[slot];
        const targetIndex = oversized[slot].index;
        const tokensBefore = output[targetIndex].tokenCount;
        const tokensAfter = summarizedDiff.tokenCount;
        logger.verbose(` - ${summarizedDiff.file}: ${tokensBefore} -> ${tokensAfter} tokens`, { color: 'magenta' });
        output[targetIndex] = summarizedDiff;
    }
    return output;
}
|
|
8407
|
+
/**
 * Run large-file summarization over every diff in a DiffNode tree.
 *
 * All diffs are gathered from the tree (a node's own diffs first, then each
 * child in order), passed through `summarizeLargeFiles`, and a new tree is
 * built in which each diff is replaced by its processed counterpart, matched
 * on `file`. Rebuilt nodes carry only `path`, `diffs` and `children`.
 *
 * @param rootNode - Root of the tree; each node exposes `path`, `diffs` and `children`
 * @param options - Passed through unchanged to `summarizeLargeFiles`
 * @returns A new root node containing the processed diffs
 */
async function preprocessLargeFiles(rootNode, options) {
    // Flatten the tree with an explicit stack; children are pushed in reverse
    // so they are visited in their original order.
    const gathered = [];
    const pending = [rootNode];
    while (pending.length > 0) {
        const current = pending.pop();
        gathered.push(...current.diffs);
        for (let c = current.children.length - 1; c >= 0; c--) {
            pending.push(current.children[c]);
        }
    }
    const processed = await summarizeLargeFiles(gathered, options);
    // Index processed diffs by file name for the rebuild pass.
    const byFile = new Map();
    for (const entry of processed) {
        byFile.set(entry.file, entry);
    }
    function rebuild(node) {
        return {
            path: node.path,
            diffs: node.diffs.map((original) => {
                const replacement = byFile.get(original.file);
                return replacement || original;
            }),
            children: node.children.map((child) => rebuild(child)),
        };
    }
    return rebuild(rootNode);
}
|
|
8434
|
+
|
|
8242
8435
|
/**
|
|
8243
8436
|
* Create groups from a given node info.
|
|
8244
8437
|
* @param {DiffNode} node - The node info to start grouping.
|
|
@@ -8291,6 +8484,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
|
|
|
8291
8484
|
return directory;
|
|
8292
8485
|
}
|
|
8293
8486
|
}
|
|
8487
|
+
/**
|
|
8488
|
+
* Default output formatter for directory diffs.
|
|
8489
|
+
*
|
|
8490
|
+
* TODO: Future improvements to consider:
|
|
8491
|
+
* - Hierarchical output showing file -> directory -> overall summary
|
|
8492
|
+
* - Configurable verbosity levels (compact, standard, detailed)
|
|
8493
|
+
* - Machine-readable format option (JSON) for programmatic use
|
|
8494
|
+
* - Semantic grouping by change type (added/modified/deleted) or feature area
|
|
8495
|
+
* - Visual diff indicators showing magnitude of changes
|
|
8496
|
+
*/
|
|
8294
8497
|
const defaultOutputCallback = (group) => {
|
|
8295
8498
|
let output = `
|
|
8296
8499
|
-------\n* changes in "/${group.path}"\n\n`;
|
|
@@ -8302,41 +8505,124 @@ const defaultOutputCallback = (group) => {
|
|
|
8302
8505
|
}
|
|
8303
8506
|
return output;
|
|
8304
8507
|
};
|
|
8305
|
-
|
|
8306
|
-
|
|
8508
|
+
/**
 * Summarize directory groups wave by wave until the token budget is met.
 *
 * Directories are visited from largest to smallest `tokenCount`. Each wave
 * takes up to `maxConcurrent` directories that are not below
 * `minTokensForSummary` and do not already carry a `summary`, summarizes them
 * in parallel, and subtracts the tokens saved from the running total. The
 * loop stops once the total is within `maxTokens` or no eligible directory
 * remains. The input array is not modified.
 *
 * @param directories - Directory diff groups, each with a `tokenCount`
 * @param options - `totalTokenCount` (starting total), `maxTokens`,
 *   `minTokensForSummary`, `maxConcurrent`, `logger`, and the `chain`,
 *   `textSplitter` and `tokenizer` forwarded to `summarizeDirectoryDiff`
 * @returns `{ directories, totalTokenCount }` holding the updated groups and total
 */
async function summarizeInWaves(directories, options) {
    const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
    const results = Array.from(directories);
    let totalTokenCount = initialTotal;
    // Rank directory positions by size so the biggest groups are condensed first.
    const ranked = directories.map((directory, position) => ({ index: position, tokens: directory.tokenCount }));
    ranked.sort((left, right) => right.tokens - left.tokens);
    let cursor = 0;
    while (totalTokenCount > maxTokens && cursor < ranked.length) {
        // Fill the next wave, consuming ranked entries whether or not they qualify.
        const wave = [];
        while (cursor < ranked.length && wave.length < maxConcurrent) {
            const { index, tokens } = ranked[cursor];
            cursor += 1;
            // Too small to be worth summarizing, or already summarized.
            if (tokens < minTokensForSummary || results[index].summary) {
                continue;
            }
            wave.push(index);
        }
        // No more eligible candidates
        if (wave.length === 0) {
            break;
        }
        logger.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
        const pendingSummaries = wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer }));
        const waveResults = await Promise.all(pendingSummaries);
        for (let w = 0; w < waveResults.length; w++) {
            const result = waveResults[w];
            const idx = wave[w];
            const originalTokens = results[idx].tokenCount;
            const newTokens = result.tokenCount;
            totalTokenCount -= originalTokens - newTokens;
            results[idx] = result;
            logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
                color: 'magenta',
            });
        }
        const overBudget = totalTokenCount > maxTokens;
        logger.verbose(`Total token count: ${totalTokenCount}`, {
            color: overBudget ? 'yellow' : 'green',
        });
        if (totalTokenCount <= maxTokens) {
            logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
            break;
        }
    }
    return { directories: results, totalTokenCount };
}
|
|
8569
|
+
/**
|
|
8570
|
+
* Summarize diffs using a three-phase approach:
|
|
8571
|
+
*
|
|
8572
|
+
* Phase 1: Pre-process large files to prevent any single file from dominating
|
|
8573
|
+
* Phase 2: Group diffs by directory and assess total token count
|
|
8574
|
+
* Phase 3: Wave-based parallel summarization until under budget
|
|
8575
|
+
*
|
|
8576
|
+
* This approach ensures:
|
|
8577
|
+
* - Large files don't bias the summary
|
|
8578
|
+
* - Small changes preserve their detail (minTokensForSummary threshold)
|
|
8579
|
+
* - Efficient parallel processing with predictable behavior
|
|
8580
|
+
* - Early exit when under token budget
|
|
8581
|
+
*/
|
|
8582
|
+
async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
|
|
8583
|
+
// Calculate maxFileTokens as 25% of maxTokens if not specified
|
|
8584
|
+
const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
|
|
8585
|
+
// PHASE 1: Pre-process large files
|
|
8586
|
+
logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
|
|
8587
|
+
const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
|
|
8588
|
+
maxFileTokens: effectiveMaxFileTokens,
|
|
8589
|
+
minTokensForSummary,
|
|
8590
|
+
maxConcurrent,
|
|
8591
|
+
tokenizer,
|
|
8592
|
+
logger,
|
|
8593
|
+
chain,
|
|
8594
|
+
textSplitter,
|
|
8595
|
+
});
|
|
8596
|
+
logger.stopSpinner('Files pre-processed').stopTimer();
|
|
8597
|
+
// PHASE 2: Directory grouping & assessment
|
|
8307
8598
|
logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
|
|
8308
|
-
const directoryDiffs = createDirectoryDiffs(
|
|
8309
|
-
// Sort by token count descending
|
|
8599
|
+
const directoryDiffs = createDirectoryDiffs(preprocessedNode);
|
|
8600
|
+
// Sort by token count descending for consistent output ordering
|
|
8310
8601
|
directoryDiffs.sort((a, b) => b.tokenCount - a.tokenCount);
|
|
8311
|
-
|
|
8602
|
+
const totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
|
|
8312
8603
|
logger.stopSpinner('Diffs Organized').stopTimer();
|
|
8313
|
-
logger.
|
|
8314
|
-
|
|
8315
|
-
|
|
8316
|
-
|
|
8317
|
-
|
|
8318
|
-
|
|
8319
|
-
|
|
8320
|
-
|
|
8321
|
-
|
|
8322
|
-
|
|
8323
|
-
|
|
8324
|
-
|
|
8325
|
-
|
|
8326
|
-
|
|
8327
|
-
|
|
8328
|
-
|
|
8329
|
-
|
|
8330
|
-
|
|
8331
|
-
|
|
8332
|
-
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8333
|
-
});
|
|
8334
|
-
return group;
|
|
8335
|
-
}, { priority: group.tokenCount });
|
|
8604
|
+
logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
|
|
8605
|
+
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8606
|
+
});
|
|
8607
|
+
// Early exit if already under budget
|
|
8608
|
+
if (totalTokenCount <= maxTokens) {
|
|
8609
|
+
logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
|
|
8610
|
+
return directoryDiffs.map(handleOutput).join('');
|
|
8611
|
+
}
|
|
8612
|
+
// PHASE 3: Wave-based summarization
|
|
8613
|
+
logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
|
|
8614
|
+
const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
|
|
8615
|
+
totalTokenCount,
|
|
8616
|
+
maxTokens,
|
|
8617
|
+
minTokensForSummary,
|
|
8618
|
+
maxConcurrent,
|
|
8619
|
+
logger,
|
|
8620
|
+
chain,
|
|
8621
|
+
textSplitter,
|
|
8622
|
+
tokenizer,
|
|
8336
8623
|
});
|
|
8337
|
-
|
|
8338
|
-
|
|
8339
|
-
return directoryDiffs.map(handleOutput).join('');
|
|
8624
|
+
logger.stopSpinner(`Diffs Consolidated`).stopTimer();
|
|
8625
|
+
return summarizedDiffs.map(handleOutput).join('');
|
|
8340
8626
|
}
|
|
8341
8627
|
|
|
8342
8628
|
/**
|
|
@@ -11336,7 +11622,7 @@ for (var i = 0; i < 256; i++) {
|
|
|
11336
11622
|
simpleEscapeMap[i] = simpleEscapeSequence(i);
|
|
11337
11623
|
}
|
|
11338
11624
|
|
|
11339
|
-
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
|
|
11625
|
+
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
|
|
11340
11626
|
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
|
|
11341
11627
|
const summarizationChain = loadSummarizationChain(model, {
|
|
11342
11628
|
type: 'map_reduce',
|
|
@@ -11350,11 +11636,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
|
|
|
11350
11636
|
logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
|
|
11351
11637
|
const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
|
|
11352
11638
|
logger.stopSpinner('Diffs Collected').stopTimer();
|
|
11353
|
-
// Summarize diffs
|
|
11639
|
+
// Summarize diffs using three-phase approach:
|
|
11640
|
+
// 1. Pre-process large files to prevent bias
|
|
11641
|
+
// 2. Group by directory and assess token count
|
|
11642
|
+
// 3. Wave-based parallel summarization until under budget
|
|
11354
11643
|
logger.startTimer();
|
|
11355
11644
|
const summary = await summarizeDiffs(diffs, {
|
|
11356
11645
|
tokenizer,
|
|
11357
|
-
maxTokens: maxTokens ||
|
|
11646
|
+
maxTokens: maxTokens || 2048,
|
|
11647
|
+
minTokensForSummary,
|
|
11648
|
+
maxFileTokens,
|
|
11649
|
+
maxConcurrent,
|
|
11358
11650
|
textSplitter,
|
|
11359
11651
|
chain: summarizationChain,
|
|
11360
11652
|
logger,
|
|
@@ -11657,7 +11949,16 @@ const handler$3 = async (argv, logger) => {
|
|
|
11657
11949
|
return await fileChangeParser({
|
|
11658
11950
|
changes,
|
|
11659
11951
|
commit: '--staged',
|
|
11660
|
-
options: {
|
|
11952
|
+
options: {
|
|
11953
|
+
tokenizer,
|
|
11954
|
+
git,
|
|
11955
|
+
llm,
|
|
11956
|
+
logger,
|
|
11957
|
+
maxTokens: config.service.tokenLimit,
|
|
11958
|
+
minTokensForSummary: config.service.minTokensForSummary,
|
|
11959
|
+
maxFileTokens: config.service.maxFileTokens,
|
|
11960
|
+
maxConcurrent: config.service.maxConcurrent,
|
|
11961
|
+
},
|
|
11661
11962
|
});
|
|
11662
11963
|
}
|
|
11663
11964
|
const commitMsg = await generateAndReviewLoop({
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "git-coco",
|
|
3
|
-
"version": "0.24.0",
|
|
3
|
+
"version": "0.26.0",
|
|
4
4
|
"description": "zero-effort git commits with coco.",
|
|
5
5
|
"author": "gfargo <ghfargo@gmail.com>",
|
|
6
6
|
"license": "MIT",
|
|
@@ -53,7 +53,7 @@
|
|
|
53
53
|
"@types/diff": "^8.0.0",
|
|
54
54
|
"@types/ini": "^4.1.1",
|
|
55
55
|
"@types/jest": "^30.0.0",
|
|
56
|
-
"@types/node": "^
|
|
56
|
+
"@types/node": "^25.0.10",
|
|
57
57
|
"@types/yargs": "^17.0.33",
|
|
58
58
|
"@typescript-eslint/eslint-plugin": "^7.13.1",
|
|
59
59
|
"@typescript-eslint/parser": "^7.13.1",
|
|
@@ -77,20 +77,20 @@
|
|
|
77
77
|
"@commitlint/core": "^19.8.0",
|
|
78
78
|
"@inquirer/prompts": "3.3.0",
|
|
79
79
|
"@langchain/anthropic": "^0.3.14",
|
|
80
|
-
"@langchain/community": "^0.3.
|
|
81
|
-
"@langchain/core": "^0.3.
|
|
80
|
+
"@langchain/community": "^0.3.58",
|
|
81
|
+
"@langchain/core": "^0.3.80",
|
|
82
82
|
"@langchain/ollama": "^0.2.0",
|
|
83
83
|
"@langchain/openai": "^0.6.7",
|
|
84
84
|
"ajv": "^8.16.0",
|
|
85
85
|
"chalk": "4.1.2",
|
|
86
|
-
"diff": "8.0.
|
|
86
|
+
"diff": "8.0.3",
|
|
87
87
|
"ini": "5.0.0",
|
|
88
88
|
"minimatch": "^9.0.5",
|
|
89
89
|
"ora": "5.4.1",
|
|
90
90
|
"p-queue": "5.0.0",
|
|
91
91
|
"performance-now": "2.1.0",
|
|
92
92
|
"pretty-ms": "7.0.1",
|
|
93
|
-
"simple-git": "3.
|
|
93
|
+
"simple-git": "3.30.0",
|
|
94
94
|
"tiktoken": "^1.0.21",
|
|
95
95
|
"yargs": "17.7.2"
|
|
96
96
|
},
|