git-coco 0.23.1 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +31 -0
- package/dist/index.esm.mjs +439 -55
- package/dist/index.js +439 -55
- package/package.json +5 -5
package/dist/index.esm.mjs
CHANGED
|
@@ -27,7 +27,6 @@ import { RUN_KEY } from '@langchain/core/outputs';
|
|
|
27
27
|
import { CallbackManager, parseCallbackConfigArg } from '@langchain/core/callbacks/manager';
|
|
28
28
|
import '@langchain/core/utils/json_patch';
|
|
29
29
|
import { simpleGit } from 'simple-git';
|
|
30
|
-
import pQueue from 'p-queue';
|
|
31
30
|
import { Document, BaseDocumentTransformer } from '@langchain/core/documents';
|
|
32
31
|
import { createTwoFilesPatch } from 'diff';
|
|
33
32
|
import '@langchain/core/messages';
|
|
@@ -47,7 +46,7 @@ import { pathToFileURL } from 'url';
|
|
|
47
46
|
/**
|
|
48
47
|
* Current build version from package.json
|
|
49
48
|
*/
|
|
50
|
-
const BUILD_VERSION = "0.
|
|
49
|
+
const BUILD_VERSION = "0.25.0";
|
|
51
50
|
|
|
52
51
|
const isInteractive = (config) => {
|
|
53
52
|
return config?.mode === 'interactive' || !!config?.interactive;
|
|
@@ -162,6 +161,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
|
|
|
162
161
|
fs__default.writeFileSync(filePath, newLines.join('\n'));
|
|
163
162
|
}
|
|
164
163
|
|
|
164
|
+
/**
|
|
165
|
+
* Prompt template for summarizing code diffs.
|
|
166
|
+
*
|
|
167
|
+
* TODO: Future improvements to consider:
|
|
168
|
+
* - Separate prompts for file-level vs directory-level summarization
|
|
169
|
+
* - Include file type context (e.g., "This is a React component", "This is a test file")
|
|
170
|
+
* - Add guidance for preserving semantic meaning of changes
|
|
171
|
+
* - Consider change type (added/modified/deleted) in prompt for better context
|
|
172
|
+
* - Include hints about the programming language for more idiomatic summaries
|
|
173
|
+
* - Add support for custom user-provided summarization prompts via config
|
|
174
|
+
*/
|
|
165
175
|
const template$5 = `GOAL: Use functional abstractions to summarize the following text
|
|
166
176
|
|
|
167
177
|
RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
|
|
@@ -1039,6 +1049,16 @@ const schema$1 = {
|
|
|
1039
1049
|
"description": "The maximum number of requests to make concurrently.",
|
|
1040
1050
|
"default": 6
|
|
1041
1051
|
},
|
|
1052
|
+
"minTokensForSummary": {
|
|
1053
|
+
"type": "number",
|
|
1054
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1055
|
+
"default": 400
|
|
1056
|
+
},
|
|
1057
|
+
"maxFileTokens": {
|
|
1058
|
+
"type": "number",
|
|
1059
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1060
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1061
|
+
},
|
|
1042
1062
|
"authentication": {
|
|
1043
1063
|
"anyOf": [
|
|
1044
1064
|
{
|
|
@@ -1799,6 +1819,16 @@ const schema$1 = {
|
|
|
1799
1819
|
"description": "The maximum number of requests to make concurrently.",
|
|
1800
1820
|
"default": 6
|
|
1801
1821
|
},
|
|
1822
|
+
"minTokensForSummary": {
|
|
1823
|
+
"type": "number",
|
|
1824
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1825
|
+
"default": 400
|
|
1826
|
+
},
|
|
1827
|
+
"maxFileTokens": {
|
|
1828
|
+
"type": "number",
|
|
1829
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1830
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1831
|
+
},
|
|
1802
1832
|
"authentication": {
|
|
1803
1833
|
"anyOf": [
|
|
1804
1834
|
{
|
|
@@ -1950,6 +1980,16 @@ const schema$1 = {
|
|
|
1950
1980
|
"description": "The maximum number of requests to make concurrently.",
|
|
1951
1981
|
"default": 6
|
|
1952
1982
|
},
|
|
1983
|
+
"minTokensForSummary": {
|
|
1984
|
+
"type": "number",
|
|
1985
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1986
|
+
"default": 400
|
|
1987
|
+
},
|
|
1988
|
+
"maxFileTokens": {
|
|
1989
|
+
"type": "number",
|
|
1990
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1991
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1992
|
+
},
|
|
1953
1993
|
"authentication": {
|
|
1954
1994
|
"anyOf": [
|
|
1955
1995
|
{
|
|
@@ -6058,9 +6098,13 @@ const options$4 = {
|
|
|
6058
6098
|
alias: 'b',
|
|
6059
6099
|
description: 'Target branch to compare against',
|
|
6060
6100
|
},
|
|
6101
|
+
tag: {
|
|
6102
|
+
type: 'string',
|
|
6103
|
+
alias: 't',
|
|
6104
|
+
description: 'Target tag to compare against',
|
|
6105
|
+
},
|
|
6061
6106
|
sinceLastTag: {
|
|
6062
6107
|
type: 'boolean',
|
|
6063
|
-
alias: 't',
|
|
6064
6108
|
description: 'Generate changelog for all commits since the last tag',
|
|
6065
6109
|
default: false,
|
|
6066
6110
|
},
|
|
@@ -7074,6 +7118,37 @@ async function getCommitLogAgainstBranch({ git, logger, targetBranch, }) {
|
|
|
7074
7118
|
return [];
|
|
7075
7119
|
}
|
|
7076
7120
|
|
|
7121
|
+
/**
 * Retrieves the commit log between the current branch and a specified tag.
 *
 * Runs `git rev-list <targetTag>..<currentBranch>`, keeps the non-empty output
 * lines, reverses their order, and passes the first and last entries of the
 * reversed list to `getCommitLogRangeDetails` with merges excluded.
 *
 * @param {Object} options - The options for retrieving the commit log.
 * @param {SimpleGit} options.git - The SimpleGit instance.
 * @param {Logger} [options.logger] - Optional logger; every call on it is guarded.
 * @param {string} options.targetTag - The tag to compare against.
 * @returns {Promise<CommitDetails[]>} The commit details for the range, or an
 *   empty array when no commits are found or any step throws.
 */
async function getCommitLogAgainstTag({ git, logger, targetTag, }) {
    try {
        const currentBranch = await getCurrentBranchName({ git });
        const revListOutput = await git.raw(['rev-list', `${targetTag}..${currentBranch}`]);
        const uniqueCommits = revListOutput
            .split('\n')
            .filter(Boolean)
            .reverse();
        logger?.verbose(`Found ${uniqueCommits.length} unique commits between "${currentBranch}" and tag "${targetTag}"`, { color: 'blue' });
        const firstCommit = uniqueCommits[0];
        const lastCommit = uniqueCommits[uniqueCommits.length - 1];
        if (!firstCommit || !lastCommit) {
            logger?.log('Unable to determine first and last commit between branch and tag', { color: 'yellow' });
            return [];
        }
        return await getCommitLogRangeDetails(firstCommit, lastCommit, { git, noMerges: true });
    }
    catch (error) {
        logger?.log('Encountered an error getting commit log between branch and tag', { color: 'red' });
        // Surface the underlying cause (e.g. an unknown tag) in verbose mode
        // instead of discarding it; the function still degrades to an empty list.
        const reason = error instanceof Error ? error.message : String(error);
        logger?.verbose?.(`Reason: ${reason}`, { color: 'red' });
    }
    return [];
}
|
|
7151
|
+
|
|
7077
7152
|
/**
|
|
7078
7153
|
* Retrieves the commit log for the current branch.
|
|
7079
7154
|
*
|
|
@@ -7692,6 +7767,15 @@ const handler$4 = async (argv, logger) => {
|
|
|
7692
7767
|
const git = getRepo();
|
|
7693
7768
|
const key = getApiKeyForModel(config);
|
|
7694
7769
|
const { provider, model } = getModelAndProviderFromConfig(config);
|
|
7770
|
+
const exclusiveOptions = [
|
|
7771
|
+
argv.branch ? '--branch' : null,
|
|
7772
|
+
argv.tag ? '--tag' : null,
|
|
7773
|
+
config.sinceLastTag ? '--since-last-tag' : null,
|
|
7774
|
+
].filter(Boolean);
|
|
7775
|
+
if (exclusiveOptions.length > 1) {
|
|
7776
|
+
logger.log(`Options ${exclusiveOptions.join(', ')} cannot be used together.`, { color: 'red' });
|
|
7777
|
+
process.exit(1);
|
|
7778
|
+
}
|
|
7695
7779
|
if (config.service.authentication.type !== 'None' && !key) {
|
|
7696
7780
|
logger.log(`No API Key found. 🗝️🚪`, { color: 'red' });
|
|
7697
7781
|
process.exit(1);
|
|
@@ -7733,6 +7817,10 @@ const handler$4 = async (argv, logger) => {
|
|
|
7733
7817
|
logger.verbose(`Generating commit log against branch: ${argv.branch}`, { color: 'yellow' });
|
|
7734
7818
|
commits = await getCommitLogAgainstBranch({ git, logger, targetBranch: argv.branch });
|
|
7735
7819
|
}
|
|
7820
|
+
else if (argv.tag) {
|
|
7821
|
+
logger.verbose(`Generating commit log against tag: ${argv.tag}`, { color: 'yellow' });
|
|
7822
|
+
commits = await getCommitLogAgainstTag({ git, logger, targetTag: argv.tag });
|
|
7823
|
+
}
|
|
7736
7824
|
else {
|
|
7737
7825
|
logger.verbose(`No range, branch, or tag option provided. Defaulting to current branch`, {
|
|
7738
7826
|
color: 'yellow',
|
|
@@ -8006,6 +8094,45 @@ function repairJson(jsonString) {
|
|
|
8006
8094
|
}
|
|
8007
8095
|
}
|
|
8008
8096
|
|
|
8097
|
+
/**
 * Return the first brace-balanced `{...}` span found in `text`.
 *
 * Scanning begins at the first `{`. Double quotes toggle an "inside string"
 * state in which braces are ignored, and a backslash causes the character
 * after it to be skipped. The result is not validated as JSON.
 *
 * @param {string} text - Text that may contain an object literal.
 * @returns {string|null} The substring from the first `{` through its matching
 *   `}`, or null when there is no `{` or the braces never balance.
 */
function extractFirstJsonObject(text) {
    const openAt = text.indexOf('{');
    if (openAt === -1) {
        return null;
    }
    let depth = 0;
    let insideString = false;
    let skipThisChar = false;
    let pos = openAt;
    while (pos < text.length) {
        const ch = text[pos];
        if (skipThisChar) {
            // Character following a backslash: consume it without interpretation.
            skipThisChar = false;
        }
        else if (ch === '\\') {
            skipThisChar = true;
        }
        else if (ch === '"') {
            insideString = !insideString;
        }
        else if (!insideString) {
            if (ch === '{') {
                depth += 1;
            }
            else if (ch === '}') {
                depth -= 1;
                if (depth === 0) {
                    // Matching close brace of the first object.
                    return text.substring(openAt, pos + 1);
                }
            }
        }
        pos += 1;
    }
    return null;
}
|
|
8009
8136
|
/**
|
|
8010
8137
|
* Utility function to ensure commit messages are properly formatted as strings
|
|
8011
8138
|
* rather than JSON objects, whether they come as parsed objects or stringified JSON
|
|
@@ -8024,23 +8151,26 @@ function formatCommitMessage(result, options = {}) {
|
|
|
8024
8151
|
if (!result.includes('{') && !result.includes('"title"')) {
|
|
8025
8152
|
return result;
|
|
8026
8153
|
}
|
|
8027
|
-
// Handle multiple markdown code block formats
|
|
8028
|
-
const
|
|
8154
|
+
// Handle multiple markdown code block formats and embedded JSON
|
|
8155
|
+
const extractionPatterns = [
|
|
8029
8156
|
/```(?:json)?\s*(\{[\s\S]*?\})\s*```/, // Standard markdown blocks
|
|
8030
8157
|
/`(\{[\s\S]*?\})`/, // Inline code blocks
|
|
8031
|
-
/^\s*(\{[\s\S]*\})\s
|
|
8158
|
+
/^\s*(\{[\s\S]*\})\s*$/, // Raw JSON without blocks (entire string)
|
|
8159
|
+
/(\{[\s\S]*?\})/ // JSON anywhere in text (fallback)
|
|
8032
8160
|
];
|
|
8033
8161
|
let jsonString = result;
|
|
8162
|
+
let foundMatch = false;
|
|
8034
8163
|
// Try each pattern to extract JSON
|
|
8035
|
-
for (const pattern of
|
|
8164
|
+
for (const pattern of extractionPatterns) {
|
|
8036
8165
|
const match = result.match(pattern);
|
|
8037
8166
|
if (match && match[1]) {
|
|
8038
8167
|
jsonString = match[1].trim();
|
|
8168
|
+
foundMatch = true;
|
|
8039
8169
|
break;
|
|
8040
8170
|
}
|
|
8041
8171
|
}
|
|
8042
8172
|
// Only attempt JSON parsing if we found potential JSON content
|
|
8043
|
-
if (
|
|
8173
|
+
if (foundMatch || jsonString.startsWith('{')) {
|
|
8044
8174
|
try {
|
|
8045
8175
|
// Try to parse as JSON to see if it's a stringified object
|
|
8046
8176
|
const parsed = JSON.parse(jsonString);
|
|
@@ -8070,7 +8200,24 @@ function formatCommitMessage(result, options = {}) {
|
|
|
8070
8200
|
}
|
|
8071
8201
|
}
|
|
8072
8202
|
catch {
|
|
8073
|
-
// Repair failed,
|
|
8203
|
+
// Repair failed, try extracting just the first complete JSON object
|
|
8204
|
+
const firstObject = extractFirstJsonObject(jsonString);
|
|
8205
|
+
if (firstObject) {
|
|
8206
|
+
try {
|
|
8207
|
+
const parsed = JSON.parse(firstObject);
|
|
8208
|
+
if (parsed &&
|
|
8209
|
+
typeof parsed === 'object' &&
|
|
8210
|
+
typeof parsed.title === 'string' &&
|
|
8211
|
+
typeof parsed.body === 'string' &&
|
|
8212
|
+
parsed.title.length > 0 &&
|
|
8213
|
+
parsed.body.length > 0) {
|
|
8214
|
+
return constructMessage(parsed.title, parsed.body);
|
|
8215
|
+
}
|
|
8216
|
+
}
|
|
8217
|
+
catch {
|
|
8218
|
+
// Even first object extraction failed, continue to fallback
|
|
8219
|
+
}
|
|
8220
|
+
}
|
|
8074
8221
|
}
|
|
8075
8222
|
}
|
|
8076
8223
|
}
|
|
@@ -8110,6 +8257,114 @@ async function summarize(documents, { chain, textSplitter, options }) {
|
|
|
8110
8257
|
return res.text && res.text.trim();
|
|
8111
8258
|
}
|
|
8112
8259
|
|
|
8260
|
+
/**
 * Replace a single file's diff text with a summary of it.
 *
 * The diff is wrapped as one document (content = `fileDiff.diff`, metadata =
 * file name and existing summary) and passed to `summarize`. The returned
 * object is a copy of `fileDiff` whose `diff` is the summary and whose
 * `tokenCount` is `tokenizer(summary)`.
 *
 * @param {Object} fileDiff - File diff with `file`, `diff`, `summary` fields.
 * @param {Object} deps - `chain`, `textSplitter` (forwarded to `summarize`) and
 *   `tokenizer` (called on the summary text).
 * @returns {Promise<Object>} The summarized copy, or the original `fileDiff`
 *   object itself if summarizing or tokenizing throws (reported via console.error).
 */
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
    try {
        const document = {
            pageContent: fileDiff.diff,
            metadata: {
                file: fileDiff.file,
                summary: fileDiff.summary,
            },
        };
        const summarizeOptions = {
            chain,
            textSplitter,
            options: { returnIntermediateSteps: false },
        };
        const condensed = await summarize([document], summarizeOptions);
        const condensedTokens = tokenizer(condensed);
        return { ...fileDiff, diff: condensed, tokenCount: condensedTokens };
    }
    catch (error) {
        // Best effort: keep the raw diff when the summary cannot be produced.
        console.error(`Failed to summarize file ${fileDiff.file}:`, error);
        return fileDiff;
    }
}
|
|
8293
|
+
/**
 * Run `processor` over `items` in consecutive waves of bounded size.
 *
 * Each wave is started together with `Promise.all` and must settle before the
 * next wave begins; results keep the order of `items`. A rejection from any
 * processor call rejects the returned promise.
 *
 * @param {Array} items - Items to process.
 * @param {Function} processor - Invoked through `Array.prototype.map`, so it
 *   receives `(item, indexWithinWave, wave)`; may return a promise.
 * @param {number} maxConcurrent - Maximum items per wave. Fractions are
 *   floored; values that are missing, non-numeric, or below 1 fall back to 1
 *   so the loop always advances and no item is dropped.
 * @returns {Promise<Array>} Processor results in input order.
 */
async function processInWaves(items, processor, maxConcurrent) {
    // Normalise the step: 0/null would never advance the loop, undefined/NaN
    // would end it after an empty first wave, and a string would concatenate.
    const requested = Number(maxConcurrent);
    const waveSize = Number.isNaN(requested) || requested < 1 ? 1 : Math.floor(requested);
    const results = [];
    for (let start = 0; start < items.length; start += waveSize) {
        const wave = items.slice(start, start + waveSize);
        const waveResults = await Promise.all(wave.map(processor));
        results.push(...waveResults);
    }
    return results;
}
|
|
8305
|
+
/**
 * Summarize every file diff that is too large to pass through verbatim.
 *
 * A diff is selected when its `tokenCount` is strictly greater than
 * `maxFileTokens` and at least `minTokensForSummary`. Selected diffs go
 * through `summarizeFileDiff` via `processInWaves` (bounded by
 * `maxConcurrent`); all other entries are carried over untouched.
 *
 * @param diffs - Array of file diffs to process
 * @param options - `maxFileTokens`, `minTokensForSummary`, `maxConcurrent`,
 *   `tokenizer`, `logger`, `chain`, `textSplitter`
 * @returns A new array, same length and order as `diffs`, with the selected
 *   entries replaced by their summarized versions
 */
async function summarizeLargeFiles(diffs, options) {
    const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
    const results = [...diffs];
    // Pair each oversized diff with its position so it can be written back later.
    const filesToSummarize = diffs
        .map((diff, index) => ({ index, diff }))
        .filter(({ diff }) => diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary);
    if (filesToSummarize.length === 0) {
        return results;
    }
    logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
    const summarizeEntry = async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer });
    const summarizedFiles = await processInWaves(filesToSummarize, summarizeEntry, maxConcurrent);
    for (let i = 0; i < summarizedFiles.length; i += 1) {
        const summarizedDiff = summarizedFiles[i];
        const targetIndex = filesToSummarize[i].index;
        const tokensBefore = results[targetIndex].tokenCount;
        const tokensAfter = summarizedDiff.tokenCount;
        logger.verbose(` - ${summarizedDiff.file}: ${tokensBefore} -> ${tokensAfter} tokens`, { color: 'magenta' });
        results[targetIndex] = summarizedDiff;
    }
    return results;
}
|
|
8340
|
+
/**
 * Run large-file summarization over every diff in a DiffNode tree.
 *
 * Diffs are gathered depth-first (a node's own diffs, then each child in
 * order), passed as one flat list to `summarizeLargeFiles`, and the tree is
 * then rebuilt with each diff replaced by the processed diff that has the same
 * `file` value. Rebuilt nodes carry only `path`, `diffs` and `children`; the
 * input tree is not modified.
 *
 * @param rootNode - Root of the tree; each node has `path`, `diffs`, `children`
 * @param options - Forwarded unchanged to `summarizeLargeFiles`
 * @returns A new tree mirroring `rootNode` with processed diffs
 */
async function preprocessLargeFiles(rootNode, options) {
    // Depth-first flattening: own diffs first, then each subtree in order.
    const gatherDiffs = (node) => [
        ...node.diffs,
        ...node.children.flatMap((child) => gatherDiffs(child)),
    ];
    const processedDiffs = await summarizeLargeFiles(gatherDiffs(rootNode), options);
    // Index by file path; a later entry with the same path overrides an earlier one.
    const processedByFile = new Map(processedDiffs.map((diff) => [diff.file, diff]));
    const cloneWithProcessedDiffs = (node) => ({
        path: node.path,
        diffs: node.diffs.map((diff) => processedByFile.get(diff.file) || diff),
        children: node.children.map((child) => cloneWithProcessedDiffs(child)),
    });
    return cloneWithProcessedDiffs(rootNode);
}
|
|
8367
|
+
|
|
8113
8368
|
/**
|
|
8114
8369
|
* Create groups from a given node info.
|
|
8115
8370
|
* @param {DiffNode} node - The node info to start grouping.
|
|
@@ -8162,6 +8417,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
|
|
|
8162
8417
|
return directory;
|
|
8163
8418
|
}
|
|
8164
8419
|
}
|
|
8420
|
+
/**
|
|
8421
|
+
* Default output formatter for directory diffs.
|
|
8422
|
+
*
|
|
8423
|
+
* TODO: Future improvements to consider:
|
|
8424
|
+
* - Hierarchical output showing file -> directory -> overall summary
|
|
8425
|
+
* - Configurable verbosity levels (compact, standard, detailed)
|
|
8426
|
+
* - Machine-readable format option (JSON) for programmatic use
|
|
8427
|
+
* - Semantic grouping by change type (added/modified/deleted) or feature area
|
|
8428
|
+
* - Visual diff indicators showing magnitude of changes
|
|
8429
|
+
*/
|
|
8165
8430
|
const defaultOutputCallback = (group) => {
|
|
8166
8431
|
let output = `
|
|
8167
8432
|
-------\n* changes in "/${group.path}"\n\n`;
|
|
@@ -8173,41 +8438,124 @@ const defaultOutputCallback = (group) => {
|
|
|
8173
8438
|
}
|
|
8174
8439
|
return output;
|
|
8175
8440
|
};
|
|
8176
|
-
|
|
8177
|
-
|
|
8441
|
+
/**
 * Summarize directory groups, largest first, until the running token total
 * is no longer above `maxTokens`.
 *
 * Candidates are visited in descending `tokenCount` order. Each wave takes up
 * to `maxConcurrent` candidates, skipping any whose token count is below
 * `minTokensForSummary` or that already carry a `summary`. A wave is run with
 * `Promise.all`; afterwards the total is reduced by each group's token saving.
 * The loop ends when the total is within budget, when a wave comes up empty,
 * or when every candidate has been visited.
 *
 * @param directories - Directory groups, each with `tokenCount` and `path`
 * @param options - `totalTokenCount`, `maxTokens`, `minTokensForSummary`,
 *   `maxConcurrent`, `logger`, `chain`, `textSplitter`, `tokenizer`
 * @returns `{ directories, totalTokenCount }` where `directories` is a copy of
 *   the input (same order) with summarized entries replaced
 */
async function summarizeInWaves(directories, options) {
    const { totalTokenCount: startingTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
    const results = [...directories];
    // Visit order: positions of the input sorted by token count, largest first.
    const visitOrder = directories
        .map((directory, position) => ({ index: position, tokens: directory.tokenCount }))
        .sort((left, right) => right.tokens - left.tokens);
    let totalTokenCount = startingTotal;
    let cursor = 0;
    while (totalTokenCount > maxTokens && cursor < visitOrder.length) {
        const wave = [];
        while (cursor < visitOrder.length && wave.length < maxConcurrent) {
            const { index, tokens } = visitOrder[cursor];
            cursor += 1;
            // Too small to be worth summarizing, or summarized already.
            if (tokens < minTokensForSummary || results[index].summary) {
                continue;
            }
            wave.push(index);
        }
        if (wave.length === 0) {
            break;
        }
        logger.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
        const pending = wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer }));
        const waveResults = await Promise.all(pending);
        for (let slot = 0; slot < waveResults.length; slot += 1) {
            const result = waveResults[slot];
            const idx = wave[slot];
            const originalTokens = results[idx].tokenCount;
            const newTokens = result.tokenCount;
            totalTokenCount -= originalTokens - newTokens;
            results[idx] = result;
            logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
                color: 'magenta',
            });
        }
        const overBudget = totalTokenCount > maxTokens;
        logger.verbose(`Total token count: ${totalTokenCount}`, {
            color: overBudget ? 'yellow' : 'green',
        });
        if (totalTokenCount <= maxTokens) {
            logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
            break;
        }
    }
    return { directories: results, totalTokenCount };
}
|
|
8502
|
+
/**
 * Turn a diff tree into one text block that aims to fit a token budget.
 *
 * Steps, in order:
 *   1. `preprocessLargeFiles` condenses individual oversized files. The
 *      per-file limit is `maxFileTokens`, or 25% of `maxTokens` (floored)
 *      when `maxFileTokens` is null/undefined.
 *   2. `createDirectoryDiffs` groups the result by directory; groups are
 *      sorted by `tokenCount` descending and their counts are summed.
 *   3. If the sum is above `maxTokens`, `summarizeInWaves` condenses groups;
 *      otherwise this step is skipped entirely.
 *
 * Each resulting group is rendered with `handleOutput` and the pieces are
 * concatenated with no separator.
 *
 * @param rootDiffNode - Root of the diff tree
 * @param options - `tokenizer`, `logger`, `maxTokens` (default 2048),
 *   `minTokensForSummary` (default 400), `maxFileTokens`, `maxConcurrent`
 *   (default 6), `textSplitter`, `chain`, `handleOutput`
 * @returns The concatenated rendered output
 */
async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
    const render = (groups) => groups.map(handleOutput).join('');
    // Per-file ceiling: explicit value wins, otherwise a quarter of the budget.
    const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
    // Step 1: condense oversized files
    logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
    const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
        maxFileTokens: effectiveMaxFileTokens,
        minTokensForSummary,
        maxConcurrent,
        tokenizer,
        logger,
        chain,
        textSplitter,
    });
    logger.stopSpinner('Files pre-processed').stopTimer();
    // Step 2: group by directory and measure
    logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
    const directoryDiffs = createDirectoryDiffs(preprocessedNode);
    directoryDiffs.sort((left, right) => right.tokenCount - left.tokenCount);
    let totalTokenCount = 0;
    for (const group of directoryDiffs) {
        totalTokenCount += group.tokenCount;
    }
    logger.stopSpinner('Diffs Organized').stopTimer();
    const withinBudget = totalTokenCount <= maxTokens;
    logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
        color: totalTokenCount > maxTokens ? 'yellow' : 'green',
    });
    if (withinBudget) {
        logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
        return render(directoryDiffs);
    }
    // Step 3: condense directory groups in waves
    logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
    const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
        totalTokenCount,
        maxTokens,
        minTokensForSummary,
        maxConcurrent,
        logger,
        chain,
        textSplitter,
        tokenizer,
    });
    logger.stopSpinner(`Diffs Consolidated`).stopTimer();
    return render(summarizedDiffs);
}
|
|
8212
8560
|
|
|
8213
8561
|
/**
|
|
@@ -11207,7 +11555,7 @@ for (var i = 0; i < 256; i++) {
|
|
|
11207
11555
|
simpleEscapeMap[i] = simpleEscapeSequence(i);
|
|
11208
11556
|
}
|
|
11209
11557
|
|
|
11210
|
-
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
|
|
11558
|
+
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
|
|
11211
11559
|
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
|
|
11212
11560
|
const summarizationChain = loadSummarizationChain(model, {
|
|
11213
11561
|
type: 'map_reduce',
|
|
@@ -11221,11 +11569,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
|
|
|
11221
11569
|
logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
|
|
11222
11570
|
const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
|
|
11223
11571
|
logger.stopSpinner('Diffs Collected').stopTimer();
|
|
11224
|
-
// Summarize diffs
|
|
11572
|
+
// Summarize diffs using three-phase approach:
|
|
11573
|
+
// 1. Pre-process large files to prevent bias
|
|
11574
|
+
// 2. Group by directory and assess token count
|
|
11575
|
+
// 3. Wave-based parallel summarization until under budget
|
|
11225
11576
|
logger.startTimer();
|
|
11226
11577
|
const summary = await summarizeDiffs(diffs, {
|
|
11227
11578
|
tokenizer,
|
|
11228
|
-
maxTokens: maxTokens ||
|
|
11579
|
+
maxTokens: maxTokens || 2048,
|
|
11580
|
+
minTokensForSummary,
|
|
11581
|
+
maxFileTokens,
|
|
11582
|
+
maxConcurrent,
|
|
11229
11583
|
textSplitter,
|
|
11230
11584
|
chain: summarizationChain,
|
|
11231
11585
|
logger,
|
|
@@ -11528,7 +11882,16 @@ const handler$3 = async (argv, logger) => {
|
|
|
11528
11882
|
return await fileChangeParser({
|
|
11529
11883
|
changes,
|
|
11530
11884
|
commit: '--staged',
|
|
11531
|
-
options: {
|
|
11885
|
+
options: {
|
|
11886
|
+
tokenizer,
|
|
11887
|
+
git,
|
|
11888
|
+
llm,
|
|
11889
|
+
logger,
|
|
11890
|
+
maxTokens: config.service.tokenLimit,
|
|
11891
|
+
minTokensForSummary: config.service.minTokensForSummary,
|
|
11892
|
+
maxFileTokens: config.service.maxFileTokens,
|
|
11893
|
+
maxConcurrent: config.service.maxConcurrent,
|
|
11894
|
+
},
|
|
11532
11895
|
});
|
|
11533
11896
|
}
|
|
11534
11897
|
const commitMsg = await generateAndReviewLoop({
|
|
@@ -11571,18 +11934,16 @@ const handler$3 = async (argv, logger) => {
|
|
|
11571
11934
|
REQUIRED JSON FORMAT:
|
|
11572
11935
|
${schema.description}
|
|
11573
11936
|
|
|
11574
|
-
EXAMPLE (follow this
|
|
11575
|
-
{
|
|
11576
|
-
"title": "feat(auth): add user authentication system",
|
|
11577
|
-
"body": "Implement JWT-based authentication with login and logout functionality. Includes password hashing and session management."
|
|
11578
|
-
}
|
|
11937
|
+
EXAMPLE (follow this EXACT format - compact JSON on a single line or minimal whitespace):
|
|
11938
|
+
{"title": "feat(auth): add user authentication system", "body": "Implement JWT-based authentication with login and logout functionality. Includes password hashing and session management."}
|
|
11579
11939
|
|
|
11580
11940
|
IMPORTANT RULES:
|
|
11941
|
+
- Return ONLY the JSON object - NO markdown code blocks, NO backticks, NO extra text
|
|
11581
11942
|
- ALL string values MUST be enclosed in double quotes
|
|
11943
|
+
- Use compact JSON format (minimal whitespace) for best compatibility
|
|
11582
11944
|
- NO trailing commas
|
|
11583
11945
|
- NO comments or additional text outside the JSON
|
|
11584
|
-
- The "title" and "body" values must be properly quoted strings
|
|
11585
|
-
- Return ONLY the JSON object, nothing else`;
|
|
11946
|
+
- The "title" and "body" values must be properly quoted strings`;
|
|
11586
11947
|
// Use conventional commit prompt if enabled
|
|
11587
11948
|
const promptTemplate = USE_CONVENTIONAL_COMMITS ? CONVENTIONAL_COMMIT_PROMPT : COMMIT_PROMPT;
|
|
11588
11949
|
const prompt = getPrompt({
|
|
@@ -11676,10 +12037,33 @@ IMPORTANT RULES:
|
|
|
11676
12037
|
logger.verbose(`Failed to parse commit message (attempt ${attempt}/${maxAttempts}): ${error.message}`, { color: 'yellow' });
|
|
11677
12038
|
},
|
|
11678
12039
|
},
|
|
11679
|
-
fallbackParser: (text) =>
|
|
11680
|
-
|
|
11681
|
-
|
|
11682
|
-
|
|
12040
|
+
fallbackParser: (text) => {
|
|
12041
|
+
// First try to parse as JSON in case it's valid JSON with unusual formatting
|
|
12042
|
+
try {
|
|
12043
|
+
// Remove markdown code blocks if present
|
|
12044
|
+
let cleanText = text.trim();
|
|
12045
|
+
const codeBlockMatch = cleanText.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
|
|
12046
|
+
if (codeBlockMatch && codeBlockMatch[1]) {
|
|
12047
|
+
cleanText = codeBlockMatch[1].trim();
|
|
12048
|
+
}
|
|
12049
|
+
const parsed = JSON.parse(cleanText);
|
|
12050
|
+
if (parsed &&
|
|
12051
|
+
typeof parsed === 'object' &&
|
|
12052
|
+
typeof parsed.title === 'string' &&
|
|
12053
|
+
typeof parsed.body === 'string' &&
|
|
12054
|
+
parsed.title.length > 0) {
|
|
12055
|
+
return parsed;
|
|
12056
|
+
}
|
|
12057
|
+
}
|
|
12058
|
+
catch {
|
|
12059
|
+
// JSON parsing failed, fall through to text splitting
|
|
12060
|
+
}
|
|
12061
|
+
// Fallback to simple text splitting
|
|
12062
|
+
return {
|
|
12063
|
+
title: text.split('\n')[0] || 'Auto-generated commit',
|
|
12064
|
+
body: text.split('\n').slice(1).join('\n') || 'Generated commit message',
|
|
12065
|
+
};
|
|
12066
|
+
},
|
|
11683
12067
|
onFallback: () => {
|
|
11684
12068
|
logger.verbose('Max retry attempts reached. Falling back to simple text output.', {
|
|
11685
12069
|
color: 'red',
|