git-coco 0.23.1 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +31 -0
- package/dist/index.esm.mjs +439 -55
- package/dist/index.js +439 -55
- package/package.json +5 -5
package/dist/index.js
CHANGED
|
@@ -26,7 +26,6 @@ var outputs = require('@langchain/core/outputs');
|
|
|
26
26
|
var manager = require('@langchain/core/callbacks/manager');
|
|
27
27
|
require('@langchain/core/utils/json_patch');
|
|
28
28
|
var simpleGit = require('simple-git');
|
|
29
|
-
var pQueue = require('p-queue');
|
|
30
29
|
var documents = require('@langchain/core/documents');
|
|
31
30
|
var diff = require('diff');
|
|
32
31
|
require('@langchain/core/messages');
|
|
@@ -69,7 +68,7 @@ var readline__namespace = /*#__PURE__*/_interopNamespaceDefault(readline);
|
|
|
69
68
|
/**
|
|
70
69
|
* Current build version from package.json
|
|
71
70
|
*/
|
|
72
|
-
const BUILD_VERSION = "0.23.1";
|
|
71
|
+
const BUILD_VERSION = "0.25.0";
|
|
73
72
|
|
|
74
73
|
const isInteractive = (config) => {
|
|
75
74
|
return config?.mode === 'interactive' || !!config?.interactive;
|
|
@@ -184,6 +183,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
|
|
|
184
183
|
fs.writeFileSync(filePath, newLines.join('\n'));
|
|
185
184
|
}
|
|
186
185
|
|
|
186
|
+
/**
|
|
187
|
+
* Prompt template for summarizing code diffs.
|
|
188
|
+
*
|
|
189
|
+
* TODO: Future improvements to consider:
|
|
190
|
+
* - Separate prompts for file-level vs directory-level summarization
|
|
191
|
+
* - Include file type context (e.g., "This is a React component", "This is a test file")
|
|
192
|
+
* - Add guidance for preserving semantic meaning of changes
|
|
193
|
+
* - Consider change type (added/modified/deleted) in prompt for better context
|
|
194
|
+
* - Include hints about the programming language for more idiomatic summaries
|
|
195
|
+
* - Add support for custom user-provided summarization prompts via config
|
|
196
|
+
*/
|
|
187
197
|
const template$5 = `GOAL: Use functional abstractions to summarize the following text
|
|
188
198
|
|
|
189
199
|
RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
|
|
@@ -1061,6 +1071,16 @@ const schema$1 = {
|
|
|
1061
1071
|
"description": "The maximum number of requests to make concurrently.",
|
|
1062
1072
|
"default": 6
|
|
1063
1073
|
},
|
|
1074
|
+
"minTokensForSummary": {
|
|
1075
|
+
"type": "number",
|
|
1076
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1077
|
+
"default": 400
|
|
1078
|
+
},
|
|
1079
|
+
"maxFileTokens": {
|
|
1080
|
+
"type": "number",
|
|
1081
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1082
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1083
|
+
},
|
|
1064
1084
|
"authentication": {
|
|
1065
1085
|
"anyOf": [
|
|
1066
1086
|
{
|
|
@@ -1821,6 +1841,16 @@ const schema$1 = {
|
|
|
1821
1841
|
"description": "The maximum number of requests to make concurrently.",
|
|
1822
1842
|
"default": 6
|
|
1823
1843
|
},
|
|
1844
|
+
"minTokensForSummary": {
|
|
1845
|
+
"type": "number",
|
|
1846
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
1847
|
+
"default": 400
|
|
1848
|
+
},
|
|
1849
|
+
"maxFileTokens": {
|
|
1850
|
+
"type": "number",
|
|
1851
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
1852
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
1853
|
+
},
|
|
1824
1854
|
"authentication": {
|
|
1825
1855
|
"anyOf": [
|
|
1826
1856
|
{
|
|
@@ -1972,6 +2002,16 @@ const schema$1 = {
|
|
|
1972
2002
|
"description": "The maximum number of requests to make concurrently.",
|
|
1973
2003
|
"default": 6
|
|
1974
2004
|
},
|
|
2005
|
+
"minTokensForSummary": {
|
|
2006
|
+
"type": "number",
|
|
2007
|
+
"description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
|
|
2008
|
+
"default": 400
|
|
2009
|
+
},
|
|
2010
|
+
"maxFileTokens": {
|
|
2011
|
+
"type": "number",
|
|
2012
|
+
"description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
|
|
2013
|
+
"default": "undefined (uses 0.25 * tokenLimit)"
|
|
2014
|
+
},
|
|
1975
2015
|
"authentication": {
|
|
1976
2016
|
"anyOf": [
|
|
1977
2017
|
{
|
|
@@ -6080,9 +6120,13 @@ const options$4 = {
|
|
|
6080
6120
|
alias: 'b',
|
|
6081
6121
|
description: 'Target branch to compare against',
|
|
6082
6122
|
},
|
|
6123
|
+
tag: {
|
|
6124
|
+
type: 'string',
|
|
6125
|
+
alias: 't',
|
|
6126
|
+
description: 'Target tag to compare against',
|
|
6127
|
+
},
|
|
6083
6128
|
sinceLastTag: {
|
|
6084
6129
|
type: 'boolean',
|
|
6085
|
-
alias: 't',
|
|
6086
6130
|
description: 'Generate changelog for all commits since the last tag',
|
|
6087
6131
|
default: false,
|
|
6088
6132
|
},
|
|
@@ -7096,6 +7140,37 @@ async function getCommitLogAgainstBranch({ git, logger, targetBranch, }) {
|
|
|
7096
7140
|
return [];
|
|
7097
7141
|
}
|
|
7098
7142
|
|
|
7143
|
+
/**
|
|
7144
|
+
* Retrieves the commit log between the current branch and a specified tag.
|
|
7145
|
+
*
|
|
7146
|
+
* @param {Object} options - The options for retrieving the commit log.
|
|
7147
|
+
* @param {SimpleGit} options.git - The SimpleGit instance.
|
|
7148
|
+
* @param {Logger} options.logger - The logger for logging messages.
|
|
7149
|
+
* @param {string} options.targetTag - The tag to compare against.
|
|
7150
|
+
* @returns {Promise<CommitDetails[]>} The array of commit messages in the commit log.
|
|
7151
|
+
*/
|
|
7152
|
+
async function getCommitLogAgainstTag({ git, logger, targetTag, }) {
|
|
7153
|
+
try {
|
|
7154
|
+
const currentBranch = await getCurrentBranchName({ git });
|
|
7155
|
+
const uniqueCommits = (await git.raw(['rev-list', `${targetTag}..${currentBranch}`]))
|
|
7156
|
+
.split('\n')
|
|
7157
|
+
.filter(Boolean)
|
|
7158
|
+
.reverse();
|
|
7159
|
+
logger?.verbose(`Found ${uniqueCommits.length} unique commits between "${currentBranch}" and tag "${targetTag}"`, { color: 'blue' });
|
|
7160
|
+
const firstCommit = uniqueCommits[0];
|
|
7161
|
+
const lastCommit = uniqueCommits[uniqueCommits.length - 1];
|
|
7162
|
+
if (!firstCommit || !lastCommit) {
|
|
7163
|
+
logger?.log('Unable to determine first and last commit between branch and tag', { color: 'yellow' });
|
|
7164
|
+
return [];
|
|
7165
|
+
}
|
|
7166
|
+
return await getCommitLogRangeDetails(firstCommit, lastCommit, { git, noMerges: true });
|
|
7167
|
+
}
|
|
7168
|
+
catch (error) {
|
|
7169
|
+
logger?.log('Encountered an error getting commit log between branch and tag', { color: 'red' });
|
|
7170
|
+
}
|
|
7171
|
+
return [];
|
|
7172
|
+
}
|
|
7173
|
+
|
|
7099
7174
|
/**
|
|
7100
7175
|
* Retrieves the commit log for the current branch.
|
|
7101
7176
|
*
|
|
@@ -7714,6 +7789,15 @@ const handler$4 = async (argv, logger) => {
|
|
|
7714
7789
|
const git = getRepo();
|
|
7715
7790
|
const key = getApiKeyForModel(config);
|
|
7716
7791
|
const { provider, model } = getModelAndProviderFromConfig(config);
|
|
7792
|
+
const exclusiveOptions = [
|
|
7793
|
+
argv.branch ? '--branch' : null,
|
|
7794
|
+
argv.tag ? '--tag' : null,
|
|
7795
|
+
config.sinceLastTag ? '--since-last-tag' : null,
|
|
7796
|
+
].filter(Boolean);
|
|
7797
|
+
if (exclusiveOptions.length > 1) {
|
|
7798
|
+
logger.log(`Options ${exclusiveOptions.join(', ')} cannot be used together.`, { color: 'red' });
|
|
7799
|
+
process.exit(1);
|
|
7800
|
+
}
|
|
7717
7801
|
if (config.service.authentication.type !== 'None' && !key) {
|
|
7718
7802
|
logger.log(`No API Key found. 🗝️🚪`, { color: 'red' });
|
|
7719
7803
|
process.exit(1);
|
|
@@ -7755,6 +7839,10 @@ const handler$4 = async (argv, logger) => {
|
|
|
7755
7839
|
logger.verbose(`Generating commit log against branch: ${argv.branch}`, { color: 'yellow' });
|
|
7756
7840
|
commits = await getCommitLogAgainstBranch({ git, logger, targetBranch: argv.branch });
|
|
7757
7841
|
}
|
|
7842
|
+
else if (argv.tag) {
|
|
7843
|
+
logger.verbose(`Generating commit log against tag: ${argv.tag}`, { color: 'yellow' });
|
|
7844
|
+
commits = await getCommitLogAgainstTag({ git, logger, targetTag: argv.tag });
|
|
7845
|
+
}
|
|
7758
7846
|
else {
|
|
7759
7847
|
logger.verbose(`No range, branch, or tag option provided. Defaulting to current branch`, {
|
|
7760
7848
|
color: 'yellow',
|
|
@@ -8028,6 +8116,45 @@ function repairJson(jsonString) {
|
|
|
8028
8116
|
}
|
|
8029
8117
|
}
|
|
8030
8118
|
|
|
8119
|
+
/**
|
|
8120
|
+
* Extract the first complete JSON object from a string by tracking balanced braces
|
|
8121
|
+
*/
|
|
8122
|
+
function extractFirstJsonObject(text) {
|
|
8123
|
+
const startIndex = text.indexOf('{');
|
|
8124
|
+
if (startIndex === -1)
|
|
8125
|
+
return null;
|
|
8126
|
+
let braceCount = 0;
|
|
8127
|
+
let inString = false;
|
|
8128
|
+
let escapeNext = false;
|
|
8129
|
+
for (let i = startIndex; i < text.length; i++) {
|
|
8130
|
+
const char = text[i];
|
|
8131
|
+
if (escapeNext) {
|
|
8132
|
+
escapeNext = false;
|
|
8133
|
+
continue;
|
|
8134
|
+
}
|
|
8135
|
+
if (char === '\\') {
|
|
8136
|
+
escapeNext = true;
|
|
8137
|
+
continue;
|
|
8138
|
+
}
|
|
8139
|
+
if (char === '"') {
|
|
8140
|
+
inString = !inString;
|
|
8141
|
+
continue;
|
|
8142
|
+
}
|
|
8143
|
+
if (inString)
|
|
8144
|
+
continue;
|
|
8145
|
+
if (char === '{') {
|
|
8146
|
+
braceCount++;
|
|
8147
|
+
}
|
|
8148
|
+
else if (char === '}') {
|
|
8149
|
+
braceCount--;
|
|
8150
|
+
if (braceCount === 0) {
|
|
8151
|
+
// Found the end of the first complete JSON object
|
|
8152
|
+
return text.substring(startIndex, i + 1);
|
|
8153
|
+
}
|
|
8154
|
+
}
|
|
8155
|
+
}
|
|
8156
|
+
return null;
|
|
8157
|
+
}
|
|
8031
8158
|
/**
|
|
8032
8159
|
* Utility function to ensure commit messages are properly formatted as strings
|
|
8033
8160
|
* rather than JSON objects, whether they come as parsed objects or stringified JSON
|
|
@@ -8046,23 +8173,26 @@ function formatCommitMessage(result, options = {}) {
|
|
|
8046
8173
|
if (!result.includes('{') && !result.includes('"title"')) {
|
|
8047
8174
|
return result;
|
|
8048
8175
|
}
|
|
8049
|
-
// Handle multiple markdown code block formats
|
|
8050
|
-
const
|
|
8176
|
+
// Handle multiple markdown code block formats and embedded JSON
|
|
8177
|
+
const extractionPatterns = [
|
|
8051
8178
|
/```(?:json)?\s*(\{[\s\S]*?\})\s*```/, // Standard markdown blocks
|
|
8052
8179
|
/`(\{[\s\S]*?\})`/, // Inline code blocks
|
|
8053
|
-
/^\s*(\{[\s\S]*\})\s
|
|
8180
|
+
/^\s*(\{[\s\S]*\})\s*$/, // Raw JSON without blocks (entire string)
|
|
8181
|
+
/(\{[\s\S]*?\})/ // JSON anywhere in text (fallback)
|
|
8054
8182
|
];
|
|
8055
8183
|
let jsonString = result;
|
|
8184
|
+
let foundMatch = false;
|
|
8056
8185
|
// Try each pattern to extract JSON
|
|
8057
|
-
for (const pattern of
|
|
8186
|
+
for (const pattern of extractionPatterns) {
|
|
8058
8187
|
const match = result.match(pattern);
|
|
8059
8188
|
if (match && match[1]) {
|
|
8060
8189
|
jsonString = match[1].trim();
|
|
8190
|
+
foundMatch = true;
|
|
8061
8191
|
break;
|
|
8062
8192
|
}
|
|
8063
8193
|
}
|
|
8064
8194
|
// Only attempt JSON parsing if we found potential JSON content
|
|
8065
|
-
if (
|
|
8195
|
+
if (foundMatch || jsonString.startsWith('{')) {
|
|
8066
8196
|
try {
|
|
8067
8197
|
// Try to parse as JSON to see if it's a stringified object
|
|
8068
8198
|
const parsed = JSON.parse(jsonString);
|
|
@@ -8092,7 +8222,24 @@ function formatCommitMessage(result, options = {}) {
|
|
|
8092
8222
|
}
|
|
8093
8223
|
}
|
|
8094
8224
|
catch {
|
|
8095
|
-
// Repair failed,
|
|
8225
|
+
// Repair failed, try extracting just the first complete JSON object
|
|
8226
|
+
const firstObject = extractFirstJsonObject(jsonString);
|
|
8227
|
+
if (firstObject) {
|
|
8228
|
+
try {
|
|
8229
|
+
const parsed = JSON.parse(firstObject);
|
|
8230
|
+
if (parsed &&
|
|
8231
|
+
typeof parsed === 'object' &&
|
|
8232
|
+
typeof parsed.title === 'string' &&
|
|
8233
|
+
typeof parsed.body === 'string' &&
|
|
8234
|
+
parsed.title.length > 0 &&
|
|
8235
|
+
parsed.body.length > 0) {
|
|
8236
|
+
return constructMessage(parsed.title, parsed.body);
|
|
8237
|
+
}
|
|
8238
|
+
}
|
|
8239
|
+
catch {
|
|
8240
|
+
// Even first object extraction failed, continue to fallback
|
|
8241
|
+
}
|
|
8242
|
+
}
|
|
8096
8243
|
}
|
|
8097
8244
|
}
|
|
8098
8245
|
}
|
|
@@ -8132,6 +8279,114 @@ async function summarize(documents$1, { chain, textSplitter, options }) {
|
|
|
8132
8279
|
return res.text && res.text.trim();
|
|
8133
8280
|
}
|
|
8134
8281
|
|
|
8282
|
+
/**
|
|
8283
|
+
* Summarize a single file diff that exceeds the token threshold.
|
|
8284
|
+
*/
|
|
8285
|
+
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
|
|
8286
|
+
try {
|
|
8287
|
+
const fileSummary = await summarize([
|
|
8288
|
+
{
|
|
8289
|
+
pageContent: fileDiff.diff,
|
|
8290
|
+
metadata: {
|
|
8291
|
+
file: fileDiff.file,
|
|
8292
|
+
summary: fileDiff.summary,
|
|
8293
|
+
},
|
|
8294
|
+
},
|
|
8295
|
+
], {
|
|
8296
|
+
chain,
|
|
8297
|
+
textSplitter,
|
|
8298
|
+
options: {
|
|
8299
|
+
returnIntermediateSteps: false,
|
|
8300
|
+
},
|
|
8301
|
+
});
|
|
8302
|
+
const newTokenCount = tokenizer(fileSummary);
|
|
8303
|
+
return {
|
|
8304
|
+
...fileDiff,
|
|
8305
|
+
diff: fileSummary,
|
|
8306
|
+
tokenCount: newTokenCount,
|
|
8307
|
+
};
|
|
8308
|
+
}
|
|
8309
|
+
catch (error) {
|
|
8310
|
+
// On error, return original diff unchanged
|
|
8311
|
+
console.error(`Failed to summarize file ${fileDiff.file}:`, error);
|
|
8312
|
+
return fileDiff;
|
|
8313
|
+
}
|
|
8314
|
+
}
|
|
8315
|
+
/**
|
|
8316
|
+
* Process files in waves to respect concurrency limits.
|
|
8317
|
+
*/
|
|
8318
|
+
async function processInWaves(items, processor, maxConcurrent) {
|
|
8319
|
+
const results = [];
|
|
8320
|
+
for (let i = 0; i < items.length; i += maxConcurrent) {
|
|
8321
|
+
const wave = items.slice(i, i + maxConcurrent);
|
|
8322
|
+
const waveResults = await Promise.all(wave.map(processor));
|
|
8323
|
+
results.push(...waveResults);
|
|
8324
|
+
}
|
|
8325
|
+
return results;
|
|
8326
|
+
}
|
|
8327
|
+
/**
|
|
8328
|
+
* Pre-summarize individual files that exceed the maxFileTokens threshold.
|
|
8329
|
+
* This prevents large files from dominating the token budget and biasing
|
|
8330
|
+
* the final commit message toward a single file's changes.
|
|
8331
|
+
*
|
|
8332
|
+
* @param diffs - Array of file diffs to process
|
|
8333
|
+
* @param options - Configuration options for summarization
|
|
8334
|
+
* @returns Array of file diffs with large files summarized
|
|
8335
|
+
*/
|
|
8336
|
+
async function summarizeLargeFiles(diffs, options) {
|
|
8337
|
+
const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
|
|
8338
|
+
// Identify files that need summarization
|
|
8339
|
+
const filesToSummarize = [];
|
|
8340
|
+
const results = [...diffs];
|
|
8341
|
+
diffs.forEach((diff, index) => {
|
|
8342
|
+
if (diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary) {
|
|
8343
|
+
filesToSummarize.push({ index, diff });
|
|
8344
|
+
}
|
|
8345
|
+
});
|
|
8346
|
+
if (filesToSummarize.length === 0) {
|
|
8347
|
+
return results;
|
|
8348
|
+
}
|
|
8349
|
+
logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
|
|
8350
|
+
// Process large files in waves
|
|
8351
|
+
const summarizedFiles = await processInWaves(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer }), maxConcurrent);
|
|
8352
|
+
// Update results with summarized files
|
|
8353
|
+
summarizedFiles.forEach((summarizedDiff, i) => {
|
|
8354
|
+
const originalIndex = filesToSummarize[i].index;
|
|
8355
|
+
const originalTokens = results[originalIndex].tokenCount;
|
|
8356
|
+
const newTokens = summarizedDiff.tokenCount;
|
|
8357
|
+
logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
|
|
8358
|
+
results[originalIndex] = summarizedDiff;
|
|
8359
|
+
});
|
|
8360
|
+
return results;
|
|
8361
|
+
}
|
|
8362
|
+
/**
|
|
8363
|
+
* Pre-process a DiffNode tree, summarizing large files at the leaf level.
|
|
8364
|
+
* Returns a new DiffNode with updated token counts.
|
|
8365
|
+
*/
|
|
8366
|
+
async function preprocessLargeFiles(rootNode, options) {
|
|
8367
|
+
// Collect all diffs from the tree
|
|
8368
|
+
const allDiffs = [];
|
|
8369
|
+
function collectDiffs(node) {
|
|
8370
|
+
allDiffs.push(...node.diffs);
|
|
8371
|
+
node.children.forEach(collectDiffs);
|
|
8372
|
+
}
|
|
8373
|
+
collectDiffs(rootNode);
|
|
8374
|
+
// Summarize large files
|
|
8375
|
+
const processedDiffs = await summarizeLargeFiles(allDiffs, options);
|
|
8376
|
+
// Create a map for quick lookup
|
|
8377
|
+
const diffMap = new Map();
|
|
8378
|
+
processedDiffs.forEach((diff) => diffMap.set(diff.file, diff));
|
|
8379
|
+
// Rebuild tree with processed diffs
|
|
8380
|
+
function rebuildNode(node) {
|
|
8381
|
+
return {
|
|
8382
|
+
path: node.path,
|
|
8383
|
+
diffs: node.diffs.map((diff) => diffMap.get(diff.file) || diff),
|
|
8384
|
+
children: node.children.map(rebuildNode),
|
|
8385
|
+
};
|
|
8386
|
+
}
|
|
8387
|
+
return rebuildNode(rootNode);
|
|
8388
|
+
}
|
|
8389
|
+
|
|
8135
8390
|
/**
|
|
8136
8391
|
* Create groups from a given node info.
|
|
8137
8392
|
* @param {DiffNode} node - The node info to start grouping.
|
|
@@ -8184,6 +8439,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
|
|
|
8184
8439
|
return directory;
|
|
8185
8440
|
}
|
|
8186
8441
|
}
|
|
8442
|
+
/**
|
|
8443
|
+
* Default output formatter for directory diffs.
|
|
8444
|
+
*
|
|
8445
|
+
* TODO: Future improvements to consider:
|
|
8446
|
+
* - Hierarchical output showing file -> directory -> overall summary
|
|
8447
|
+
* - Configurable verbosity levels (compact, standard, detailed)
|
|
8448
|
+
* - Machine-readable format option (JSON) for programmatic use
|
|
8449
|
+
* - Semantic grouping by change type (added/modified/deleted) or feature area
|
|
8450
|
+
* - Visual diff indicators showing magnitude of changes
|
|
8451
|
+
*/
|
|
8187
8452
|
const defaultOutputCallback = (group) => {
|
|
8188
8453
|
let output = `
|
|
8189
8454
|
-------\n* changes in "/${group.path}"\n\n`;
|
|
@@ -8195,41 +8460,124 @@ const defaultOutputCallback = (group) => {
|
|
|
8195
8460
|
}
|
|
8196
8461
|
return output;
|
|
8197
8462
|
};
|
|
8198
|
-
|
|
8199
|
-
|
|
8463
|
+
/**
|
|
8464
|
+
* Process directory summarization in waves to respect concurrency limits
|
|
8465
|
+
* while maintaining predictable behavior.
|
|
8466
|
+
*/
|
|
8467
|
+
async function summarizeInWaves(directories, options) {
|
|
8468
|
+
const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
|
|
8469
|
+
let totalTokenCount = initialTotal;
|
|
8470
|
+
const results = [...directories];
|
|
8471
|
+
// Create sorted indices by token count (descending) for prioritized processing
|
|
8472
|
+
const sortedIndices = directories
|
|
8473
|
+
.map((d, i) => ({ index: i, tokens: d.tokenCount }))
|
|
8474
|
+
.sort((a, b) => b.tokens - a.tokens);
|
|
8475
|
+
let cursor = 0;
|
|
8476
|
+
while (totalTokenCount > maxTokens && cursor < sortedIndices.length) {
|
|
8477
|
+
// Select wave candidates: directories that exceed minTokensForSummary
|
|
8478
|
+
const wave = [];
|
|
8479
|
+
for (let i = cursor; i < sortedIndices.length && wave.length < maxConcurrent; i++) {
|
|
8480
|
+
const { index, tokens } = sortedIndices[i];
|
|
8481
|
+
// Skip directories below the minimum threshold
|
|
8482
|
+
if (tokens < minTokensForSummary) {
|
|
8483
|
+
cursor = i + 1;
|
|
8484
|
+
continue;
|
|
8485
|
+
}
|
|
8486
|
+
// Skip directories that have already been summarized
|
|
8487
|
+
if (results[index].summary) {
|
|
8488
|
+
cursor = i + 1;
|
|
8489
|
+
continue;
|
|
8490
|
+
}
|
|
8491
|
+
wave.push(index);
|
|
8492
|
+
cursor = i + 1;
|
|
8493
|
+
}
|
|
8494
|
+
// No more eligible candidates
|
|
8495
|
+
if (wave.length === 0) {
|
|
8496
|
+
break;
|
|
8497
|
+
}
|
|
8498
|
+
logger.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
|
|
8499
|
+
// Process wave in parallel
|
|
8500
|
+
const waveResults = await Promise.all(wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer })));
|
|
8501
|
+
// Update results and recalculate total
|
|
8502
|
+
waveResults.forEach((result, i) => {
|
|
8503
|
+
const idx = wave[i];
|
|
8504
|
+
const originalTokens = results[idx].tokenCount;
|
|
8505
|
+
const newTokens = result.tokenCount;
|
|
8506
|
+
const reduction = originalTokens - newTokens;
|
|
8507
|
+
totalTokenCount -= reduction;
|
|
8508
|
+
results[idx] = result;
|
|
8509
|
+
logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
|
|
8510
|
+
color: 'magenta',
|
|
8511
|
+
});
|
|
8512
|
+
});
|
|
8513
|
+
logger.verbose(`Total token count: ${totalTokenCount}`, {
|
|
8514
|
+
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8515
|
+
});
|
|
8516
|
+
// Check if we're now under budget
|
|
8517
|
+
if (totalTokenCount <= maxTokens) {
|
|
8518
|
+
logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
|
|
8519
|
+
break;
|
|
8520
|
+
}
|
|
8521
|
+
}
|
|
8522
|
+
return { directories: results, totalTokenCount };
|
|
8523
|
+
}
|
|
8524
|
+
/**
|
|
8525
|
+
* Summarize diffs using a three-phase approach:
|
|
8526
|
+
*
|
|
8527
|
+
* Phase 1: Pre-process large files to prevent any single file from dominating
|
|
8528
|
+
* Phase 2: Group diffs by directory and assess total token count
|
|
8529
|
+
* Phase 3: Wave-based parallel summarization until under budget
|
|
8530
|
+
*
|
|
8531
|
+
* This approach ensures:
|
|
8532
|
+
* - Large files don't bias the summary
|
|
8533
|
+
* - Small changes preserve their detail (minTokensForSummary threshold)
|
|
8534
|
+
* - Efficient parallel processing with predictable behavior
|
|
8535
|
+
* - Early exit when under token budget
|
|
8536
|
+
*/
|
|
8537
|
+
async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
|
|
8538
|
+
// Calculate maxFileTokens as 25% of maxTokens if not specified
|
|
8539
|
+
const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
|
|
8540
|
+
// PHASE 1: Pre-process large files
|
|
8541
|
+
logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
|
|
8542
|
+
const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
|
|
8543
|
+
maxFileTokens: effectiveMaxFileTokens,
|
|
8544
|
+
minTokensForSummary,
|
|
8545
|
+
maxConcurrent,
|
|
8546
|
+
tokenizer,
|
|
8547
|
+
logger,
|
|
8548
|
+
chain,
|
|
8549
|
+
textSplitter,
|
|
8550
|
+
});
|
|
8551
|
+
logger.stopSpinner('Files pre-processed').stopTimer();
|
|
8552
|
+
// PHASE 2: Directory grouping & assessment
|
|
8200
8553
|
logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
|
|
8201
|
-
const directoryDiffs = createDirectoryDiffs(
|
|
8202
|
-
// Sort by token count descending
|
|
8554
|
+
const directoryDiffs = createDirectoryDiffs(preprocessedNode);
|
|
8555
|
+
// Sort by token count descending for consistent output ordering
|
|
8203
8556
|
directoryDiffs.sort((a, b) => b.tokenCount - a.tokenCount);
|
|
8204
|
-
|
|
8557
|
+
const totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
|
|
8205
8558
|
logger.stopSpinner('Diffs Organized').stopTimer();
|
|
8206
|
-
logger.
|
|
8207
|
-
|
|
8208
|
-
return queue.add(async () => {
|
|
8209
|
-
// If the diff token count is already less than the average req, we can skip summarizing.
|
|
8210
|
-
const isLessThanAvgTokenReq = group.tokenCount <= maxTokens / directoryDiffs.length;
|
|
8211
|
-
if (totalTokenCount <= maxTokens || isLessThanAvgTokenReq) {
|
|
8212
|
-
return group;
|
|
8213
|
-
}
|
|
8214
|
-
group = await summarizeDirectoryDiff(group, {
|
|
8215
|
-
chain,
|
|
8216
|
-
textSplitter,
|
|
8217
|
-
tokenizer,
|
|
8218
|
-
});
|
|
8219
|
-
// We need to subtract the old token count and add the new one
|
|
8220
|
-
totalTokenCount = totalTokenCount - directoryDiffs[i].tokenCount + group.tokenCount;
|
|
8221
|
-
directoryDiffs[i] = group;
|
|
8222
|
-
logger
|
|
8223
|
-
.verbose(`\n • Summarized diffs in "/${group.path}" `, { color: 'blue' })
|
|
8224
|
-
.verbose(`\nTotal token count: ${totalTokenCount}`, {
|
|
8225
|
-
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8226
|
-
});
|
|
8227
|
-
return group;
|
|
8228
|
-
}, { priority: group.tokenCount });
|
|
8559
|
+
logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
|
|
8560
|
+
color: totalTokenCount > maxTokens ? 'yellow' : 'green',
|
|
8229
8561
|
});
|
|
8230
|
-
|
|
8231
|
-
|
|
8232
|
-
|
|
8562
|
+
// Early exit if already under budget
|
|
8563
|
+
if (totalTokenCount <= maxTokens) {
|
|
8564
|
+
logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
|
|
8565
|
+
return directoryDiffs.map(handleOutput).join('');
|
|
8566
|
+
}
|
|
8567
|
+
// PHASE 3: Wave-based summarization
|
|
8568
|
+
logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
|
|
8569
|
+
const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
|
|
8570
|
+
totalTokenCount,
|
|
8571
|
+
maxTokens,
|
|
8572
|
+
minTokensForSummary,
|
|
8573
|
+
maxConcurrent,
|
|
8574
|
+
logger,
|
|
8575
|
+
chain,
|
|
8576
|
+
textSplitter,
|
|
8577
|
+
tokenizer,
|
|
8578
|
+
});
|
|
8579
|
+
logger.stopSpinner(`Diffs Consolidated`).stopTimer();
|
|
8580
|
+
return summarizedDiffs.map(handleOutput).join('');
|
|
8233
8581
|
}
|
|
8234
8582
|
|
|
8235
8583
|
/**
|
|
@@ -11229,7 +11577,7 @@ for (var i = 0; i < 256; i++) {
|
|
|
11229
11577
|
simpleEscapeMap[i] = simpleEscapeSequence(i);
|
|
11230
11578
|
}
|
|
11231
11579
|
|
|
11232
|
-
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
|
|
11580
|
+
async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
|
|
11233
11581
|
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
|
|
11234
11582
|
const summarizationChain = loadSummarizationChain(model, {
|
|
11235
11583
|
type: 'map_reduce',
|
|
@@ -11243,11 +11591,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
|
|
|
11243
11591
|
logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
|
|
11244
11592
|
const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
|
|
11245
11593
|
logger.stopSpinner('Diffs Collected').stopTimer();
|
|
11246
|
-
// Summarize diffs
|
|
11594
|
+
// Summarize diffs using three-phase approach:
|
|
11595
|
+
// 1. Pre-process large files to prevent bias
|
|
11596
|
+
// 2. Group by directory and assess token count
|
|
11597
|
+
// 3. Wave-based parallel summarization until under budget
|
|
11247
11598
|
logger.startTimer();
|
|
11248
11599
|
const summary = await summarizeDiffs(diffs, {
|
|
11249
11600
|
tokenizer,
|
|
11250
|
-
maxTokens: maxTokens ||
|
|
11601
|
+
maxTokens: maxTokens || 2048,
|
|
11602
|
+
minTokensForSummary,
|
|
11603
|
+
maxFileTokens,
|
|
11604
|
+
maxConcurrent,
|
|
11251
11605
|
textSplitter,
|
|
11252
11606
|
chain: summarizationChain,
|
|
11253
11607
|
logger,
|
|
@@ -11550,7 +11904,16 @@ const handler$3 = async (argv, logger) => {
|
|
|
11550
11904
|
return await fileChangeParser({
|
|
11551
11905
|
changes,
|
|
11552
11906
|
commit: '--staged',
|
|
11553
|
-
options: {
|
|
11907
|
+
options: {
|
|
11908
|
+
tokenizer,
|
|
11909
|
+
git,
|
|
11910
|
+
llm,
|
|
11911
|
+
logger,
|
|
11912
|
+
maxTokens: config.service.tokenLimit,
|
|
11913
|
+
minTokensForSummary: config.service.minTokensForSummary,
|
|
11914
|
+
maxFileTokens: config.service.maxFileTokens,
|
|
11915
|
+
maxConcurrent: config.service.maxConcurrent,
|
|
11916
|
+
},
|
|
11554
11917
|
});
|
|
11555
11918
|
}
|
|
11556
11919
|
const commitMsg = await generateAndReviewLoop({
|
|
@@ -11593,18 +11956,16 @@ const handler$3 = async (argv, logger) => {
|
|
|
11593
11956
|
REQUIRED JSON FORMAT:
|
|
11594
11957
|
${schema.description}
|
|
11595
11958
|
|
|
11596
|
-
EXAMPLE (follow this
|
|
11597
|
-
{
|
|
11598
|
-
"title": "feat(auth): add user authentication system",
|
|
11599
|
-
"body": "Implement JWT-based authentication with login and logout functionality. Includes password hashing and session management."
|
|
11600
|
-
}
|
|
11959
|
+
EXAMPLE (follow this EXACT format - compact JSON on a single line or minimal whitespace):
|
|
11960
|
+
{"title": "feat(auth): add user authentication system", "body": "Implement JWT-based authentication with login and logout functionality. Includes password hashing and session management."}
|
|
11601
11961
|
|
|
11602
11962
|
IMPORTANT RULES:
|
|
11963
|
+
- Return ONLY the JSON object - NO markdown code blocks, NO backticks, NO extra text
|
|
11603
11964
|
- ALL string values MUST be enclosed in double quotes
|
|
11965
|
+
- Use compact JSON format (minimal whitespace) for best compatibility
|
|
11604
11966
|
- NO trailing commas
|
|
11605
11967
|
- NO comments or additional text outside the JSON
|
|
11606
|
-
- The "title" and "body" values must be properly quoted strings
|
|
11607
|
-
- Return ONLY the JSON object, nothing else`;
|
|
11968
|
+
- The "title" and "body" values must be properly quoted strings`;
|
|
11608
11969
|
// Use conventional commit prompt if enabled
|
|
11609
11970
|
const promptTemplate = USE_CONVENTIONAL_COMMITS ? CONVENTIONAL_COMMIT_PROMPT : COMMIT_PROMPT;
|
|
11610
11971
|
const prompt = getPrompt({
|
|
@@ -11698,10 +12059,33 @@ IMPORTANT RULES:
|
|
|
11698
12059
|
logger.verbose(`Failed to parse commit message (attempt ${attempt}/${maxAttempts}): ${error.message}`, { color: 'yellow' });
|
|
11699
12060
|
},
|
|
11700
12061
|
},
|
|
11701
|
-
fallbackParser: (text) =>
|
|
11702
|
-
|
|
11703
|
-
|
|
11704
|
-
|
|
12062
|
+
fallbackParser: (text) => {
|
|
12063
|
+
// First try to parse as JSON in case it's valid JSON with unusual formatting
|
|
12064
|
+
try {
|
|
12065
|
+
// Remove markdown code blocks if present
|
|
12066
|
+
let cleanText = text.trim();
|
|
12067
|
+
const codeBlockMatch = cleanText.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
|
|
12068
|
+
if (codeBlockMatch && codeBlockMatch[1]) {
|
|
12069
|
+
cleanText = codeBlockMatch[1].trim();
|
|
12070
|
+
}
|
|
12071
|
+
const parsed = JSON.parse(cleanText);
|
|
12072
|
+
if (parsed &&
|
|
12073
|
+
typeof parsed === 'object' &&
|
|
12074
|
+
typeof parsed.title === 'string' &&
|
|
12075
|
+
typeof parsed.body === 'string' &&
|
|
12076
|
+
parsed.title.length > 0) {
|
|
12077
|
+
return parsed;
|
|
12078
|
+
}
|
|
12079
|
+
}
|
|
12080
|
+
catch {
|
|
12081
|
+
// JSON parsing failed, fall through to text splitting
|
|
12082
|
+
}
|
|
12083
|
+
// Fallback to simple text splitting
|
|
12084
|
+
return {
|
|
12085
|
+
title: text.split('\n')[0] || 'Auto-generated commit',
|
|
12086
|
+
body: text.split('\n').slice(1).join('\n') || 'Generated commit message',
|
|
12087
|
+
};
|
|
12088
|
+
},
|
|
11705
12089
|
onFallback: () => {
|
|
11706
12090
|
logger.verbose('Max retry attempts reached. Falling back to simple text output.', {
|
|
11707
12091
|
color: 'red',
|