git-coco 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -26,7 +26,6 @@ var outputs = require('@langchain/core/outputs');
26
26
  var manager = require('@langchain/core/callbacks/manager');
27
27
  require('@langchain/core/utils/json_patch');
28
28
  var simpleGit = require('simple-git');
29
- var pQueue = require('p-queue');
30
29
  var documents = require('@langchain/core/documents');
31
30
  var diff = require('diff');
32
31
  require('@langchain/core/messages');
@@ -69,7 +68,7 @@ var readline__namespace = /*#__PURE__*/_interopNamespaceDefault(readline);
69
68
  /**
70
69
  * Current build version from package.json
71
70
  */
72
- const BUILD_VERSION = "0.23.1";
71
+ const BUILD_VERSION = "0.25.0";
73
72
 
74
73
  const isInteractive = (config) => {
75
74
  return config?.mode === 'interactive' || !!config?.interactive;
@@ -184,6 +183,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
184
183
  fs.writeFileSync(filePath, newLines.join('\n'));
185
184
  }
186
185
 
186
+ /**
187
+ * Prompt template for summarizing code diffs.
188
+ *
189
+ * TODO: Future improvements to consider:
190
+ * - Separate prompts for file-level vs directory-level summarization
191
+ * - Include file type context (e.g., "This is a React component", "This is a test file")
192
+ * - Add guidance for preserving semantic meaning of changes
193
+ * - Consider change type (added/modified/deleted) in prompt for better context
194
+ * - Include hints about the programming language for more idiomatic summaries
195
+ * - Add support for custom user-provided summarization prompts via config
196
+ */
187
197
  const template$5 = `GOAL: Use functional abstractions to summarize the following text
188
198
 
189
199
  RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
@@ -1061,6 +1071,16 @@ const schema$1 = {
1061
1071
  "description": "The maximum number of requests to make concurrently.",
1062
1072
  "default": 6
1063
1073
  },
1074
+ "minTokensForSummary": {
1075
+ "type": "number",
1076
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1077
+ "default": 400
1078
+ },
1079
+ "maxFileTokens": {
1080
+ "type": "number",
1081
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1082
+ "default": "undefined (uses 0.25 * tokenLimit)"
1083
+ },
1064
1084
  "authentication": {
1065
1085
  "anyOf": [
1066
1086
  {
@@ -1821,6 +1841,16 @@ const schema$1 = {
1821
1841
  "description": "The maximum number of requests to make concurrently.",
1822
1842
  "default": 6
1823
1843
  },
1844
+ "minTokensForSummary": {
1845
+ "type": "number",
1846
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1847
+ "default": 400
1848
+ },
1849
+ "maxFileTokens": {
1850
+ "type": "number",
1851
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1852
+ "default": "undefined (uses 0.25 * tokenLimit)"
1853
+ },
1824
1854
  "authentication": {
1825
1855
  "anyOf": [
1826
1856
  {
@@ -1972,6 +2002,16 @@ const schema$1 = {
1972
2002
  "description": "The maximum number of requests to make concurrently.",
1973
2003
  "default": 6
1974
2004
  },
2005
+ "minTokensForSummary": {
2006
+ "type": "number",
2007
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
2008
+ "default": 400
2009
+ },
2010
+ "maxFileTokens": {
2011
+ "type": "number",
2012
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
2013
+ "default": "undefined (uses 0.25 * tokenLimit)"
2014
+ },
1975
2015
  "authentication": {
1976
2016
  "anyOf": [
1977
2017
  {
@@ -6080,9 +6120,13 @@ const options$4 = {
6080
6120
  alias: 'b',
6081
6121
  description: 'Target branch to compare against',
6082
6122
  },
6123
+ tag: {
6124
+ type: 'string',
6125
+ alias: 't',
6126
+ description: 'Target tag to compare against',
6127
+ },
6083
6128
  sinceLastTag: {
6084
6129
  type: 'boolean',
6085
- alias: 't',
6086
6130
  description: 'Generate changelog for all commits since the last tag',
6087
6131
  default: false,
6088
6132
  },
@@ -7096,6 +7140,37 @@ async function getCommitLogAgainstBranch({ git, logger, targetBranch, }) {
7096
7140
  return [];
7097
7141
  }
7098
7142
 
7143
/**
 * Retrieves the commit log between the current branch and a specified tag.
 *
 * Runs `git rev-list <targetTag>..<currentBranch>` to list the commits that
 * are on the current branch but not reachable from the tag, then resolves the
 * details of that range (merge commits excluded) via `getCommitLogRangeDetails`.
 *
 * @param {Object} options - The options for retrieving the commit log.
 * @param {SimpleGit} options.git - The SimpleGit instance.
 * @param {Logger} [options.logger] - The logger for logging messages.
 * @param {string} options.targetTag - The tag to compare against.
 * @returns {Promise<CommitDetails[]>} The commit details for the range, or an
 * empty array when the range is empty or the lookup fails.
 */
async function getCommitLogAgainstTag({ git, logger, targetTag, }) {
    try {
        const currentBranch = await getCurrentBranchName({ git });
        const revList = await git.raw(['rev-list', `${targetTag}..${currentBranch}`]);
        // Drop the empty entries produced by trailing newlines, then flip the
        // list so the first element is the start of the range.
        const uniqueCommits = revList.split('\n').filter(Boolean).reverse();
        logger?.verbose(`Found ${uniqueCommits.length} unique commits between "${currentBranch}" and tag "${targetTag}"`, { color: 'blue' });
        const firstCommit = uniqueCommits[0];
        const lastCommit = uniqueCommits[uniqueCommits.length - 1];
        if (!firstCommit || !lastCommit) {
            logger?.log('Unable to determine first and last commit between branch and tag', { color: 'yellow' });
            return [];
        }
        return await getCommitLogRangeDetails(firstCommit, lastCommit, { git, noMerges: true });
    }
    catch (error) {
        logger?.log('Encountered an error getting commit log between branch and tag', { color: 'red' });
        // Surface the underlying cause (e.g. an unknown tag) instead of discarding it.
        const reason = error instanceof Error ? error.message : String(error);
        logger?.verbose(`Commit log against tag "${targetTag}" failed: ${reason}`, { color: 'red' });
    }
    return [];
}
7173
+
7099
7174
  /**
7100
7175
  * Retrieves the commit log for the current branch.
7101
7176
  *
@@ -7714,6 +7789,15 @@ const handler$4 = async (argv, logger) => {
7714
7789
  const git = getRepo();
7715
7790
  const key = getApiKeyForModel(config);
7716
7791
  const { provider, model } = getModelAndProviderFromConfig(config);
7792
+ const exclusiveOptions = [
7793
+ argv.branch ? '--branch' : null,
7794
+ argv.tag ? '--tag' : null,
7795
+ config.sinceLastTag ? '--since-last-tag' : null,
7796
+ ].filter(Boolean);
7797
+ if (exclusiveOptions.length > 1) {
7798
+ logger.log(`Options ${exclusiveOptions.join(', ')} cannot be used together.`, { color: 'red' });
7799
+ process.exit(1);
7800
+ }
7717
7801
  if (config.service.authentication.type !== 'None' && !key) {
7718
7802
  logger.log(`No API Key found. 🗝️🚪`, { color: 'red' });
7719
7803
  process.exit(1);
@@ -7755,6 +7839,10 @@ const handler$4 = async (argv, logger) => {
7755
7839
  logger.verbose(`Generating commit log against branch: ${argv.branch}`, { color: 'yellow' });
7756
7840
  commits = await getCommitLogAgainstBranch({ git, logger, targetBranch: argv.branch });
7757
7841
  }
7842
+ else if (argv.tag) {
7843
+ logger.verbose(`Generating commit log against tag: ${argv.tag}`, { color: 'yellow' });
7844
+ commits = await getCommitLogAgainstTag({ git, logger, targetTag: argv.tag });
7845
+ }
7758
7846
  else {
7759
7847
  logger.verbose(`No range, branch, or tag option provided. Defaulting to current branch`, {
7760
7848
  color: 'yellow',
@@ -8028,6 +8116,45 @@ function repairJson(jsonString) {
8028
8116
  }
8029
8117
  }
8030
8118
 
8119
/**
 * Extract the first complete JSON object from a string by tracking balanced braces.
 *
 * Scanning starts at the first `{` and stops at the `}` that brings the brace
 * depth back to zero. Braces inside double-quoted strings are ignored, and a
 * backslash causes the following character to be skipped.
 *
 * @param {string} text - Text that may contain a JSON object.
 * @returns {string|null} The substring spanning the first balanced `{...}`,
 * or `null` when `text` is not a string, has no `{`, or never balances.
 */
function extractFirstJsonObject(text) {
    // Non-string input (null, undefined, an already-parsed object) has nothing to scan.
    if (typeof text !== 'string')
        return null;
    const startIndex = text.indexOf('{');
    if (startIndex === -1)
        return null;
    let braceCount = 0;
    let inString = false;
    let escapeNext = false;
    for (let i = startIndex; i < text.length; i++) {
        const char = text[i];
        if (escapeNext) {
            // The previous character was a backslash; this one is consumed as-is.
            escapeNext = false;
            continue;
        }
        if (char === '\\') {
            escapeNext = true;
            continue;
        }
        if (char === '"') {
            inString = !inString;
            continue;
        }
        if (inString)
            continue;
        if (char === '{') {
            braceCount++;
        }
        else if (char === '}') {
            braceCount--;
            if (braceCount === 0) {
                // Found the end of the first complete JSON object
                return text.substring(startIndex, i + 1);
            }
        }
    }
    // Ran out of input before the braces balanced.
    return null;
}
8031
8158
  /**
8032
8159
  * Utility function to ensure commit messages are properly formatted as strings
8033
8160
  * rather than JSON objects, whether they come as parsed objects or stringified JSON
@@ -8046,23 +8173,26 @@ function formatCommitMessage(result, options = {}) {
8046
8173
  if (!result.includes('{') && !result.includes('"title"')) {
8047
8174
  return result;
8048
8175
  }
8049
- // Handle multiple markdown code block formats
8050
- const codeBlockPatterns = [
8176
+ // Handle multiple markdown code block formats and embedded JSON
8177
+ const extractionPatterns = [
8051
8178
  /```(?:json)?\s*(\{[\s\S]*?\})\s*```/, // Standard markdown blocks
8052
8179
  /`(\{[\s\S]*?\})`/, // Inline code blocks
8053
- /^\s*(\{[\s\S]*\})\s*$/ // Raw JSON without blocks
8180
+ /^\s*(\{[\s\S]*\})\s*$/, // Raw JSON without blocks (entire string)
8181
+ /(\{[\s\S]*?\})/ // JSON anywhere in text (fallback)
8054
8182
  ];
8055
8183
  let jsonString = result;
8184
+ let foundMatch = false;
8056
8185
  // Try each pattern to extract JSON
8057
- for (const pattern of codeBlockPatterns) {
8186
+ for (const pattern of extractionPatterns) {
8058
8187
  const match = result.match(pattern);
8059
8188
  if (match && match[1]) {
8060
8189
  jsonString = match[1].trim();
8190
+ foundMatch = true;
8061
8191
  break;
8062
8192
  }
8063
8193
  }
8064
8194
  // Only attempt JSON parsing if we found potential JSON content
8065
- if (jsonString !== result || jsonString.startsWith('{')) {
8195
+ if (foundMatch || jsonString.startsWith('{')) {
8066
8196
  try {
8067
8197
  // Try to parse as JSON to see if it's a stringified object
8068
8198
  const parsed = JSON.parse(jsonString);
@@ -8092,7 +8222,24 @@ function formatCommitMessage(result, options = {}) {
8092
8222
  }
8093
8223
  }
8094
8224
  catch {
8095
- // Repair failed, continue to fallback
8225
+ // Repair failed, try extracting just the first complete JSON object
8226
+ const firstObject = extractFirstJsonObject(jsonString);
8227
+ if (firstObject) {
8228
+ try {
8229
+ const parsed = JSON.parse(firstObject);
8230
+ if (parsed &&
8231
+ typeof parsed === 'object' &&
8232
+ typeof parsed.title === 'string' &&
8233
+ typeof parsed.body === 'string' &&
8234
+ parsed.title.length > 0 &&
8235
+ parsed.body.length > 0) {
8236
+ return constructMessage(parsed.title, parsed.body);
8237
+ }
8238
+ }
8239
+ catch {
8240
+ // Even first object extraction failed, continue to fallback
8241
+ }
8242
+ }
8096
8243
  }
8097
8244
  }
8098
8245
  }
@@ -8132,6 +8279,114 @@ async function summarize(documents$1, { chain, textSplitter, options }) {
8132
8279
  return res.text && res.text.trim();
8133
8280
  }
8134
8281
 
8282
/**
 * Summarize a single file diff that exceeds the token threshold.
 *
 * The diff text is passed to `summarize` as a single document carrying the
 * file path and change summary as metadata. On success the returned object is
 * a copy of `fileDiff` whose `diff` is the summary text and whose `tokenCount`
 * is recomputed from that text.
 *
 * @param {Object} fileDiff - File diff with `file`, `diff`, `summary` and `tokenCount`.
 * @param {Object} deps
 * @param {Object} deps.chain - Summarization chain forwarded to `summarize`.
 * @param {Object} deps.textSplitter - Text splitter forwarded to `summarize`.
 * @param {Function} deps.tokenizer - Called with the summary text to obtain its token count.
 * @returns {Promise<Object>} The summarized file diff, or the original
 * `fileDiff` when summarization fails or yields no text.
 */
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
    try {
        const fileSummary = await summarize([
            {
                pageContent: fileDiff.diff,
                metadata: {
                    file: fileDiff.file,
                    summary: fileDiff.summary,
                },
            },
        ], {
            chain,
            textSplitter,
            options: {
                returnIntermediateSteps: false,
            },
        });
        // `summarize` returns `res.text && res.text.trim()`, which may be empty or
        // undefined. Replacing the real diff with nothing would lose the change.
        if (typeof fileSummary !== 'string' || fileSummary.length === 0) {
            return fileDiff;
        }
        return {
            ...fileDiff,
            diff: fileSummary,
            tokenCount: tokenizer(fileSummary),
        };
    }
    catch (error) {
        // On error, return original diff unchanged
        console.error(`Failed to summarize file ${fileDiff.file}:`, error);
        return fileDiff;
    }
}
8315
/**
 * Process files in waves to respect concurrency limits.
 *
 * Items are handled in consecutive slices of at most `maxConcurrent` entries.
 * Every item of a slice is started together and the next slice begins only
 * after all of them have settled successfully.
 *
 * @param {Array} items - Items to process.
 * @param {Function} processor - Invoked via `Array.prototype.map` on each
 * slice; may return a value or a promise.
 * @param {number} maxConcurrent - Maximum items per wave. Values that are not
 * a number >= 1 fall back to 1, and fractional values are rounded down, so an
 * invalid setting can neither stall the loop nor skip or repeat items.
 * @returns {Promise<Array>} Results in the same order as `items`.
 */
async function processInWaves(items, processor, maxConcurrent) {
    const requested = Math.floor(Number(maxConcurrent));
    // NaN (undefined / non-numeric), zero and negatives all fail this comparison.
    const waveSize = requested >= 1 ? requested : 1;
    const results = [];
    for (let i = 0; i < items.length; i += waveSize) {
        const wave = items.slice(i, i + waveSize);
        const waveResults = await Promise.all(wave.map(processor));
        for (const result of waveResults) {
            results.push(result);
        }
    }
    return results;
}
8327
/**
 * Pre-summarize individual files that exceed the maxFileTokens threshold.
 *
 * A file qualifies when its `tokenCount` is strictly greater than
 * `maxFileTokens` and at least `minTokensForSummary`. Qualifying files are
 * handed to `summarizeFileDiff` through `processInWaves`; everything else is
 * passed through untouched. The input array is never modified.
 *
 * @param diffs - Array of file diffs to process
 * @param options - Thresholds (`maxFileTokens`, `minTokensForSummary`),
 * `maxConcurrent`, `logger`, and the `chain` / `textSplitter` / `tokenizer`
 * forwarded to `summarizeFileDiff`
 * @returns New array of file diffs with the large files replaced by their summarized form
 */
async function summarizeLargeFiles(diffs, options) {
    const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
    const output = Array.from(diffs);
    // Remember each candidate's position so its replacement lands in the same slot.
    const oversized = diffs
        .map((diff, index) => ({ index, diff }))
        .filter(({ diff }) => diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary);
    if (oversized.length === 0) {
        return output;
    }
    logger.verbose(`Pre-summarizing ${oversized.length} large file(s)...`, { color: 'blue' });
    const summarizeEntry = async (entry) => summarizeFileDiff(entry.diff, { chain, textSplitter, tokenizer });
    const condensed = await processInWaves(oversized, summarizeEntry, maxConcurrent);
    for (let position = 0; position < condensed.length; position++) {
        const replacement = condensed[position];
        const slot = oversized[position].index;
        const tokensBefore = output[slot].tokenCount;
        const tokensAfter = replacement.tokenCount;
        logger.verbose(` - ${replacement.file}: ${tokensBefore} -> ${tokensAfter} tokens`, { color: 'magenta' });
        output[slot] = replacement;
    }
    return output;
}
8362
/**
 * Pre-process a DiffNode tree, summarizing large files at the leaf level.
 *
 * Every diff in the tree is gathered (a node's own diffs first, then each
 * child in order), passed to `summarizeLargeFiles` in one call, and the tree
 * is then rebuilt with each diff swapped for its processed counterpart,
 * matched by `file`. The input tree is left untouched; rebuilt nodes carry
 * only `path`, `diffs` and `children`.
 *
 * @param rootNode - Root of the DiffNode tree
 * @param options - Passed through unchanged to `summarizeLargeFiles`
 * @returns A new DiffNode tree containing the processed diffs
 */
async function preprocessLargeFiles(rootNode, options) {
    // Depth-first flattening of the whole tree.
    const gather = (node) => [...node.diffs, ...node.children.flatMap(gather)];
    const processed = await summarizeLargeFiles(gather(rootNode), options);
    // Index by file path; a later entry with the same path wins.
    const byFile = new Map(processed.map((entry) => [entry.file, entry]));
    const rebuild = (node) => ({
        path: node.path,
        diffs: node.diffs.map((entry) => byFile.get(entry.file) || entry),
        children: node.children.map(rebuild),
    });
    return rebuild(rootNode);
}
8389
+
8135
8390
  /**
8136
8391
  * Create groups from a given node info.
8137
8392
  * @param {DiffNode} node - The node info to start grouping.
@@ -8184,6 +8439,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
8184
8439
  return directory;
8185
8440
  }
8186
8441
  }
8442
+ /**
8443
+ * Default output formatter for directory diffs.
8444
+ *
8445
+ * TODO: Future improvements to consider:
8446
+ * - Hierarchical output showing file -> directory -> overall summary
8447
+ * - Configurable verbosity levels (compact, standard, detailed)
8448
+ * - Machine-readable format option (JSON) for programmatic use
8449
+ * - Semantic grouping by change type (added/modified/deleted) or feature area
8450
+ * - Visual diff indicators showing magnitude of changes
8451
+ */
8187
8452
  const defaultOutputCallback = (group) => {
8188
8453
  let output = `
8189
8454
  -------\n* changes in "/${group.path}"\n\n`;
@@ -8195,41 +8460,124 @@ const defaultOutputCallback = (group) => {
8195
8460
  }
8196
8461
  return output;
8197
8462
  };
8198
- async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
8199
- const queue = new pQueue({ concurrency: 8 });
8463
/**
 * Process directory summarization in waves to respect concurrency limits
 * while maintaining predictable behavior.
 *
 * Directories are visited from largest to smallest `tokenCount`. Each wave
 * takes up to `maxConcurrent` directories that are not below
 * `minTokensForSummary` and do not already have a `summary`, runs
 * `summarizeDirectoryDiff` on them in parallel, and subtracts the tokens
 * saved from the running total. Waves stop once the total is within
 * `maxTokens` or no eligible directory remains.
 *
 * @param directories - Directory diffs; the array itself is not modified
 * @param options - `totalTokenCount` (starting total), `maxTokens`,
 * `minTokensForSummary`, `maxConcurrent`, `logger`, plus the `chain` /
 * `textSplitter` / `tokenizer` forwarded to `summarizeDirectoryDiff`
 * @returns `{ directories, totalTokenCount }` — the updated copy in the
 * original order and the final running total
 */
async function summarizeInWaves(directories, options) {
    const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
    let runningTotal = initialTotal;
    const current = [...directories];
    // Largest directories first; positions refer back into `current`.
    const ranked = directories
        .map((d, i) => ({ index: i, tokens: d.tokenCount }))
        .sort((a, b) => b.tokens - a.tokens);
    let next = 0;
    while (runningTotal > maxTokens && next < ranked.length) {
        // Fill the wave, consuming ranked entries whether or not they qualify.
        const batch = [];
        while (next < ranked.length && batch.length < maxConcurrent) {
            const { index, tokens } = ranked[next];
            next += 1;
            const belowThreshold = tokens < minTokensForSummary;
            if (!belowThreshold && !current[index].summary) {
                batch.push(index);
            }
        }
        // No more eligible candidates
        if (batch.length === 0) {
            break;
        }
        logger.verbose(`\nProcessing wave of ${batch.length} directories...`, { color: 'blue' });
        const outcomes = await Promise.all(batch.map((idx) => summarizeDirectoryDiff(current[idx], { chain, textSplitter, tokenizer })));
        for (const [position, result] of outcomes.entries()) {
            const idx = batch[position];
            const originalTokens = current[idx].tokenCount;
            const newTokens = result.tokenCount;
            // Deduct whatever this directory saved from the running total.
            runningTotal -= originalTokens - newTokens;
            current[idx] = result;
            logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
                color: 'magenta',
            });
        }
        const overBudget = runningTotal > maxTokens;
        logger.verbose(`Total token count: ${runningTotal}`, {
            color: overBudget ? 'yellow' : 'green',
        });
        if (runningTotal <= maxTokens) {
            logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
            break;
        }
    }
    return { directories: current, totalTokenCount: runningTotal };
}
8524
+ /**
8525
+ * Summarize diffs using a three-phase approach:
8526
+ *
8527
+ * Phase 1: Pre-process large files to prevent any single file from dominating
8528
+ * Phase 2: Group diffs by directory and assess total token count
8529
+ * Phase 3: Wave-based parallel summarization until under budget
8530
+ *
8531
+ * This approach ensures:
8532
+ * - Large files don't bias the summary
8533
+ * - Small changes preserve their detail (minTokensForSummary threshold)
8534
+ * - Efficient parallel processing with predictable behavior
8535
+ * - Early exit when under token budget
8536
+ */
8537
+ async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
8538
+ // Calculate maxFileTokens as 25% of maxTokens if not specified
8539
+ const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
8540
+ // PHASE 1: Pre-process large files
8541
+ logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
8542
+ const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
8543
+ maxFileTokens: effectiveMaxFileTokens,
8544
+ minTokensForSummary,
8545
+ maxConcurrent,
8546
+ tokenizer,
8547
+ logger,
8548
+ chain,
8549
+ textSplitter,
8550
+ });
8551
+ logger.stopSpinner('Files pre-processed').stopTimer();
8552
+ // PHASE 2: Directory grouping & assessment
8200
8553
  logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
8201
- const directoryDiffs = createDirectoryDiffs(rootDiffNode);
8202
- // Sort by token count descending
8554
+ const directoryDiffs = createDirectoryDiffs(preprocessedNode);
8555
+ // Sort by token count descending for consistent output ordering
8203
8556
  directoryDiffs.sort((a, b) => b.tokenCount - a.tokenCount);
8204
- let totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
8557
+ const totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
8205
8558
  logger.stopSpinner('Diffs Organized').stopTimer();
8206
- logger.startSpinner(`Consolidating Diffs`, { color: 'blue' });
8207
- const processingTasks = directoryDiffs.map((group, i) => {
8208
- return queue.add(async () => {
8209
- // If the diff token count is already less than the average req, we can skip summarizing.
8210
- const isLessThanAvgTokenReq = group.tokenCount <= maxTokens / directoryDiffs.length;
8211
- if (totalTokenCount <= maxTokens || isLessThanAvgTokenReq) {
8212
- return group;
8213
- }
8214
- group = await summarizeDirectoryDiff(group, {
8215
- chain,
8216
- textSplitter,
8217
- tokenizer,
8218
- });
8219
- // We need to subtract the old token count and add the new one
8220
- totalTokenCount = totalTokenCount - directoryDiffs[i].tokenCount + group.tokenCount;
8221
- directoryDiffs[i] = group;
8222
- logger
8223
- .verbose(`\n • Summarized diffs in "/${group.path}" `, { color: 'blue' })
8224
- .verbose(`\nTotal token count: ${totalTokenCount}`, {
8225
- color: totalTokenCount > maxTokens ? 'yellow' : 'green',
8226
- });
8227
- return group;
8228
- }, { priority: group.tokenCount });
8559
+ logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
8560
+ color: totalTokenCount > maxTokens ? 'yellow' : 'green',
8229
8561
  });
8230
- await Promise.all(processingTasks);
8231
- logger.stopSpinner(`Summarized Diffs`);
8232
- return directoryDiffs.map(handleOutput).join('');
8562
+ // Early exit if already under budget
8563
+ if (totalTokenCount <= maxTokens) {
8564
+ logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
8565
+ return directoryDiffs.map(handleOutput).join('');
8566
+ }
8567
+ // PHASE 3: Wave-based summarization
8568
+ logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
8569
+ const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
8570
+ totalTokenCount,
8571
+ maxTokens,
8572
+ minTokensForSummary,
8573
+ maxConcurrent,
8574
+ logger,
8575
+ chain,
8576
+ textSplitter,
8577
+ tokenizer,
8578
+ });
8579
+ logger.stopSpinner(`Diffs Consolidated`).stopTimer();
8580
+ return summarizedDiffs.map(handleOutput).join('');
8233
8581
  }
8234
8582
 
8235
8583
  /**
@@ -11229,7 +11577,7 @@ for (var i = 0; i < 256; i++) {
11229
11577
  simpleEscapeMap[i] = simpleEscapeSequence(i);
11230
11578
  }
11231
11579
 
11232
- async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
11580
+ async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
11233
11581
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
11234
11582
  const summarizationChain = loadSummarizationChain(model, {
11235
11583
  type: 'map_reduce',
@@ -11243,11 +11591,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
11243
11591
  logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
11244
11592
  const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
11245
11593
  logger.stopSpinner('Diffs Collected').stopTimer();
11246
- // Summarize diffs
11594
+ // Summarize diffs using three-phase approach:
11595
+ // 1. Pre-process large files to prevent bias
11596
+ // 2. Group by directory and assess token count
11597
+ // 3. Wave-based parallel summarization until under budget
11247
11598
  logger.startTimer();
11248
11599
  const summary = await summarizeDiffs(diffs, {
11249
11600
  tokenizer,
11250
- maxTokens: maxTokens || 4096,
11601
+ maxTokens: maxTokens || 2048,
11602
+ minTokensForSummary,
11603
+ maxFileTokens,
11604
+ maxConcurrent,
11251
11605
  textSplitter,
11252
11606
  chain: summarizationChain,
11253
11607
  logger,
@@ -11550,7 +11904,16 @@ const handler$3 = async (argv, logger) => {
11550
11904
  return await fileChangeParser({
11551
11905
  changes,
11552
11906
  commit: '--staged',
11553
- options: { tokenizer, git, llm, logger, maxTokens: config.service.tokenLimit },
11907
+ options: {
11908
+ tokenizer,
11909
+ git,
11910
+ llm,
11911
+ logger,
11912
+ maxTokens: config.service.tokenLimit,
11913
+ minTokensForSummary: config.service.minTokensForSummary,
11914
+ maxFileTokens: config.service.maxFileTokens,
11915
+ maxConcurrent: config.service.maxConcurrent,
11916
+ },
11554
11917
  });
11555
11918
  }
11556
11919
  const commitMsg = await generateAndReviewLoop({
@@ -11593,18 +11956,16 @@ const handler$3 = async (argv, logger) => {
11593
11956
  REQUIRED JSON FORMAT:
11594
11957
  ${schema.description}
11595
11958
 
11596
- EXAMPLE (follow this exact structure):
11597
- {
11598
- "title": "feat(auth): add user authentication system",
11599
- "body": "Implement JWT-based authentication with login and logout functionality. Includes password hashing and session management."
11600
- }
11959
+ EXAMPLE (follow this EXACT format - compact JSON on a single line or minimal whitespace):
11960
+ {"title": "feat(auth): add user authentication system", "body": "Implement JWT-based authentication with login and logout functionality. Includes password hashing and session management."}
11601
11961
 
11602
11962
  IMPORTANT RULES:
11963
+ - Return ONLY the JSON object - NO markdown code blocks, NO backticks, NO extra text
11603
11964
  - ALL string values MUST be enclosed in double quotes
11965
+ - Use compact JSON format (minimal whitespace) for best compatibility
11604
11966
  - NO trailing commas
11605
11967
  - NO comments or additional text outside the JSON
11606
- - The "title" and "body" values must be properly quoted strings
11607
- - Return ONLY the JSON object, nothing else`;
11968
+ - The "title" and "body" values must be properly quoted strings`;
11608
11969
  // Use conventional commit prompt if enabled
11609
11970
  const promptTemplate = USE_CONVENTIONAL_COMMITS ? CONVENTIONAL_COMMIT_PROMPT : COMMIT_PROMPT;
11610
11971
  const prompt = getPrompt({
@@ -11698,10 +12059,33 @@ IMPORTANT RULES:
11698
12059
  logger.verbose(`Failed to parse commit message (attempt ${attempt}/${maxAttempts}): ${error.message}`, { color: 'yellow' });
11699
12060
  },
11700
12061
  },
11701
- fallbackParser: (text) => ({
11702
- title: text.split('\n')[0] || 'Auto-generated commit',
11703
- body: text.split('\n').slice(1).join('\n') || 'Generated commit message',
11704
- }),
12062
+ fallbackParser: (text) => {
12063
+ // First try to parse as JSON in case it's valid JSON with unusual formatting
12064
+ try {
12065
+ // Remove markdown code blocks if present
12066
+ let cleanText = text.trim();
12067
+ const codeBlockMatch = cleanText.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
12068
+ if (codeBlockMatch && codeBlockMatch[1]) {
12069
+ cleanText = codeBlockMatch[1].trim();
12070
+ }
12071
+ const parsed = JSON.parse(cleanText);
12072
+ if (parsed &&
12073
+ typeof parsed === 'object' &&
12074
+ typeof parsed.title === 'string' &&
12075
+ typeof parsed.body === 'string' &&
12076
+ parsed.title.length > 0) {
12077
+ return parsed;
12078
+ }
12079
+ }
12080
+ catch {
12081
+ // JSON parsing failed, fall through to text splitting
12082
+ }
12083
+ // Fallback to simple text splitting
12084
+ return {
12085
+ title: text.split('\n')[0] || 'Auto-generated commit',
12086
+ body: text.split('\n').slice(1).join('\n') || 'Generated commit message',
12087
+ };
12088
+ },
11705
12089
  onFallback: () => {
11706
12090
  logger.verbose('Max retry attempts reached. Falling back to simple text output.', {
11707
12091
  color: 'red',