git-coco 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,7 +27,6 @@ import { RUN_KEY } from '@langchain/core/outputs';
27
27
  import { CallbackManager, parseCallbackConfigArg } from '@langchain/core/callbacks/manager';
28
28
  import '@langchain/core/utils/json_patch';
29
29
  import { simpleGit } from 'simple-git';
30
- import pQueue from 'p-queue';
31
30
  import { Document, BaseDocumentTransformer } from '@langchain/core/documents';
32
31
  import { createTwoFilesPatch } from 'diff';
33
32
  import '@langchain/core/messages';
@@ -47,7 +46,7 @@ import { pathToFileURL } from 'url';
47
46
  /**
48
47
  * Current build version from package.json
49
48
  */
50
- const BUILD_VERSION = "0.23.1";
49
+ const BUILD_VERSION = "0.25.0";
51
50
 
52
51
  const isInteractive = (config) => {
53
52
  return config?.mode === 'interactive' || !!config?.interactive;
@@ -162,6 +161,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
162
161
  fs__default.writeFileSync(filePath, newLines.join('\n'));
163
162
  }
164
163
 
164
+ /**
165
+ * Prompt template for summarizing code diffs.
166
+ *
167
+ * TODO: Future improvements to consider:
168
+ * - Separate prompts for file-level vs directory-level summarization
169
+ * - Include file type context (e.g., "This is a React component", "This is a test file")
170
+ * - Add guidance for preserving semantic meaning of changes
171
+ * - Consider change type (added/modified/deleted) in prompt for better context
172
+ * - Include hints about the programming language for more idiomatic summaries
173
+ * - Add support for custom user-provided summarization prompts via config
174
+ */
165
175
  const template$5 = `GOAL: Use functional abstractions to summarize the following text
166
176
 
167
177
  RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
@@ -1039,6 +1049,16 @@ const schema$1 = {
1039
1049
  "description": "The maximum number of requests to make concurrently.",
1040
1050
  "default": 6
1041
1051
  },
1052
+ "minTokensForSummary": {
1053
+ "type": "number",
1054
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1055
+ "default": 400
1056
+ },
1057
+ "maxFileTokens": {
1058
+ "type": "number",
1059
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1060
+ "default": "undefined (uses 0.25 * tokenLimit)"
1061
+ },
1042
1062
  "authentication": {
1043
1063
  "anyOf": [
1044
1064
  {
@@ -1799,6 +1819,16 @@ const schema$1 = {
1799
1819
  "description": "The maximum number of requests to make concurrently.",
1800
1820
  "default": 6
1801
1821
  },
1822
+ "minTokensForSummary": {
1823
+ "type": "number",
1824
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1825
+ "default": 400
1826
+ },
1827
+ "maxFileTokens": {
1828
+ "type": "number",
1829
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1830
+ "default": "undefined (uses 0.25 * tokenLimit)"
1831
+ },
1802
1832
  "authentication": {
1803
1833
  "anyOf": [
1804
1834
  {
@@ -1950,6 +1980,16 @@ const schema$1 = {
1950
1980
  "description": "The maximum number of requests to make concurrently.",
1951
1981
  "default": 6
1952
1982
  },
1983
+ "minTokensForSummary": {
1984
+ "type": "number",
1985
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1986
+ "default": 400
1987
+ },
1988
+ "maxFileTokens": {
1989
+ "type": "number",
1990
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1991
+ "default": "undefined (uses 0.25 * tokenLimit)"
1992
+ },
1953
1993
  "authentication": {
1954
1994
  "anyOf": [
1955
1995
  {
@@ -6058,9 +6098,13 @@ const options$4 = {
6058
6098
  alias: 'b',
6059
6099
  description: 'Target branch to compare against',
6060
6100
  },
6101
+ tag: {
6102
+ type: 'string',
6103
+ alias: 't',
6104
+ description: 'Target tag to compare against',
6105
+ },
6061
6106
  sinceLastTag: {
6062
6107
  type: 'boolean',
6063
- alias: 't',
6064
6108
  description: 'Generate changelog for all commits since the last tag',
6065
6109
  default: false,
6066
6110
  },
@@ -7074,6 +7118,37 @@ async function getCommitLogAgainstBranch({ git, logger, targetBranch, }) {
7074
7118
  return [];
7075
7119
  }
7076
7120
 
7121
/**
 * Retrieves the commit log between the current branch and a specified tag.
 *
 * Runs `git rev-list <targetTag>..<currentBranch>`, reverses the listing so the
 * first entry of the listing becomes the last one, and hands the first and last
 * commit of the reversed list to `getCommitLogRangeDetails` (merges excluded).
 *
 * @param {Object} options - The options for retrieving the commit log.
 * @param {SimpleGit} options.git - The SimpleGit instance.
 * @param {Logger} [options.logger] - Optional logger for progress and error messages.
 * @param {string} options.targetTag - The tag to compare against.
 * @returns {Promise<CommitDetails[]>} The commit details for the range, or an
 *   empty array when no tag is given, no commits are found, or git fails.
 */
async function getCommitLogAgainstTag({ git, logger, targetTag, }) {
    // Reject a missing/blank tag up front instead of letting git fail on "undefined..branch".
    if (typeof targetTag !== 'string' || targetTag.trim() === '') {
        logger?.log('No tag provided to compare against', { color: 'yellow' });
        return [];
    }
    try {
        const currentBranch = await getCurrentBranchName({ git });
        const revList = await git.raw(['rev-list', `${targetTag}..${currentBranch}`]);
        const uniqueCommits = revList
            .split('\n')
            .filter(Boolean)
            .reverse();
        logger?.verbose(`Found ${uniqueCommits.length} unique commits between "${currentBranch}" and tag "${targetTag}"`, { color: 'blue' });
        const firstCommit = uniqueCommits[0];
        const lastCommit = uniqueCommits[uniqueCommits.length - 1];
        if (!firstCommit || !lastCommit) {
            logger?.log('Unable to determine first and last commit between branch and tag', { color: 'yellow' });
            return [];
        }
        return await getCommitLogRangeDetails(firstCommit, lastCommit, { git, noMerges: true });
    }
    catch (error) {
        // Best-effort: callers get an empty log, but the cause is surfaced rather than discarded.
        const reason = error instanceof Error ? error.message : String(error);
        logger?.log('Encountered an error getting commit log between branch and tag', { color: 'red' });
        logger?.verbose(`Reason: ${reason}`, { color: 'red' });
    }
    return [];
}
7151
+
7077
7152
  /**
7078
7153
  * Retrieves the commit log for the current branch.
7079
7154
  *
@@ -7692,6 +7767,15 @@ const handler$4 = async (argv, logger) => {
7692
7767
  const git = getRepo();
7693
7768
  const key = getApiKeyForModel(config);
7694
7769
  const { provider, model } = getModelAndProviderFromConfig(config);
7770
+ const exclusiveOptions = [
7771
+ argv.branch ? '--branch' : null,
7772
+ argv.tag ? '--tag' : null,
7773
+ config.sinceLastTag ? '--since-last-tag' : null,
7774
+ ].filter(Boolean);
7775
+ if (exclusiveOptions.length > 1) {
7776
+ logger.log(`Options ${exclusiveOptions.join(', ')} cannot be used together.`, { color: 'red' });
7777
+ process.exit(1);
7778
+ }
7695
7779
  if (config.service.authentication.type !== 'None' && !key) {
7696
7780
  logger.log(`No API Key found. 🗝️🚪`, { color: 'red' });
7697
7781
  process.exit(1);
@@ -7733,6 +7817,10 @@ const handler$4 = async (argv, logger) => {
7733
7817
  logger.verbose(`Generating commit log against branch: ${argv.branch}`, { color: 'yellow' });
7734
7818
  commits = await getCommitLogAgainstBranch({ git, logger, targetBranch: argv.branch });
7735
7819
  }
7820
+ else if (argv.tag) {
7821
+ logger.verbose(`Generating commit log against tag: ${argv.tag}`, { color: 'yellow' });
7822
+ commits = await getCommitLogAgainstTag({ git, logger, targetTag: argv.tag });
7823
+ }
7736
7824
  else {
7737
7825
  logger.verbose(`No range, branch, or tag option provided. Defaulting to current branch`, {
7738
7826
  color: 'yellow',
@@ -8006,6 +8094,45 @@ function repairJson(jsonString) {
8006
8094
  }
8007
8095
  }
8008
8096
 
8097
/**
 * Extract the first complete JSON object from a string by tracking balanced braces.
 *
 * Every `{` in the text is tried as a starting point, in order. The first
 * balanced `{...}` span that `JSON.parse` accepts is returned. If no span
 * parses, the first balanced span is returned anyway so callers still get a
 * candidate to repair; if nothing is balanced, the result is `null`.
 *
 * Braces and quotes inside JSON string literals are ignored, and backslash
 * escapes are only honoured inside string literals.
 *
 * Worst case is quadratic in the number of `{` characters, since each one may
 * trigger a scan of the rest of the text.
 *
 * @param {string} text - Text that may contain a JSON object.
 * @returns {string|null} The extracted object text, or null when none is found
 *   (including when `text` is not a string).
 */
function extractFirstJsonObject(text) {
    if (typeof text !== 'string') {
        return null;
    }
    // Returns the balanced span starting at startIndex, or null if the braces never close.
    const scanBalanced = (startIndex) => {
        let depth = 0;
        let inString = false;
        let escapeNext = false;
        for (let i = startIndex; i < text.length; i++) {
            const char = text[i];
            if (inString) {
                if (escapeNext) {
                    escapeNext = false;
                }
                else if (char === '\\') {
                    escapeNext = true;
                }
                else if (char === '"') {
                    inString = false;
                }
                continue;
            }
            if (char === '"') {
                inString = true;
            }
            else if (char === '{') {
                depth++;
            }
            else if (char === '}') {
                depth--;
                if (depth === 0) {
                    return text.substring(startIndex, i + 1);
                }
            }
        }
        return null;
    };
    const parsesAsJson = (candidate) => {
        try {
            JSON.parse(candidate);
            return true;
        }
        catch {
            return false;
        }
    };
    let firstBalanced = null;
    for (let start = text.indexOf('{'); start !== -1; start = text.indexOf('{', start + 1)) {
        const candidate = scanBalanced(start);
        if (candidate === null) {
            continue;
        }
        if (firstBalanced === null) {
            firstBalanced = candidate;
        }
        if (parsesAsJson(candidate)) {
            return candidate;
        }
    }
    // Nothing parsed: fall back to the first balanced span (or null).
    return firstBalanced;
}
8009
8136
  /**
8010
8137
  * Utility function to ensure commit messages are properly formatted as strings
8011
8138
  * rather than JSON objects, whether they come as parsed objects or stringified JSON
@@ -8024,23 +8151,26 @@ function formatCommitMessage(result, options = {}) {
8024
8151
  if (!result.includes('{') && !result.includes('"title"')) {
8025
8152
  return result;
8026
8153
  }
8027
- // Handle multiple markdown code block formats
8028
- const codeBlockPatterns = [
8154
+ // Handle multiple markdown code block formats and embedded JSON
8155
+ const extractionPatterns = [
8029
8156
  /```(?:json)?\s*(\{[\s\S]*?\})\s*```/, // Standard markdown blocks
8030
8157
  /`(\{[\s\S]*?\})`/, // Inline code blocks
8031
- /^\s*(\{[\s\S]*\})\s*$/ // Raw JSON without blocks
8158
+ /^\s*(\{[\s\S]*\})\s*$/, // Raw JSON without blocks (entire string)
8159
+ /(\{[\s\S]*?\})/ // JSON anywhere in text (fallback)
8032
8160
  ];
8033
8161
  let jsonString = result;
8162
+ let foundMatch = false;
8034
8163
  // Try each pattern to extract JSON
8035
- for (const pattern of codeBlockPatterns) {
8164
+ for (const pattern of extractionPatterns) {
8036
8165
  const match = result.match(pattern);
8037
8166
  if (match && match[1]) {
8038
8167
  jsonString = match[1].trim();
8168
+ foundMatch = true;
8039
8169
  break;
8040
8170
  }
8041
8171
  }
8042
8172
  // Only attempt JSON parsing if we found potential JSON content
8043
- if (jsonString !== result || jsonString.startsWith('{')) {
8173
+ if (foundMatch || jsonString.startsWith('{')) {
8044
8174
  try {
8045
8175
  // Try to parse as JSON to see if it's a stringified object
8046
8176
  const parsed = JSON.parse(jsonString);
@@ -8070,7 +8200,24 @@ function formatCommitMessage(result, options = {}) {
8070
8200
  }
8071
8201
  }
8072
8202
  catch {
8073
- // Repair failed, continue to fallback
8203
+ // Repair failed, try extracting just the first complete JSON object
8204
+ const firstObject = extractFirstJsonObject(jsonString);
8205
+ if (firstObject) {
8206
+ try {
8207
+ const parsed = JSON.parse(firstObject);
8208
+ if (parsed &&
8209
+ typeof parsed === 'object' &&
8210
+ typeof parsed.title === 'string' &&
8211
+ typeof parsed.body === 'string' &&
8212
+ parsed.title.length > 0 &&
8213
+ parsed.body.length > 0) {
8214
+ return constructMessage(parsed.title, parsed.body);
8215
+ }
8216
+ }
8217
+ catch {
8218
+ // Even first object extraction failed, continue to fallback
8219
+ }
8220
+ }
8074
8221
  }
8075
8222
  }
8076
8223
  }
@@ -8110,6 +8257,114 @@ async function summarize(documents, { chain, textSplitter, options }) {
8110
8257
  return res.text && res.text.trim();
8111
8258
  }
8112
8259
 
8260
/**
 * Summarize a single file diff that exceeds the token threshold.
 *
 * The file's raw diff is passed to `summarize` as one document. When a
 * non-empty summary comes back, a copy of `fileDiff` is returned with `diff`
 * replaced by the summary and `tokenCount` recomputed by `tokenizer`.
 *
 * The original `fileDiff` is returned unchanged when summarization throws or
 * yields no text, so a failed summary never erases the file's diff.
 *
 * @param {Object} fileDiff - File diff with `file`, `diff`, `summary` and `tokenCount`.
 * @param {Object} deps - Summarization dependencies.
 * @param {Object} deps.chain - Summarization chain forwarded to `summarize`.
 * @param {Object} deps.textSplitter - Text splitter forwarded to `summarize`.
 * @param {Function} deps.tokenizer - Returns the token count for a string.
 * @returns {Promise<Object>} The summarized file diff, or the original on failure.
 */
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
    try {
        const documents = [
            {
                pageContent: fileDiff.diff,
                metadata: {
                    file: fileDiff.file,
                    summary: fileDiff.summary,
                },
            },
        ];
        const fileSummary = await summarize(documents, {
            chain,
            textSplitter,
            options: {
                returnIntermediateSteps: false,
            },
        });
        // An empty/undefined summary would wipe the diff; keep the raw diff instead.
        if (typeof fileSummary !== 'string' || fileSummary.length === 0) {
            return fileDiff;
        }
        return {
            ...fileDiff,
            diff: fileSummary,
            tokenCount: tokenizer(fileSummary),
        };
    }
    catch (error) {
        // On error, return original diff unchanged
        console.error(`Failed to summarize file ${fileDiff.file}:`, error);
        return fileDiff;
    }
}
8293
/**
 * Process items in waves to respect concurrency limits.
 *
 * Items are handled in consecutive slices of at most `maxConcurrent` entries;
 * each slice is started together and awaited with `Promise.all` before the
 * next slice begins. Results keep the order of `items`.
 *
 * `maxConcurrent` is normalised to a whole number of at least 1. A value of 0,
 * a negative number, `undefined` or `NaN` falls back to one item per wave
 * rather than looping forever or dropping items.
 *
 * @param {Array} items - Items to process.
 * @param {Function} processor - Async callback invoked via `Array.prototype.map` on each wave.
 * @param {number} maxConcurrent - Maximum number of items processed at once.
 * @returns {Promise<Array>} Processor results in input order.
 */
async function processInWaves(items, processor, maxConcurrent) {
    // Guard the step size: a step of 0 never advances and a NaN step ends the loop early.
    const waveSize = maxConcurrent >= 1 ? Math.floor(maxConcurrent) : 1;
    const results = [];
    for (let start = 0; start < items.length; start += waveSize) {
        const wave = items.slice(start, start + waveSize);
        const waveResults = await Promise.all(wave.map(processor));
        results.push(...waveResults);
    }
    return results;
}
8305
/**
 * Pre-summarize individual files that exceed the maxFileTokens threshold.
 * This prevents large files from dominating the token budget and biasing
 * the final commit message toward a single file's changes.
 *
 * A file is summarized only when its token count is strictly greater than
 * `maxFileTokens` and at least `minTokensForSummary`. The input array is not
 * modified; a shallow copy is returned with the summarized entries replaced
 * at their original positions.
 *
 * @param diffs - Array of file diffs to process
 * @param options - Configuration options for summarization
 *   (`maxFileTokens`, `minTokensForSummary`, `maxConcurrent`, `tokenizer`,
 *   `chain`, `textSplitter`, and an optional `logger`)
 * @returns Array of file diffs with large files summarized
 */
async function summarizeLargeFiles(diffs, options) {
    const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
    const results = [...diffs];
    // Identify files that need summarization, remembering where each one lives.
    const filesToSummarize = diffs
        .map((diff, index) => ({ index, diff }))
        .filter(({ diff }) => diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary);
    if (filesToSummarize.length === 0) {
        return results;
    }
    logger?.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
    // Process large files in waves
    const summarizedFiles = await processInWaves(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer }), maxConcurrent);
    // Update results with summarized files
    summarizedFiles.forEach((summarizedDiff, i) => {
        const { index: originalIndex, diff: originalDiff } = filesToSummarize[i];
        if (summarizedDiff === originalDiff) {
            // Getting the same object back means nothing was summarized; say so instead of logging "N -> N".
            logger?.verbose(` - ${originalDiff.file}: summarization failed, keeping raw diff (${originalDiff.tokenCount} tokens)`, { color: 'yellow' });
            return;
        }
        logger?.verbose(` - ${summarizedDiff.file}: ${originalDiff.tokenCount} -> ${summarizedDiff.tokenCount} tokens`, { color: 'magenta' });
        results[originalIndex] = summarizedDiff;
    });
    return results;
}
8340
/**
 * Run large-file summarization over every diff in a DiffNode tree.
 *
 * All diffs are gathered depth-first (a node's own diffs first, then each
 * child in order), passed to `summarizeLargeFiles` in a single call, and the
 * tree is then rebuilt with each diff swapped for its processed counterpart,
 * matched by `file`. The input tree is left untouched; rebuilt nodes carry
 * only `path`, `diffs` and `children`.
 */
async function preprocessLargeFiles(rootNode, options) {
    // Flatten the tree into one list, parents before children.
    const flatten = (node) => [
        ...node.diffs,
        ...node.children.flatMap((child) => flatten(child)),
    ];
    const processedDiffs = await summarizeLargeFiles(flatten(rootNode), options);
    // Index processed diffs by file path; later entries win on duplicates.
    const byFile = new Map(processedDiffs.map((entry) => [entry.file, entry]));
    const rebuild = ({ path, diffs, children }) => ({
        path,
        diffs: diffs.map((entry) => byFile.get(entry.file) || entry),
        children: children.map((child) => rebuild(child)),
    });
    return rebuild(rootNode);
}
8367
+
8113
8368
  /**
8114
8369
  * Create groups from a given node info.
8115
8370
  * @param {DiffNode} node - The node info to start grouping.
@@ -8162,6 +8417,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
8162
8417
  return directory;
8163
8418
  }
8164
8419
  }
8420
+ /**
8421
+ * Default output formatter for directory diffs.
8422
+ *
8423
+ * TODO: Future improvements to consider:
8424
+ * - Hierarchical output showing file -> directory -> overall summary
8425
+ * - Configurable verbosity levels (compact, standard, detailed)
8426
+ * - Machine-readable format option (JSON) for programmatic use
8427
+ * - Semantic grouping by change type (added/modified/deleted) or feature area
8428
+ * - Visual diff indicators showing magnitude of changes
8429
+ */
8165
8430
  const defaultOutputCallback = (group) => {
8166
8431
  let output = `
8167
8432
  -------\n* changes in "/${group.path}"\n\n`;
@@ -8173,41 +8438,124 @@ const defaultOutputCallback = (group) => {
8173
8438
  }
8174
8439
  return output;
8175
8440
  };
8176
- async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
8177
- const queue = new pQueue({ concurrency: 8 });
8441
/**
 * Process directory summarization in waves to respect concurrency limits
 * while maintaining predictable behavior.
 *
 * Directories are visited from largest to smallest token count (ranked once,
 * from the counts passed in). Each wave takes up to `maxConcurrent` eligible
 * directories, summarizes them in parallel with `summarizeDirectoryDiff`, and
 * subtracts the token reduction from the running total. Waves stop as soon as
 * the total is within `maxTokens` or no candidates remain.
 *
 * A directory is skipped when its token count is below `minTokensForSummary`
 * or when it already has a truthy `summary`.
 *
 * `maxConcurrent` is normalised to a whole number of at least 1, so 0 or
 * `undefined` gives one directory per wave instead of silently skipping all
 * summarization.
 *
 * @param {Array} directories - Directory diff groups; not modified.
 * @param {Object} options - `totalTokenCount`, `maxTokens`, `minTokensForSummary`,
 *   `maxConcurrent`, `chain`, `textSplitter`, `tokenizer`, and an optional `logger`.
 * @returns {Promise<{directories: Array, totalTokenCount: number}>} A copy of the
 *   groups with summarized entries replaced, plus the updated total.
 */
async function summarizeInWaves(directories, options) {
    const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
    // An unusable limit would leave every wave empty and end the loop with the budget still exceeded.
    const waveSize = maxConcurrent >= 1 ? Math.floor(maxConcurrent) : 1;
    let totalTokenCount = initialTotal;
    const results = [...directories];
    // Create sorted indices by token count (descending) for prioritized processing
    const sortedIndices = directories
        .map((d, i) => ({ index: i, tokens: d.tokenCount }))
        .sort((a, b) => b.tokens - a.tokens);
    let cursor = 0;
    while (totalTokenCount > maxTokens && cursor < sortedIndices.length) {
        // Fill the wave; the cursor advances past every candidate it looks at.
        const wave = [];
        while (cursor < sortedIndices.length && wave.length < waveSize) {
            const { index, tokens } = sortedIndices[cursor];
            cursor += 1;
            // Skip directories below the minimum threshold or already summarized
            if (tokens < minTokensForSummary || results[index].summary) {
                continue;
            }
            wave.push(index);
        }
        // No more eligible candidates
        if (wave.length === 0) {
            break;
        }
        logger?.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
        // Process wave in parallel
        const waveResults = await Promise.all(wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer })));
        // Update results and recalculate total
        waveResults.forEach((result, i) => {
            const idx = wave[i];
            const originalTokens = results[idx].tokenCount;
            const newTokens = result.tokenCount;
            totalTokenCount -= originalTokens - newTokens;
            results[idx] = result;
            logger?.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
                color: 'magenta',
            });
        });
        logger?.verbose(`Total token count: ${totalTokenCount}`, {
            color: totalTokenCount > maxTokens ? 'yellow' : 'green',
        });
        // Check if we're now under budget
        if (totalTokenCount <= maxTokens) {
            logger?.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
            break;
        }
    }
    return { directories: results, totalTokenCount };
}
8502
/**
 * Summarize diffs using a three-phase approach:
 *
 * Phase 1: Pre-process large files to prevent any single file from dominating
 * Phase 2: Group diffs by directory and assess total token count
 * Phase 3: Wave-based parallel summarization until under budget
 *
 * This approach ensures:
 * - Large files don't bias the summary
 * - Small changes preserve their detail (minTokensForSummary threshold)
 * - Efficient parallel processing with predictable behavior
 * - Early exit when under token budget
 *
 * `maxFileTokens` is used only when it is a finite, non-negative number.
 * Anything else (undefined, null, a string from configuration, NaN) falls back
 * to 25% of `maxTokens`, because a non-numeric threshold would make every
 * "is this file too large" comparison false.
 *
 * @param {DiffNode} rootDiffNode - Root of the diff tree to summarize.
 * @param {Object} options - `tokenizer`, `logger`, `textSplitter`, `chain`, plus
 *   optional `maxTokens` (2048), `minTokensForSummary` (400), `maxFileTokens`,
 *   `maxConcurrent` (6) and `handleOutput` (defaultOutputCallback).
 * @returns {Promise<string>} The per-directory outputs of `handleOutput`, concatenated.
 */
async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
    // Calculate maxFileTokens as 25% of maxTokens if not specified (or not a usable number)
    const hasUsableFileLimit = typeof maxFileTokens === 'number' && Number.isFinite(maxFileTokens) && maxFileTokens >= 0;
    const effectiveMaxFileTokens = hasUsableFileLimit ? maxFileTokens : Math.floor(maxTokens * 0.25);
    // PHASE 1: Pre-process large files
    logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
    const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
        maxFileTokens: effectiveMaxFileTokens,
        minTokensForSummary,
        maxConcurrent,
        tokenizer,
        logger,
        chain,
        textSplitter,
    });
    logger.stopSpinner('Files pre-processed').stopTimer();
    // PHASE 2: Directory grouping & assessment
    logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
    const directoryDiffs = createDirectoryDiffs(preprocessedNode);
    // Sort by token count descending for consistent output ordering
    directoryDiffs.sort((a, b) => b.tokenCount - a.tokenCount);
    const totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
    logger.stopSpinner('Diffs Organized').stopTimer();
    logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
        color: totalTokenCount > maxTokens ? 'yellow' : 'green',
    });
    // Early exit if already under budget
    if (totalTokenCount <= maxTokens) {
        logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
        return directoryDiffs.map(handleOutput).join('');
    }
    // PHASE 3: Wave-based summarization
    logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
    const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
        totalTokenCount,
        maxTokens,
        minTokensForSummary,
        maxConcurrent,
        logger,
        chain,
        textSplitter,
        tokenizer,
    });
    logger.stopSpinner(`Diffs Consolidated`).stopTimer();
    return summarizedDiffs.map(handleOutput).join('');
}
8212
8560
 
8213
8561
  /**
@@ -11207,7 +11555,7 @@ for (var i = 0; i < 256; i++) {
11207
11555
  simpleEscapeMap[i] = simpleEscapeSequence(i);
11208
11556
  }
11209
11557
 
11210
- async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
11558
+ async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
11211
11559
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
11212
11560
  const summarizationChain = loadSummarizationChain(model, {
11213
11561
  type: 'map_reduce',
@@ -11221,11 +11569,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
11221
11569
  logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
11222
11570
  const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
11223
11571
  logger.stopSpinner('Diffs Collected').stopTimer();
11224
- // Summarize diffs
11572
+ // Summarize diffs using three-phase approach:
11573
+ // 1. Pre-process large files to prevent bias
11574
+ // 2. Group by directory and assess token count
11575
+ // 3. Wave-based parallel summarization until under budget
11225
11576
  logger.startTimer();
11226
11577
  const summary = await summarizeDiffs(diffs, {
11227
11578
  tokenizer,
11228
- maxTokens: maxTokens || 4096,
11579
+ maxTokens: maxTokens || 2048,
11580
+ minTokensForSummary,
11581
+ maxFileTokens,
11582
+ maxConcurrent,
11229
11583
  textSplitter,
11230
11584
  chain: summarizationChain,
11231
11585
  logger,
@@ -11528,7 +11882,16 @@ const handler$3 = async (argv, logger) => {
11528
11882
  return await fileChangeParser({
11529
11883
  changes,
11530
11884
  commit: '--staged',
11531
- options: { tokenizer, git, llm, logger, maxTokens: config.service.tokenLimit },
11885
+ options: {
11886
+ tokenizer,
11887
+ git,
11888
+ llm,
11889
+ logger,
11890
+ maxTokens: config.service.tokenLimit,
11891
+ minTokensForSummary: config.service.minTokensForSummary,
11892
+ maxFileTokens: config.service.maxFileTokens,
11893
+ maxConcurrent: config.service.maxConcurrent,
11894
+ },
11532
11895
  });
11533
11896
  }
11534
11897
  const commitMsg = await generateAndReviewLoop({
@@ -11571,18 +11934,16 @@ const handler$3 = async (argv, logger) => {
11571
11934
  REQUIRED JSON FORMAT:
11572
11935
  ${schema.description}
11573
11936
 
11574
- EXAMPLE (follow this exact structure):
11575
- {
11576
- "title": "feat(auth): add user authentication system",
11577
- "body": "Implement JWT-based authentication with login and logout functionality. Includes password hashing and session management."
11578
- }
11937
+ EXAMPLE (follow this EXACT format - compact JSON on a single line or minimal whitespace):
11938
+ {"title": "feat(auth): add user authentication system", "body": "Implement JWT-based authentication with login and logout functionality. Includes password hashing and session management."}
11579
11939
 
11580
11940
  IMPORTANT RULES:
11941
+ - Return ONLY the JSON object - NO markdown code blocks, NO backticks, NO extra text
11581
11942
  - ALL string values MUST be enclosed in double quotes
11943
+ - Use compact JSON format (minimal whitespace) for best compatibility
11582
11944
  - NO trailing commas
11583
11945
  - NO comments or additional text outside the JSON
11584
- - The "title" and "body" values must be properly quoted strings
11585
- - Return ONLY the JSON object, nothing else`;
11946
+ - The "title" and "body" values must be properly quoted strings`;
11586
11947
  // Use conventional commit prompt if enabled
11587
11948
  const promptTemplate = USE_CONVENTIONAL_COMMITS ? CONVENTIONAL_COMMIT_PROMPT : COMMIT_PROMPT;
11588
11949
  const prompt = getPrompt({
@@ -11676,10 +12037,33 @@ IMPORTANT RULES:
11676
12037
  logger.verbose(`Failed to parse commit message (attempt ${attempt}/${maxAttempts}): ${error.message}`, { color: 'yellow' });
11677
12038
  },
11678
12039
  },
11679
- fallbackParser: (text) => ({
11680
- title: text.split('\n')[0] || 'Auto-generated commit',
11681
- body: text.split('\n').slice(1).join('\n') || 'Generated commit message',
11682
- }),
12040
+ fallbackParser: (text) => {
12041
+ // First try to parse as JSON in case it's valid JSON with unusual formatting
12042
+ try {
12043
+ // Remove markdown code blocks if present
12044
+ let cleanText = text.trim();
12045
+ const codeBlockMatch = cleanText.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
12046
+ if (codeBlockMatch && codeBlockMatch[1]) {
12047
+ cleanText = codeBlockMatch[1].trim();
12048
+ }
12049
+ const parsed = JSON.parse(cleanText);
12050
+ if (parsed &&
12051
+ typeof parsed === 'object' &&
12052
+ typeof parsed.title === 'string' &&
12053
+ typeof parsed.body === 'string' &&
12054
+ parsed.title.length > 0) {
12055
+ return parsed;
12056
+ }
12057
+ }
12058
+ catch {
12059
+ // JSON parsing failed, fall through to text splitting
12060
+ }
12061
+ // Fallback to simple text splitting
12062
+ return {
12063
+ title: text.split('\n')[0] || 'Auto-generated commit',
12064
+ body: text.split('\n').slice(1).join('\n') || 'Generated commit message',
12065
+ };
12066
+ },
11683
12067
  onFallback: () => {
11684
12068
  logger.verbose('Max retry attempts reached. Falling back to simple text output.', {
11685
12069
  color: 'red',