git-coco 0.24.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -38,6 +38,21 @@ type BaseLLMService = {
38
38
  * @default 6
39
39
  */
40
40
  maxConcurrent?: number;
41
+ /**
42
+ * Minimum token count for a directory/file group to be eligible for summarization.
43
+ * Groups below this threshold preserve raw diffs to maintain detail.
44
+ *
45
+ * @default 400
46
+ */
47
+ minTokensForSummary?: number;
48
+ /**
49
+ * Maximum tokens allowed for a single file diff before it gets pre-summarized.
50
+ * Prevents large files from biasing the overall summary.
51
+ * If not set, defaults to 25% of tokenLimit.
52
+ *
53
+ * @default undefined (uses 0.25 * tokenLimit)
54
+ */
55
+ maxFileTokens?: number;
41
56
  authentication: Authentication;
42
57
  requestOptions?: {
43
58
  timeout?: number;
@@ -339,6 +354,21 @@ interface BaseParserOptions {
339
354
  git: SimpleGit;
340
355
  logger: Logger;
341
356
  maxTokens?: number;
357
+ /**
358
+ * Minimum token count for a directory/file group to be eligible for summarization.
359
+ * @default 400
360
+ */
361
+ minTokensForSummary?: number;
362
+ /**
363
+ * Maximum tokens allowed for a single file diff before it gets pre-summarized.
364
+ * Defaults to 25% of maxTokens if not specified.
365
+ */
366
+ maxFileTokens?: number;
367
+ /**
368
+ * Maximum number of concurrent summarization requests.
369
+ * @default 6
370
+ */
371
+ maxConcurrent?: number;
342
372
  }
343
373
  interface BaseParserInput {
344
374
  options: BaseParserOptions;
@@ -27,7 +27,6 @@ import { RUN_KEY } from '@langchain/core/outputs';
27
27
  import { CallbackManager, parseCallbackConfigArg } from '@langchain/core/callbacks/manager';
28
28
  import '@langchain/core/utils/json_patch';
29
29
  import { simpleGit } from 'simple-git';
30
- import pQueue from 'p-queue';
31
30
  import { Document, BaseDocumentTransformer } from '@langchain/core/documents';
32
31
  import { createTwoFilesPatch } from 'diff';
33
32
  import '@langchain/core/messages';
@@ -47,7 +46,7 @@ import { pathToFileURL } from 'url';
47
46
  /**
48
47
  * Current build version from package.json
49
48
  */
50
- const BUILD_VERSION = "0.24.0";
49
+ const BUILD_VERSION = "0.26.0";
51
50
 
52
51
  const isInteractive = (config) => {
53
52
  return config?.mode === 'interactive' || !!config?.interactive;
@@ -162,6 +161,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
162
161
  fs__default.writeFileSync(filePath, newLines.join('\n'));
163
162
  }
164
163
 
164
+ /**
165
+ * Prompt template for summarizing code diffs.
166
+ *
167
+ * TODO: Future improvements to consider:
168
+ * - Separate prompts for file-level vs directory-level summarization
169
+ * - Include file type context (e.g., "This is a React component", "This is a test file")
170
+ * - Add guidance for preserving semantic meaning of changes
171
+ * - Consider change type (added/modified/deleted) in prompt for better context
172
+ * - Include hints about the programming language for more idiomatic summaries
173
+ * - Add support for custom user-provided summarization prompts via config
174
+ */
165
175
  const template$5 = `GOAL: Use functional abstractions to summarize the following text
166
176
 
167
177
  RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
@@ -347,8 +357,11 @@ function getDefaultServiceApiKey(config) {
347
357
  const DEFAULT_OPENAI_LLM_SERVICE = {
348
358
  provider: 'openai',
349
359
  model: 'gpt-4o-mini',
350
- tokenLimit: 2024,
360
+ tokenLimit: 4096,
351
361
  temperature: 0.32,
362
+ maxConcurrent: 12,
363
+ minTokensForSummary: 800,
364
+ maxFileTokens: 2000,
352
365
  authentication: {
353
366
  type: 'APIKey',
354
367
  credentials: {
@@ -360,6 +373,10 @@ const DEFAULT_ANTHROPIC_LLM_SERVICE = {
360
373
  provider: 'anthropic',
361
374
  model: 'claude-3-5-sonnet-20240620',
362
375
  temperature: 0.32,
376
+ tokenLimit: 4096,
377
+ maxConcurrent: 12,
378
+ minTokensForSummary: 800,
379
+ maxFileTokens: 2000,
363
380
  authentication: {
364
381
  type: 'APIKey',
365
382
  credentials: {
@@ -372,9 +389,11 @@ const DEFAULT_OLLAMA_LLM_SERVICE = {
372
389
  model: 'llama3',
373
390
  endpoint: 'http://localhost:11434',
374
391
  maxConcurrent: 1,
375
- tokenLimit: 2024,
392
+ tokenLimit: 4096,
376
393
  temperature: 0.4,
377
394
  maxParsingAttempts: 3,
395
+ minTokensForSummary: 800,
396
+ maxFileTokens: 2000,
378
397
  authentication: {
379
398
  type: 'None',
380
399
  credentials: undefined,
@@ -622,6 +641,24 @@ function loadGitConfig(config) {
622
641
  service = {
623
642
  provider: gitConfigParsed.coco?.serviceProvider,
624
643
  model: gitConfigParsed.coco?.serviceModel,
644
+ tokenLimit: gitConfigParsed.coco?.serviceTokenLimit
645
+ ? Number(gitConfigParsed.coco.serviceTokenLimit)
646
+ : undefined,
647
+ temperature: gitConfigParsed.coco?.serviceTemperature
648
+ ? Number(gitConfigParsed.coco.serviceTemperature)
649
+ : undefined,
650
+ maxConcurrent: gitConfigParsed.coco?.serviceMaxConcurrent
651
+ ? Number(gitConfigParsed.coco.serviceMaxConcurrent)
652
+ : undefined,
653
+ minTokensForSummary: gitConfigParsed.coco?.serviceMinTokensForSummary
654
+ ? Number(gitConfigParsed.coco.serviceMinTokensForSummary)
655
+ : undefined,
656
+ maxFileTokens: gitConfigParsed.coco?.serviceMaxFileTokens
657
+ ? Number(gitConfigParsed.coco.serviceMaxFileTokens)
658
+ : undefined,
659
+ maxParsingAttempts: gitConfigParsed.coco?.serviceMaxParsingAttempts
660
+ ? Number(gitConfigParsed.coco.serviceMaxParsingAttempts)
661
+ : undefined,
625
662
  authentication: {
626
663
  type: 'APIKey',
627
664
  credentials: {
@@ -677,6 +714,24 @@ const appendToGitConfig = async (filePath, config) => {
677
714
  if (service.authentication.type === 'APIKey') {
678
715
  contentLines.push(` serviceApiKey = ${service.authentication.credentials.apiKey}`);
679
716
  }
717
+ if (service.tokenLimit !== undefined) {
718
+ contentLines.push(` serviceTokenLimit = ${service.tokenLimit}`);
719
+ }
720
+ if (service.temperature !== undefined) {
721
+ contentLines.push(` serviceTemperature = ${service.temperature}`);
722
+ }
723
+ if (service.maxConcurrent !== undefined) {
724
+ contentLines.push(` serviceMaxConcurrent = ${service.maxConcurrent}`);
725
+ }
726
+ if (service.minTokensForSummary !== undefined) {
727
+ contentLines.push(` serviceMinTokensForSummary = ${service.minTokensForSummary}`);
728
+ }
729
+ if (service.maxFileTokens !== undefined) {
730
+ contentLines.push(` serviceMaxFileTokens = ${service.maxFileTokens}`);
731
+ }
732
+ if (service.maxParsingAttempts !== undefined) {
733
+ contentLines.push(` serviceMaxParsingAttempts = ${service.maxParsingAttempts}`);
734
+ }
680
735
  if (service.requestOptions?.timeout) {
681
736
  contentLines.push(` serviceRequestOptionsTimeout = ${service.requestOptions.timeout}`);
682
737
  }
@@ -1039,6 +1094,16 @@ const schema$1 = {
1039
1094
  "description": "The maximum number of requests to make concurrently.",
1040
1095
  "default": 6
1041
1096
  },
1097
+ "minTokensForSummary": {
1098
+ "type": "number",
1099
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1100
+ "default": 400
1101
+ },
1102
+ "maxFileTokens": {
1103
+ "type": "number",
1104
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1105
+ "default": "undefined (uses 0.25 * tokenLimit)"
1106
+ },
1042
1107
  "authentication": {
1043
1108
  "anyOf": [
1044
1109
  {
@@ -1799,6 +1864,16 @@ const schema$1 = {
1799
1864
  "description": "The maximum number of requests to make concurrently.",
1800
1865
  "default": 6
1801
1866
  },
1867
+ "minTokensForSummary": {
1868
+ "type": "number",
1869
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1870
+ "default": 400
1871
+ },
1872
+ "maxFileTokens": {
1873
+ "type": "number",
1874
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1875
+ "default": "undefined (uses 0.25 * tokenLimit)"
1876
+ },
1802
1877
  "authentication": {
1803
1878
  "anyOf": [
1804
1879
  {
@@ -1950,6 +2025,16 @@ const schema$1 = {
1950
2025
  "description": "The maximum number of requests to make concurrently.",
1951
2026
  "default": 6
1952
2027
  },
2028
+ "minTokensForSummary": {
2029
+ "type": "number",
2030
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
2031
+ "default": 400
2032
+ },
2033
+ "maxFileTokens": {
2034
+ "type": "number",
2035
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
2036
+ "default": "undefined (uses 0.25 * tokenLimit)"
2037
+ },
1953
2038
  "authentication": {
1954
2039
  "anyOf": [
1955
2040
  {
@@ -8217,6 +8302,114 @@ async function summarize(documents, { chain, textSplitter, options }) {
8217
8302
  return res.text && res.text.trim();
8218
8303
  }
8219
8304
 
8305
+ /**
8306
+ * Summarize a single file diff that exceeds the token threshold.
8307
+ */
8308
+ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
8309
+ try {
8310
+ const fileSummary = await summarize([
8311
+ {
8312
+ pageContent: fileDiff.diff,
8313
+ metadata: {
8314
+ file: fileDiff.file,
8315
+ summary: fileDiff.summary,
8316
+ },
8317
+ },
8318
+ ], {
8319
+ chain,
8320
+ textSplitter,
8321
+ options: {
8322
+ returnIntermediateSteps: false,
8323
+ },
8324
+ });
8325
+ const newTokenCount = tokenizer(fileSummary);
8326
+ return {
8327
+ ...fileDiff,
8328
+ diff: fileSummary,
8329
+ tokenCount: newTokenCount,
8330
+ };
8331
+ }
8332
+ catch (error) {
8333
+ // On error, return original diff unchanged
8334
+ console.error(`Failed to summarize file ${fileDiff.file}:`, error);
8335
+ return fileDiff;
8336
+ }
8337
+ }
8338
+ /**
8339
+ * Process files in waves to respect concurrency limits.
8340
+ */
8341
+ async function processInWaves(items, processor, maxConcurrent) {
8342
+ const results = [];
8343
+ for (let i = 0; i < items.length; i += maxConcurrent) {
8344
+ const wave = items.slice(i, i + maxConcurrent);
8345
+ const waveResults = await Promise.all(wave.map(processor));
8346
+ results.push(...waveResults);
8347
+ }
8348
+ return results;
8349
+ }
8350
+ /**
8351
+ * Pre-summarize individual files that exceed the maxFileTokens threshold.
8352
+ * This prevents large files from dominating the token budget and biasing
8353
+ * the final commit message toward a single file's changes.
8354
+ *
8355
+ * @param diffs - Array of file diffs to process
8356
+ * @param options - Configuration options for summarization
8357
+ * @returns Array of file diffs with large files summarized
8358
+ */
8359
+ async function summarizeLargeFiles(diffs, options) {
8360
+ const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
8361
+ // Identify files that need summarization
8362
+ const filesToSummarize = [];
8363
+ const results = [...diffs];
8364
+ diffs.forEach((diff, index) => {
8365
+ if (diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary) {
8366
+ filesToSummarize.push({ index, diff });
8367
+ }
8368
+ });
8369
+ if (filesToSummarize.length === 0) {
8370
+ return results;
8371
+ }
8372
+ logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
8373
+ // Process large files in waves
8374
+ const summarizedFiles = await processInWaves(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer }), maxConcurrent);
8375
+ // Update results with summarized files
8376
+ summarizedFiles.forEach((summarizedDiff, i) => {
8377
+ const originalIndex = filesToSummarize[i].index;
8378
+ const originalTokens = results[originalIndex].tokenCount;
8379
+ const newTokens = summarizedDiff.tokenCount;
8380
+ logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
8381
+ results[originalIndex] = summarizedDiff;
8382
+ });
8383
+ return results;
8384
+ }
8385
+ /**
8386
+ * Pre-process a DiffNode tree, summarizing large files at the leaf level.
8387
+ * Returns a new DiffNode with updated token counts.
8388
+ */
8389
+ async function preprocessLargeFiles(rootNode, options) {
8390
+ // Collect all diffs from the tree
8391
+ const allDiffs = [];
8392
+ function collectDiffs(node) {
8393
+ allDiffs.push(...node.diffs);
8394
+ node.children.forEach(collectDiffs);
8395
+ }
8396
+ collectDiffs(rootNode);
8397
+ // Summarize large files
8398
+ const processedDiffs = await summarizeLargeFiles(allDiffs, options);
8399
+ // Create a map for quick lookup
8400
+ const diffMap = new Map();
8401
+ processedDiffs.forEach((diff) => diffMap.set(diff.file, diff));
8402
+ // Rebuild tree with processed diffs
8403
+ function rebuildNode(node) {
8404
+ return {
8405
+ path: node.path,
8406
+ diffs: node.diffs.map((diff) => diffMap.get(diff.file) || diff),
8407
+ children: node.children.map(rebuildNode),
8408
+ };
8409
+ }
8410
+ return rebuildNode(rootNode);
8411
+ }
8412
+
8220
8413
  /**
8221
8414
  * Create groups from a given node info.
8222
8415
  * @param {DiffNode} node - The node info to start grouping.
@@ -8269,6 +8462,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
8269
8462
  return directory;
8270
8463
  }
8271
8464
  }
8465
+ /**
8466
+ * Default output formatter for directory diffs.
8467
+ *
8468
+ * TODO: Future improvements to consider:
8469
+ * - Hierarchical output showing file -> directory -> overall summary
8470
+ * - Configurable verbosity levels (compact, standard, detailed)
8471
+ * - Machine-readable format option (JSON) for programmatic use
8472
+ * - Semantic grouping by change type (added/modified/deleted) or feature area
8473
+ * - Visual diff indicators showing magnitude of changes
8474
+ */
8272
8475
  const defaultOutputCallback = (group) => {
8273
8476
  let output = `
8274
8477
  -------\n* changes in "/${group.path}"\n\n`;
@@ -8280,41 +8483,124 @@ const defaultOutputCallback = (group) => {
8280
8483
  }
8281
8484
  return output;
8282
8485
  };
8283
- async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
8284
- const queue = new pQueue({ concurrency: 8 });
8486
+ /**
8487
+ * Process directory summarization in waves to respect concurrency limits
8488
+ * while maintaining predictable behavior.
8489
+ */
8490
+ async function summarizeInWaves(directories, options) {
8491
+ const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
8492
+ let totalTokenCount = initialTotal;
8493
+ const results = [...directories];
8494
+ // Create sorted indices by token count (descending) for prioritized processing
8495
+ const sortedIndices = directories
8496
+ .map((d, i) => ({ index: i, tokens: d.tokenCount }))
8497
+ .sort((a, b) => b.tokens - a.tokens);
8498
+ let cursor = 0;
8499
+ while (totalTokenCount > maxTokens && cursor < sortedIndices.length) {
8500
+ // Select wave candidates: directories that exceed minTokensForSummary
8501
+ const wave = [];
8502
+ for (let i = cursor; i < sortedIndices.length && wave.length < maxConcurrent; i++) {
8503
+ const { index, tokens } = sortedIndices[i];
8504
+ // Skip directories below the minimum threshold
8505
+ if (tokens < minTokensForSummary) {
8506
+ cursor = i + 1;
8507
+ continue;
8508
+ }
8509
+ // Skip directories that have already been summarized
8510
+ if (results[index].summary) {
8511
+ cursor = i + 1;
8512
+ continue;
8513
+ }
8514
+ wave.push(index);
8515
+ cursor = i + 1;
8516
+ }
8517
+ // No more eligible candidates
8518
+ if (wave.length === 0) {
8519
+ break;
8520
+ }
8521
+ logger.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
8522
+ // Process wave in parallel
8523
+ const waveResults = await Promise.all(wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer })));
8524
+ // Update results and recalculate total
8525
+ waveResults.forEach((result, i) => {
8526
+ const idx = wave[i];
8527
+ const originalTokens = results[idx].tokenCount;
8528
+ const newTokens = result.tokenCount;
8529
+ const reduction = originalTokens - newTokens;
8530
+ totalTokenCount -= reduction;
8531
+ results[idx] = result;
8532
+ logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
8533
+ color: 'magenta',
8534
+ });
8535
+ });
8536
+ logger.verbose(`Total token count: ${totalTokenCount}`, {
8537
+ color: totalTokenCount > maxTokens ? 'yellow' : 'green',
8538
+ });
8539
+ // Check if we're now under budget
8540
+ if (totalTokenCount <= maxTokens) {
8541
+ logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
8542
+ break;
8543
+ }
8544
+ }
8545
+ return { directories: results, totalTokenCount };
8546
+ }
8547
+ /**
8548
+ * Summarize diffs using a three-phase approach:
8549
+ *
8550
+ * Phase 1: Pre-process large files to prevent any single file from dominating
8551
+ * Phase 2: Group diffs by directory and assess total token count
8552
+ * Phase 3: Wave-based parallel summarization until under budget
8553
+ *
8554
+ * This approach ensures:
8555
+ * - Large files don't bias the summary
8556
+ * - Small changes preserve their detail (minTokensForSummary threshold)
8557
+ * - Efficient parallel processing with predictable behavior
8558
+ * - Early exit when under token budget
8559
+ */
8560
+ async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
8561
+ // Calculate maxFileTokens as 25% of maxTokens if not specified
8562
+ const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
8563
+ // PHASE 1: Pre-process large files
8564
+ logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
8565
+ const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
8566
+ maxFileTokens: effectiveMaxFileTokens,
8567
+ minTokensForSummary,
8568
+ maxConcurrent,
8569
+ tokenizer,
8570
+ logger,
8571
+ chain,
8572
+ textSplitter,
8573
+ });
8574
+ logger.stopSpinner('Files pre-processed').stopTimer();
8575
+ // PHASE 2: Directory grouping & assessment
8285
8576
  logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
8286
- const directoryDiffs = createDirectoryDiffs(rootDiffNode);
8287
- // Sort by token count descending
8577
+ const directoryDiffs = createDirectoryDiffs(preprocessedNode);
8578
+ // Sort by token count descending for consistent output ordering
8288
8579
  directoryDiffs.sort((a, b) => b.tokenCount - a.tokenCount);
8289
- let totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
8580
+ const totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
8290
8581
  logger.stopSpinner('Diffs Organized').stopTimer();
8291
- logger.startSpinner(`Consolidating Diffs`, { color: 'blue' });
8292
- const processingTasks = directoryDiffs.map((group, i) => {
8293
- return queue.add(async () => {
8294
- // If the diff token count is already less than the average req, we can skip summarizing.
8295
- const isLessThanAvgTokenReq = group.tokenCount <= maxTokens / directoryDiffs.length;
8296
- if (totalTokenCount <= maxTokens || isLessThanAvgTokenReq) {
8297
- return group;
8298
- }
8299
- group = await summarizeDirectoryDiff(group, {
8300
- chain,
8301
- textSplitter,
8302
- tokenizer,
8303
- });
8304
- // We need to subtract the old token count and add the new one
8305
- totalTokenCount = totalTokenCount - directoryDiffs[i].tokenCount + group.tokenCount;
8306
- directoryDiffs[i] = group;
8307
- logger
8308
- .verbose(`\n • Summarized diffs in "/${group.path}" `, { color: 'blue' })
8309
- .verbose(`\nTotal token count: ${totalTokenCount}`, {
8310
- color: totalTokenCount > maxTokens ? 'yellow' : 'green',
8311
- });
8312
- return group;
8313
- }, { priority: group.tokenCount });
8582
+ logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
8583
+ color: totalTokenCount > maxTokens ? 'yellow' : 'green',
8584
+ });
8585
+ // Early exit if already under budget
8586
+ if (totalTokenCount <= maxTokens) {
8587
+ logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
8588
+ return directoryDiffs.map(handleOutput).join('');
8589
+ }
8590
+ // PHASE 3: Wave-based summarization
8591
+ logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
8592
+ const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
8593
+ totalTokenCount,
8594
+ maxTokens,
8595
+ minTokensForSummary,
8596
+ maxConcurrent,
8597
+ logger,
8598
+ chain,
8599
+ textSplitter,
8600
+ tokenizer,
8314
8601
  });
8315
- await Promise.all(processingTasks);
8316
- logger.stopSpinner(`Summarized Diffs`);
8317
- return directoryDiffs.map(handleOutput).join('');
8602
+ logger.stopSpinner(`Diffs Consolidated`).stopTimer();
8603
+ return summarizedDiffs.map(handleOutput).join('');
8318
8604
  }
8319
8605
 
8320
8606
  /**
@@ -11314,7 +11600,7 @@ for (var i = 0; i < 256; i++) {
11314
11600
  simpleEscapeMap[i] = simpleEscapeSequence(i);
11315
11601
  }
11316
11602
 
11317
- async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
11603
+ async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
11318
11604
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
11319
11605
  const summarizationChain = loadSummarizationChain(model, {
11320
11606
  type: 'map_reduce',
@@ -11328,11 +11614,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
11328
11614
  logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
11329
11615
  const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
11330
11616
  logger.stopSpinner('Diffs Collected').stopTimer();
11331
- // Summarize diffs
11617
+ // Summarize diffs using three-phase approach:
11618
+ // 1. Pre-process large files to prevent bias
11619
+ // 2. Group by directory and assess token count
11620
+ // 3. Wave-based parallel summarization until under budget
11332
11621
  logger.startTimer();
11333
11622
  const summary = await summarizeDiffs(diffs, {
11334
11623
  tokenizer,
11335
- maxTokens: maxTokens || 4096,
11624
+ maxTokens: maxTokens || 2048,
11625
+ minTokensForSummary,
11626
+ maxFileTokens,
11627
+ maxConcurrent,
11336
11628
  textSplitter,
11337
11629
  chain: summarizationChain,
11338
11630
  logger,
@@ -11635,7 +11927,16 @@ const handler$3 = async (argv, logger) => {
11635
11927
  return await fileChangeParser({
11636
11928
  changes,
11637
11929
  commit: '--staged',
11638
- options: { tokenizer, git, llm, logger, maxTokens: config.service.tokenLimit },
11930
+ options: {
11931
+ tokenizer,
11932
+ git,
11933
+ llm,
11934
+ logger,
11935
+ maxTokens: config.service.tokenLimit,
11936
+ minTokensForSummary: config.service.minTokensForSummary,
11937
+ maxFileTokens: config.service.maxFileTokens,
11938
+ maxConcurrent: config.service.maxConcurrent,
11939
+ },
11639
11940
  });
11640
11941
  }
11641
11942
  const commitMsg = await generateAndReviewLoop({
package/dist/index.js CHANGED
@@ -26,7 +26,6 @@ var outputs = require('@langchain/core/outputs');
26
26
  var manager = require('@langchain/core/callbacks/manager');
27
27
  require('@langchain/core/utils/json_patch');
28
28
  var simpleGit = require('simple-git');
29
- var pQueue = require('p-queue');
30
29
  var documents = require('@langchain/core/documents');
31
30
  var diff = require('diff');
32
31
  require('@langchain/core/messages');
@@ -69,7 +68,7 @@ var readline__namespace = /*#__PURE__*/_interopNamespaceDefault(readline);
69
68
  /**
70
69
  * Current build version from package.json
71
70
  */
72
- const BUILD_VERSION = "0.24.0";
71
+ const BUILD_VERSION = "0.26.0";
73
72
 
74
73
  const isInteractive = (config) => {
75
74
  return config?.mode === 'interactive' || !!config?.interactive;
@@ -184,6 +183,17 @@ async function updateFileSection({ filePath, startComment, endComment, getNewCon
184
183
  fs.writeFileSync(filePath, newLines.join('\n'));
185
184
  }
186
185
 
186
+ /**
187
+ * Prompt template for summarizing code diffs.
188
+ *
189
+ * TODO: Future improvements to consider:
190
+ * - Separate prompts for file-level vs directory-level summarization
191
+ * - Include file type context (e.g., "This is a React component", "This is a test file")
192
+ * - Add guidance for preserving semantic meaning of changes
193
+ * - Consider change type (added/modified/deleted) in prompt for better context
194
+ * - Include hints about the programming language for more idiomatic summaries
195
+ * - Add support for custom user-provided summarization prompts via config
196
+ */
187
197
  const template$5 = `GOAL: Use functional abstractions to summarize the following text
188
198
 
189
199
  RULES: Avoid phrases like "this change", "this code", or "this function" etc. Instead refer to the function, variable, or class by name.
@@ -369,8 +379,11 @@ function getDefaultServiceApiKey(config) {
369
379
  const DEFAULT_OPENAI_LLM_SERVICE = {
370
380
  provider: 'openai',
371
381
  model: 'gpt-4o-mini',
372
- tokenLimit: 2024,
382
+ tokenLimit: 4096,
373
383
  temperature: 0.32,
384
+ maxConcurrent: 12,
385
+ minTokensForSummary: 800,
386
+ maxFileTokens: 2000,
374
387
  authentication: {
375
388
  type: 'APIKey',
376
389
  credentials: {
@@ -382,6 +395,10 @@ const DEFAULT_ANTHROPIC_LLM_SERVICE = {
382
395
  provider: 'anthropic',
383
396
  model: 'claude-3-5-sonnet-20240620',
384
397
  temperature: 0.32,
398
+ tokenLimit: 4096,
399
+ maxConcurrent: 12,
400
+ minTokensForSummary: 800,
401
+ maxFileTokens: 2000,
385
402
  authentication: {
386
403
  type: 'APIKey',
387
404
  credentials: {
@@ -394,9 +411,11 @@ const DEFAULT_OLLAMA_LLM_SERVICE = {
394
411
  model: 'llama3',
395
412
  endpoint: 'http://localhost:11434',
396
413
  maxConcurrent: 1,
397
- tokenLimit: 2024,
414
+ tokenLimit: 4096,
398
415
  temperature: 0.4,
399
416
  maxParsingAttempts: 3,
417
+ minTokensForSummary: 800,
418
+ maxFileTokens: 2000,
400
419
  authentication: {
401
420
  type: 'None',
402
421
  credentials: undefined,
@@ -644,6 +663,24 @@ function loadGitConfig(config) {
644
663
  service = {
645
664
  provider: gitConfigParsed.coco?.serviceProvider,
646
665
  model: gitConfigParsed.coco?.serviceModel,
666
+ tokenLimit: gitConfigParsed.coco?.serviceTokenLimit
667
+ ? Number(gitConfigParsed.coco.serviceTokenLimit)
668
+ : undefined,
669
+ temperature: gitConfigParsed.coco?.serviceTemperature
670
+ ? Number(gitConfigParsed.coco.serviceTemperature)
671
+ : undefined,
672
+ maxConcurrent: gitConfigParsed.coco?.serviceMaxConcurrent
673
+ ? Number(gitConfigParsed.coco.serviceMaxConcurrent)
674
+ : undefined,
675
+ minTokensForSummary: gitConfigParsed.coco?.serviceMinTokensForSummary
676
+ ? Number(gitConfigParsed.coco.serviceMinTokensForSummary)
677
+ : undefined,
678
+ maxFileTokens: gitConfigParsed.coco?.serviceMaxFileTokens
679
+ ? Number(gitConfigParsed.coco.serviceMaxFileTokens)
680
+ : undefined,
681
+ maxParsingAttempts: gitConfigParsed.coco?.serviceMaxParsingAttempts
682
+ ? Number(gitConfigParsed.coco.serviceMaxParsingAttempts)
683
+ : undefined,
647
684
  authentication: {
648
685
  type: 'APIKey',
649
686
  credentials: {
@@ -699,6 +736,24 @@ const appendToGitConfig = async (filePath, config) => {
699
736
  if (service.authentication.type === 'APIKey') {
700
737
  contentLines.push(` serviceApiKey = ${service.authentication.credentials.apiKey}`);
701
738
  }
739
+ if (service.tokenLimit !== undefined) {
740
+ contentLines.push(` serviceTokenLimit = ${service.tokenLimit}`);
741
+ }
742
+ if (service.temperature !== undefined) {
743
+ contentLines.push(` serviceTemperature = ${service.temperature}`);
744
+ }
745
+ if (service.maxConcurrent !== undefined) {
746
+ contentLines.push(` serviceMaxConcurrent = ${service.maxConcurrent}`);
747
+ }
748
+ if (service.minTokensForSummary !== undefined) {
749
+ contentLines.push(` serviceMinTokensForSummary = ${service.minTokensForSummary}`);
750
+ }
751
+ if (service.maxFileTokens !== undefined) {
752
+ contentLines.push(` serviceMaxFileTokens = ${service.maxFileTokens}`);
753
+ }
754
+ if (service.maxParsingAttempts !== undefined) {
755
+ contentLines.push(` serviceMaxParsingAttempts = ${service.maxParsingAttempts}`);
756
+ }
702
757
  if (service.requestOptions?.timeout) {
703
758
  contentLines.push(` serviceRequestOptionsTimeout = ${service.requestOptions.timeout}`);
704
759
  }
@@ -1061,6 +1116,16 @@ const schema$1 = {
1061
1116
  "description": "The maximum number of requests to make concurrently.",
1062
1117
  "default": 6
1063
1118
  },
1119
+ "minTokensForSummary": {
1120
+ "type": "number",
1121
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1122
+ "default": 400
1123
+ },
1124
+ "maxFileTokens": {
1125
+ "type": "number",
1126
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1127
+ "default": "undefined (uses 0.25 * tokenLimit)"
1128
+ },
1064
1129
  "authentication": {
1065
1130
  "anyOf": [
1066
1131
  {
@@ -1821,6 +1886,16 @@ const schema$1 = {
1821
1886
  "description": "The maximum number of requests to make concurrently.",
1822
1887
  "default": 6
1823
1888
  },
1889
+ "minTokensForSummary": {
1890
+ "type": "number",
1891
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
1892
+ "default": 400
1893
+ },
1894
+ "maxFileTokens": {
1895
+ "type": "number",
1896
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
1897
+ "default": "undefined (uses 0.25 * tokenLimit)"
1898
+ },
1824
1899
  "authentication": {
1825
1900
  "anyOf": [
1826
1901
  {
@@ -1972,6 +2047,16 @@ const schema$1 = {
1972
2047
  "description": "The maximum number of requests to make concurrently.",
1973
2048
  "default": 6
1974
2049
  },
2050
+ "minTokensForSummary": {
2051
+ "type": "number",
2052
+ "description": "Minimum token count for a directory/file group to be eligible for summarization. Groups below this threshold preserve raw diffs to maintain detail.",
2053
+ "default": 400
2054
+ },
2055
+ "maxFileTokens": {
2056
+ "type": "number",
2057
+ "description": "Maximum tokens allowed for a single file diff before it gets pre-summarized. Prevents large files from biasing the overall summary. If not set, defaults to 25% of tokenLimit.",
2058
+ "default": "undefined (uses 0.25 * tokenLimit)"
2059
+ },
1975
2060
  "authentication": {
1976
2061
  "anyOf": [
1977
2062
  {
@@ -8239,6 +8324,114 @@ async function summarize(documents$1, { chain, textSplitter, options }) {
8239
8324
  return res.text && res.text.trim();
8240
8325
  }
8241
8326
 
8327
/**
 * Summarize a single file diff that exceeds the token threshold.
 *
 * The raw diff text is handed to `summarize` as one document whose metadata
 * carries the file name and the existing summary. On success a shallow copy
 * of `fileDiff` is returned with `diff` replaced by the summary text and
 * `tokenCount` recomputed by calling `tokenizer` on that text.
 *
 * @param fileDiff - File diff entry; its `file`, `diff` and `summary` fields are read.
 * @param context - `chain` and `textSplitter` are forwarded to `summarize`;
 *   `tokenizer` is called with the summary text to obtain the new token count.
 * @returns The summarized copy, or the original `fileDiff` object untouched
 *   when summarization throws or produces no text.
 */
async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer }) {
    try {
        const fileSummary = await summarize([
            {
                pageContent: fileDiff.diff,
                metadata: {
                    file: fileDiff.file,
                    summary: fileDiff.summary,
                },
            },
        ], {
            chain,
            textSplitter,
            options: {
                returnIntermediateSteps: false,
            },
        });
        // `summarize` returns `res.text && res.text.trim()`, which may be
        // undefined or an empty string. Replacing the diff with nothing would
        // silently erase this file's changes, so treat it as a failure and
        // fall through to the catch branch that keeps the raw diff.
        if (typeof fileSummary !== 'string' || fileSummary.length === 0) {
            throw new Error('summarizer returned no text');
        }
        return {
            ...fileDiff,
            diff: fileSummary,
            tokenCount: tokenizer(fileSummary),
        };
    }
    catch (error) {
        // Best effort: on any error, return the original diff unchanged
        console.error(`Failed to summarize file ${fileDiff.file}:`, error);
        return fileDiff;
    }
}
8360
/**
 * Process items in consecutive waves to respect a concurrency limit.
 *
 * Items are split into slices of at most `maxConcurrent` elements. All items
 * of a slice are processed in parallel, and the next slice only starts once
 * every promise of the current one has resolved. Results are returned in the
 * same order as `items`. A rejection from `processor` propagates to the
 * caller (via `Promise.all`) and aborts the remaining waves.
 *
 * @param items - Values to process.
 * @param processor - Async callback invoked with a single item.
 * @param maxConcurrent - Maximum number of items per wave. Values that are
 *   missing, non-numeric or below 1 fall back to 1, and fractional values are
 *   floored, so the loop always advances by a whole, positive step.
 * @returns Array of processor results, index-aligned with `items`.
 */
async function processInWaves(items, processor, maxConcurrent) {
    // A step of 0, a negative step or NaN would make the loop below either
    // spin forever or skip every item, so normalise the wave size first.
    // (NaN fails the `>= 1` comparison and therefore also falls back to 1.)
    const requested = Math.floor(Number(maxConcurrent));
    const waveSize = requested >= 1 ? requested : 1;
    const results = [];
    for (let start = 0; start < items.length; start += waveSize) {
        const wave = items.slice(start, start + waveSize);
        // Pass only the item: the index supplied by `map` would be relative
        // to the wave, not to `items`, and is meaningless to the processor.
        const waveResults = await Promise.all(wave.map((item) => processor(item)));
        results.push(...waveResults);
    }
    return results;
}
8372
/**
 * Replace oversized file diffs with summaries before directory grouping.
 *
 * A diff is selected when its `tokenCount` is strictly greater than
 * `maxFileTokens` and at least `minTokensForSummary`. Selected diffs are
 * summarized through `processInWaves` / `summarizeFileDiff`; all other
 * entries are passed through untouched. The input array is not mutated.
 *
 * @param diffs - Array of file diffs to process
 * @param options - `maxFileTokens`, `minTokensForSummary`, `maxConcurrent`,
 *   `tokenizer`, `logger`, `chain` and `textSplitter`
 * @returns A new array, index-aligned with `diffs`, in which every selected
 *   entry has been replaced by the result of `summarizeFileDiff`
 */
async function summarizeLargeFiles(diffs, options) {
    const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter } = options;
    const output = [...diffs];
    // Remember each candidate's position so its summary can be written back
    const oversized = diffs
        .map((diff, index) => ({ index, diff }))
        .filter(({ diff }) => diff.tokenCount > maxFileTokens && diff.tokenCount >= minTokensForSummary);
    if (oversized.length === 0) {
        return output;
    }
    logger.verbose(`Pre-summarizing ${oversized.length} large file(s)...`, { color: 'blue' });
    const summarizerContext = { chain, textSplitter, tokenizer };
    const summaries = await processInWaves(oversized, async (entry) => summarizeFileDiff(entry.diff, summarizerContext), maxConcurrent);
    // `summaries` is index-aligned with `oversized`
    for (const [position, summarized] of summaries.entries()) {
        const targetIndex = oversized[position].index;
        const before = output[targetIndex].tokenCount;
        const after = summarized.tokenCount;
        logger.verbose(` - ${summarized.file}: ${before} -> ${after} tokens`, { color: 'magenta' });
        output[targetIndex] = summarized;
    }
    return output;
}
8407
/**
 * Run large-file summarization over every diff in a DiffNode tree.
 *
 * All diffs are gathered depth-first (a node's own diffs before those of its
 * children), passed to `summarizeLargeFiles`, and the tree is then rebuilt
 * with each diff swapped for its processed counterpart, matched by `file`.
 * The input tree is left untouched; rebuilt nodes carry only `path`, `diffs`
 * and `children`.
 *
 * @param rootNode - Root of the tree; nodes expose `path`, `diffs`, `children`
 * @param options - Forwarded unchanged to `summarizeLargeFiles`
 * @returns A new tree with the same shape and processed diffs
 */
async function preprocessLargeFiles(rootNode, options) {
    // Depth-first flattening: own diffs first, then each child's subtree
    const flatten = (node) => [
        ...node.diffs,
        ...node.children.flatMap(flatten),
    ];
    const processed = await summarizeLargeFiles(flatten(rootNode), options);
    // Index processed diffs by file path (a later duplicate wins)
    const byFile = new Map(processed.map((entry) => [entry.file, entry]));
    const rebuild = (node) => ({
        path: node.path,
        diffs: node.diffs.map((entry) => byFile.get(entry.file) || entry),
        children: node.children.map(rebuild),
    });
    return rebuild(rootNode);
}
8434
+
8242
8435
  /**
8243
8436
  * Create groups from a given node info.
8244
8437
  * @param {DiffNode} node - The node info to start grouping.
@@ -8291,6 +8484,16 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
8291
8484
  return directory;
8292
8485
  }
8293
8486
  }
8487
+ /**
8488
+ * Default output formatter for directory diffs.
8489
+ *
8490
+ * TODO: Future improvements to consider:
8491
+ * - Hierarchical output showing file -> directory -> overall summary
8492
+ * - Configurable verbosity levels (compact, standard, detailed)
8493
+ * - Machine-readable format option (JSON) for programmatic use
8494
+ * - Semantic grouping by change type (added/modified/deleted) or feature area
8495
+ * - Visual diff indicators showing magnitude of changes
8496
+ */
8294
8497
  const defaultOutputCallback = (group) => {
8295
8498
  let output = `
8296
8499
  -------\n* changes in "/${group.path}"\n\n`;
@@ -8302,41 +8505,124 @@ const defaultOutputCallback = (group) => {
8302
8505
  }
8303
8506
  return output;
8304
8507
  };
8305
- async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
8306
- const queue = new pQueue({ concurrency: 8 });
8508
/**
 * Summarize directory diffs in parallel batches until the running token total
 * drops to `maxTokens` or no eligible directory is left.
 *
 * Directories are visited once, largest `tokenCount` first. A directory is
 * skipped when its token count is below `minTokensForSummary` or when it
 * already has a truthy `summary`. Each batch holds at most `maxConcurrent`
 * directories and is summarized with `summarizeDirectoryDiff`; after every
 * batch the total is reduced by the tokens saved.
 *
 * @param directories - Directory diff groups; the array itself is not mutated
 * @param options - `totalTokenCount` (starting total), `maxTokens`,
 *   `minTokensForSummary`, `maxConcurrent`, `logger`, `chain`,
 *   `textSplitter` and `tokenizer`
 * @returns `directories` (index-aligned copy with summarized entries swapped
 *   in) and the final `totalTokenCount`
 */
async function summarizeInWaves(directories, options) {
    const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, } = options;
    const results = [...directories];
    let runningTotal = initialTotal;
    // Visit order: largest directories first, so the biggest savings come early
    const ranked = directories.map((d, i) => ({ index: i, tokens: d.tokenCount }));
    ranked.sort((a, b) => b.tokens - a.tokens);
    let cursor = 0;
    while (runningTotal > maxTokens && cursor < ranked.length) {
        // Fill the next batch; every inspected entry is consumed, eligible or not
        const batch = [];
        while (cursor < ranked.length && batch.length < maxConcurrent) {
            const candidate = ranked[cursor];
            cursor += 1;
            // Too small to be worth summarizing, or already summarized
            if (candidate.tokens < minTokensForSummary || results[candidate.index].summary) {
                continue;
            }
            batch.push(candidate.index);
        }
        // No more eligible candidates
        if (batch.length === 0) {
            break;
        }
        logger.verbose(`\nProcessing wave of ${batch.length} directories...`, { color: 'blue' });
        // Summarize the whole batch in parallel
        const pending = batch.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer }));
        const summarized = await Promise.all(pending);
        // Swap in the summaries and deduct the saved tokens from the total
        for (const [position, result] of summarized.entries()) {
            const idx = batch[position];
            const before = results[idx].tokenCount;
            const after = result.tokenCount;
            runningTotal -= before - after;
            results[idx] = result;
            logger.verbose(` • Summarized "/${result.path}": ${before} -> ${after} tokens`, {
                color: 'magenta',
            });
        }
        const overBudget = runningTotal > maxTokens;
        logger.verbose(`Total token count: ${runningTotal}`, {
            color: overBudget ? 'yellow' : 'green',
        });
        // Stop as soon as the budget is met
        if (runningTotal <= maxTokens) {
            logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
            break;
        }
    }
    return { directories: results, totalTokenCount: runningTotal };
}
8569
+ /**
8570
+ * Summarize diffs using a three-phase approach:
8571
+ *
8572
+ * Phase 1: Pre-process large files to prevent any single file from dominating
8573
+ * Phase 2: Group diffs by directory and assess total token count
8574
+ * Phase 3: Wave-based parallel summarization until under budget
8575
+ *
8576
+ * This approach ensures:
8577
+ * - Large files don't bias the summary
8578
+ * - Small changes preserve their detail (minTokensForSummary threshold)
8579
+ * - Efficient parallel processing with predictable behavior
8580
+ * - Early exit when under token budget
8581
+ */
8582
+ async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, handleOutput = defaultOutputCallback, }) {
8583
+ // Calculate maxFileTokens as 25% of maxTokens if not specified
8584
+ const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
8585
+ // PHASE 1: Pre-process large files
8586
+ logger.startTimer().startSpinner(`Pre-processing large files...`, { color: 'blue' });
8587
+ const preprocessedNode = await preprocessLargeFiles(rootDiffNode, {
8588
+ maxFileTokens: effectiveMaxFileTokens,
8589
+ minTokensForSummary,
8590
+ maxConcurrent,
8591
+ tokenizer,
8592
+ logger,
8593
+ chain,
8594
+ textSplitter,
8595
+ });
8596
+ logger.stopSpinner('Files pre-processed').stopTimer();
8597
+ // PHASE 2: Directory grouping & assessment
8307
8598
  logger.startTimer().startSpinner(`Organizing Diffs...`, { color: 'blue' });
8308
- const directoryDiffs = createDirectoryDiffs(rootDiffNode);
8309
- // Sort by token count descending
8599
+ const directoryDiffs = createDirectoryDiffs(preprocessedNode);
8600
+ // Sort by token count descending for consistent output ordering
8310
8601
  directoryDiffs.sort((a, b) => b.tokenCount - a.tokenCount);
8311
- let totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
8602
+ const totalTokenCount = directoryDiffs.reduce((sum, group) => sum + group.tokenCount, 0);
8312
8603
  logger.stopSpinner('Diffs Organized').stopTimer();
8313
- logger.startSpinner(`Consolidating Diffs`, { color: 'blue' });
8314
- const processingTasks = directoryDiffs.map((group, i) => {
8315
- return queue.add(async () => {
8316
- // If the diff token count is already less than the average req, we can skip summarizing.
8317
- const isLessThanAvgTokenReq = group.tokenCount <= maxTokens / directoryDiffs.length;
8318
- if (totalTokenCount <= maxTokens || isLessThanAvgTokenReq) {
8319
- return group;
8320
- }
8321
- group = await summarizeDirectoryDiff(group, {
8322
- chain,
8323
- textSplitter,
8324
- tokenizer,
8325
- });
8326
- // We need to subtract the old token count and add the new one
8327
- totalTokenCount = totalTokenCount - directoryDiffs[i].tokenCount + group.tokenCount;
8328
- directoryDiffs[i] = group;
8329
- logger
8330
- .verbose(`\n • Summarized diffs in "/${group.path}" `, { color: 'blue' })
8331
- .verbose(`\nTotal token count: ${totalTokenCount}`, {
8332
- color: totalTokenCount > maxTokens ? 'yellow' : 'green',
8333
- });
8334
- return group;
8335
- }, { priority: group.tokenCount });
8604
+ logger.verbose(`Total token count: ${totalTokenCount}, max allowed: ${maxTokens}`, {
8605
+ color: totalTokenCount > maxTokens ? 'yellow' : 'green',
8606
+ });
8607
+ // Early exit if already under budget
8608
+ if (totalTokenCount <= maxTokens) {
8609
+ logger.verbose(`Already under token budget, skipping summarization.`, { color: 'green' });
8610
+ return directoryDiffs.map(handleOutput).join('');
8611
+ }
8612
+ // PHASE 3: Wave-based summarization
8613
+ logger.startTimer().startSpinner(`Consolidating Diffs...`, { color: 'blue' });
8614
+ const { directories: summarizedDiffs } = await summarizeInWaves(directoryDiffs, {
8615
+ totalTokenCount,
8616
+ maxTokens,
8617
+ minTokensForSummary,
8618
+ maxConcurrent,
8619
+ logger,
8620
+ chain,
8621
+ textSplitter,
8622
+ tokenizer,
8336
8623
  });
8337
- await Promise.all(processingTasks);
8338
- logger.stopSpinner(`Summarized Diffs`);
8339
- return directoryDiffs.map(handleOutput).join('');
8624
+ logger.stopSpinner(`Diffs Consolidated`).stopTimer();
8625
+ return summarizedDiffs.map(handleOutput).join('');
8340
8626
  }
8341
8627
 
8342
8628
  /**
@@ -11336,7 +11622,7 @@ for (var i = 0; i < 256; i++) {
11336
11622
  simpleEscapeMap[i] = simpleEscapeSequence(i);
11337
11623
  }
11338
11624
 
11339
- async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens }, }) {
11625
+ async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, }, }) {
11340
11626
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
11341
11627
  const summarizationChain = loadSummarizationChain(model, {
11342
11628
  type: 'map_reduce',
@@ -11350,11 +11636,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
11350
11636
  logger.startTimer().startSpinner(`Collecting Diffs...\n`, { color: 'blue' });
11351
11637
  const diffs = await collectDiffs(rootTreeNode, (path) => getDiff(path, commit, { git, logger }), tokenizer, logger);
11352
11638
  logger.stopSpinner('Diffs Collected').stopTimer();
11353
- // Summarize diffs
11639
+ // Summarize diffs using three-phase approach:
11640
+ // 1. Pre-process large files to prevent bias
11641
+ // 2. Group by directory and assess token count
11642
+ // 3. Wave-based parallel summarization until under budget
11354
11643
  logger.startTimer();
11355
11644
  const summary = await summarizeDiffs(diffs, {
11356
11645
  tokenizer,
11357
- maxTokens: maxTokens || 4096,
11646
+ maxTokens: maxTokens || 2048,
11647
+ minTokensForSummary,
11648
+ maxFileTokens,
11649
+ maxConcurrent,
11358
11650
  textSplitter,
11359
11651
  chain: summarizationChain,
11360
11652
  logger,
@@ -11657,7 +11949,16 @@ const handler$3 = async (argv, logger) => {
11657
11949
  return await fileChangeParser({
11658
11950
  changes,
11659
11951
  commit: '--staged',
11660
- options: { tokenizer, git, llm, logger, maxTokens: config.service.tokenLimit },
11952
+ options: {
11953
+ tokenizer,
11954
+ git,
11955
+ llm,
11956
+ logger,
11957
+ maxTokens: config.service.tokenLimit,
11958
+ minTokensForSummary: config.service.minTokensForSummary,
11959
+ maxFileTokens: config.service.maxFileTokens,
11960
+ maxConcurrent: config.service.maxConcurrent,
11961
+ },
11661
11962
  });
11662
11963
  }
11663
11964
  const commitMsg = await generateAndReviewLoop({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "git-coco",
3
- "version": "0.24.0",
3
+ "version": "0.26.0",
4
4
  "description": "zero-effort git commits with coco.",
5
5
  "author": "gfargo <ghfargo@gmail.com>",
6
6
  "license": "MIT",
@@ -53,7 +53,7 @@
53
53
  "@types/diff": "^8.0.0",
54
54
  "@types/ini": "^4.1.1",
55
55
  "@types/jest": "^30.0.0",
56
- "@types/node": "^24.0.8",
56
+ "@types/node": "^25.0.10",
57
57
  "@types/yargs": "^17.0.33",
58
58
  "@typescript-eslint/eslint-plugin": "^7.13.1",
59
59
  "@typescript-eslint/parser": "^7.13.1",
@@ -77,20 +77,20 @@
77
77
  "@commitlint/core": "^19.8.0",
78
78
  "@inquirer/prompts": "3.3.0",
79
79
  "@langchain/anthropic": "^0.3.14",
80
- "@langchain/community": "^0.3.32",
81
- "@langchain/core": "^0.3.40",
80
+ "@langchain/community": "^0.3.58",
81
+ "@langchain/core": "^0.3.80",
82
82
  "@langchain/ollama": "^0.2.0",
83
83
  "@langchain/openai": "^0.6.7",
84
84
  "ajv": "^8.16.0",
85
85
  "chalk": "4.1.2",
86
- "diff": "8.0.2",
86
+ "diff": "8.0.3",
87
87
  "ini": "5.0.0",
88
88
  "minimatch": "^9.0.5",
89
89
  "ora": "5.4.1",
90
90
  "p-queue": "5.0.0",
91
91
  "performance-now": "2.1.0",
92
92
  "pretty-ms": "7.0.1",
93
- "simple-git": "3.28.0",
93
+ "simple-git": "3.30.0",
94
94
  "tiktoken": "^1.0.21",
95
95
  "yargs": "17.7.2"
96
96
  },