git-coco 0.43.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,7 +54,7 @@ import { pathToFileURL } from 'url';
54
54
  /**
55
55
  * Current build version from package.json
56
56
  */
57
- const BUILD_VERSION = "0.43.0";
57
+ const BUILD_VERSION = "0.45.0";
58
58
 
59
59
  const isInteractive = (config) => {
60
60
  return config?.mode === 'interactive' || !!config?.interactive;
@@ -1228,6 +1228,18 @@ const schema$1 = {
1228
1228
  "$ref": "#/definitions/DynamicModelPreference",
1229
1229
  "description": "Default dynamic routing preference when model is set to \"dynamic\".",
1230
1230
  "default": "balanced"
1231
+ },
1232
+ "fastPath": {
1233
+ "type": "object",
1234
+ "properties": {
1235
+ "markdown": {
1236
+ "type": "boolean",
1237
+ "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
1238
+ "default": false
1239
+ }
1240
+ },
1241
+ "additionalProperties": false,
1242
+ "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
1231
1243
  }
1232
1244
  },
1233
1245
  "required": [
@@ -1641,6 +1653,18 @@ const schema$1 = {
1641
1653
  "$ref": "#/definitions/DynamicModelPreference",
1642
1654
  "description": "Default dynamic routing preference when model is set to \"dynamic\".",
1643
1655
  "default": "balanced"
1656
+ },
1657
+ "fastPath": {
1658
+ "type": "object",
1659
+ "properties": {
1660
+ "markdown": {
1661
+ "type": "boolean",
1662
+ "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
1663
+ "default": false
1664
+ }
1665
+ },
1666
+ "additionalProperties": false,
1667
+ "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
1644
1668
  }
1645
1669
  },
1646
1670
  "required": [
@@ -1797,6 +1821,18 @@ const schema$1 = {
1797
1821
  "$ref": "#/definitions/DynamicModelPreference",
1798
1822
  "description": "Default dynamic routing preference when model is set to \"dynamic\".",
1799
1823
  "default": "balanced"
1824
+ },
1825
+ "fastPath": {
1826
+ "type": "object",
1827
+ "properties": {
1828
+ "markdown": {
1829
+ "type": "boolean",
1830
+ "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
1831
+ "default": false
1832
+ }
1833
+ },
1834
+ "additionalProperties": false,
1835
+ "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
1800
1836
  }
1801
1837
  },
1802
1838
  "required": [
@@ -7890,6 +7926,109 @@ async function summarize(documents, { chain, textSplitter, options, logger, toke
7890
7926
  return res.text && res.text.trim();
7891
7927
  }
7892
7928
 
7929
+ /**
7930
+ * Markdown-aware fast path (#861, angle 5). For modification diffs to
7931
+ * `.md` / `.mdx` / `.markdown` files, build a templated summary from
7932
+ * the changed structure (added / removed / updated headings) instead
7933
+ * of paying for an LLM call. Mirrors `trivialDiff` from #845: a deterministic
7934
+ * skip when the diff's meaning is captured by its shape.
7935
+ *
7936
+ * Quality / cost trade-off, on purpose: LLM summaries of markdown edits
7937
+ * are wordier ("expanded the configuration section with new examples,
7938
+ * fixed typos in troubleshooting") but most of that detail isn't load-
7939
+ * bearing for a commit message. The templated summary names the
7940
+ * structural changes (which sections moved) plus a +/- line count, and
7941
+ * defers to the LLM only when the diff has no clear structural signals
7942
+ * (paragraph-only edits, where a templated summary would actually drop
7943
+ * useful context).
7944
+ */
7945
/**
 * Lower-case file suffixes (leading dot included) that mark a path as
 * markdown for the fast path.
 */
const MARKDOWN_EXTENSIONS = [
    '.md',
    '.markdown',
    '.mdx',
];
/**
 * Largest number of headings spelled out per bucket in a templated
 * summary before the remainder is collapsed into a count.
 */
const MAX_HEADINGS_PER_BUCKET = 6;
7947
/**
 * Report whether a path names a markdown file.
 *
 * The check is case-insensitive: the path is lower-cased and then compared
 * against each suffix listed in `MARKDOWN_EXTENSIONS`.
 *
 * @param {string} path - File path to classify.
 * @returns {boolean} `true` when the path ends with a markdown extension.
 */
function isMarkdownFile(path) {
    const normalized = path.toLowerCase();
    for (const extension of MARKDOWN_EXTENSIONS) {
        if (normalized.endsWith(extension)) {
            return true;
        }
    }
    return false;
}
7951
/**
 * Build a templated summary for a markdown file diff without an LLM call.
 *
 * Walks the unified diff line by line, skipping diff metadata, and collects
 * the headings found on added (`+`) and removed (`-`) lines together with
 * the number of added / removed lines.
 *
 * Note: every `+` / `-` line is handed to `parseHeading`, so a `#`-prefixed
 * line inside a fenced code block is counted as a heading as well.
 *
 * @param {{ file: string, diff: string }} fileDiff - Path and unified diff text.
 * @returns {string | undefined} The templated summary, or `undefined` when
 *   the file is not markdown, no line changed, or no heading changed (the
 *   caller then falls through to its regular path).
 */
function summarizeMarkdownDiff(fileDiff) {
    if (!isMarkdownFile(fileDiff.file)) {
        return undefined;
    }
    // Per-marker accumulators keyed by the diff line's first character.
    const headingsByMarker = { '+': new Set(), '-': new Set() };
    const lineCountByMarker = { '+': 0, '-': 0 };
    for (const line of fileDiff.diff.split('\n')) {
        if (isHeaderLine$1(line)) {
            continue;
        }
        const marker = line.charAt(0);
        if (marker !== '+' && marker !== '-') {
            continue;
        }
        lineCountByMarker[marker] += 1;
        const heading = parseHeading(line.slice(1));
        if (heading) {
            headingsByMarker[marker].add(heading);
        }
    }
    const addedHeadings = headingsByMarker['+'];
    const removedHeadings = headingsByMarker['-'];
    const addedLines = lineCountByMarker['+'];
    const removedLines = lineCountByMarker['-'];
    // Nothing changed at all: leave the diff to the caller.
    if (addedLines === 0 && removedLines === 0) {
        return undefined;
    }
    // Paragraph-only edits carry no structural signal, so they are not
    // fast-pathed.
    if (addedHeadings.size === 0 && removedHeadings.size === 0) {
        return undefined;
    }
    // A heading present on both sides is reported once as "updated"
    // instead of as one addition plus one removal.
    const updated = [];
    const purelyAdded = [];
    for (const heading of addedHeadings) {
        if (removedHeadings.has(heading)) {
            updated.push(heading);
        }
        else {
            purelyAdded.push(heading);
        }
    }
    const purelyRemoved = [...removedHeadings].filter((heading) => !addedHeadings.has(heading));
    const sentences = [`Updated markdown \`${fileDiff.file}\``];
    const buckets = [
        ['new sections', purelyAdded],
        ['removed sections', purelyRemoved],
        ['updated sections', updated],
    ];
    for (const [label, bucket] of buckets) {
        if (bucket.length) {
            sentences.push(`${label}: ${formatHeadingList(bucket)}`);
        }
    }
    sentences.push(`+${addedLines}/-${removedLines} lines`);
    return `${sentences.join('. ')}.`;
}
8006
/**
 * Join heading names into a comma-separated list, collapsing the tail of a
 * long list into a `(+N more)` suffix.
 *
 * @param {string[]} headings - Heading texts in display order.
 * @param {number} [maxShown=MAX_HEADINGS_PER_BUCKET] - Maximum number of
 *   headings to spell out. Must be a positive integer; callers that omit it
 *   get the module-wide default, so existing call sites behave as before.
 * @returns {string} All headings joined by `, ` when the list fits within
 *   `maxShown`, otherwise the first `maxShown` headings followed by
 *   ` (+N more)`.
 * @throws {RangeError} When `maxShown` is not a positive integer.
 */
function formatHeadingList(headings, maxShown = MAX_HEADINGS_PER_BUCKET) {
    // Reject limits that would yield a malformed list such as " (+3 more)".
    if (!Number.isInteger(maxShown) || maxShown < 1) {
        throw new RangeError(`maxShown must be a positive integer, received ${maxShown}`);
    }
    if (headings.length <= maxShown) {
        return headings.join(', ');
    }
    const shown = headings.slice(0, maxShown);
    const remainder = headings.length - shown.length;
    return `${shown.join(', ')} (+${remainder} more)`;
}
8014
/**
 * Decide whether a line of diff text is diff metadata (file headers, hunk
 * markers, mode / rename / binary notices) rather than file content.
 *
 * @param {string} line - One line of a unified diff.
 * @returns {boolean} `true` when the line starts with a known metadata prefix.
 */
function isHeaderLine$1(line) {
    const metadataPrefixes = [
        'diff --git',
        'index ',
        '--- ',
        '+++ ',
        '@@',
        'new file mode',
        'deleted file mode',
        'similarity index',
        'rename from ',
        'rename to ',
        'Binary files ',
    ];
    return metadataPrefixes.some((prefix) => line.startsWith(prefix));
}
8027
/**
 * Extract the text of an ATX-style markdown heading.
 *
 * A heading is one to six `#` characters, at least one whitespace
 * character, then the heading text. An optional closing run of `#`
 * characters (for example `## Setup ##`) is stripped so the same section is
 * named identically whether or not the author closed the heading. A `#`
 * that is part of the text (`C#`, `Issue #42`) is preserved because a
 * closing run must be preceded by whitespace and reach the end of the line.
 *
 * @param {string} line - Markdown line with any diff marker already removed.
 * @returns {string | undefined} The trimmed heading text, or `undefined`
 *   when the line is not a heading or the heading has no text.
 */
function parseHeading(line) {
    const match = line.match(/^#{1,6}\s+(.+?)\s*$/);
    if (!match) {
        return undefined;
    }
    // Drop the optional closing sequence: trailing hashes that either make
    // up the whole text or are separated from it by whitespace.
    const text = match[1].replace(/(?:^|\s+)#+$/, '').trim();
    return text === '' ? undefined : text;
}
8031
+
7893
8032
  /**
7894
8033
  * Inspect a unified-diff string and report its shape, or undefined
7895
8034
  * if the diff isn't trivial (mixed +/- lines, weird headers, etc.).
@@ -8027,7 +8166,7 @@ function isCacheEnabled$1() {
8027
8166
  * synthetic summaries usually drop the directory token totals under
8028
8167
  * budget so wave consolidation skips too.
8029
8168
  */
8030
- async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, }) {
8169
+ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, fastPath, }) {
8031
8170
  const trivialSummary = summarizeTrivialDiff(fileDiff);
8032
8171
  if (trivialSummary !== undefined) {
8033
8172
  logger.verbose(` - ${fileDiff.file}: trivial-shape skip (no LLM call)`, { color: 'gray' });
@@ -8037,6 +8176,25 @@ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, log
8037
8176
  tokenCount: tokenizer(trivialSummary),
8038
8177
  };
8039
8178
  }
8179
+ // Markdown fast path (#861, angle 5). Opt-in via `fastPath.markdown`
8180
+ // because it's a lossy optimization: the templated summary names
8181
+ // structural changes only and drops body-text detail that an LLM
8182
+ // summary would carry. Off by default; users who prefer summary
8183
+ // fidelity over speed (which is the safer default for commit-message
8184
+ // generation downstream) keep the LLM path. When the flag IS on, the
8185
+ // fast path still falls through to the LLM for paragraph-only edits
8186
+ // where a templated summary would lose useful context.
8187
+ if (fastPath?.markdown) {
8188
+ const markdownSummary = summarizeMarkdownDiff(fileDiff);
8189
+ if (markdownSummary !== undefined) {
8190
+ logger.verbose(` - ${fileDiff.file}: markdown fast-path skip (no LLM call)`, { color: 'gray' });
8191
+ return {
8192
+ ...fileDiff,
8193
+ diff: markdownSummary,
8194
+ tokenCount: tokenizer(markdownSummary),
8195
+ };
8196
+ }
8197
+ }
8040
8198
  // Cache lookup (#845, PR 5). Keyed on the file's literal diff
8041
8199
  // content + the active model + the summarization prompt hash.
8042
8200
  // A hit returns the prior summary instantly; on iterative
@@ -8148,7 +8306,7 @@ function createLimit$2(maxConcurrent) {
8148
8306
  * @returns Array of file diffs with large files summarized
8149
8307
  */
8150
8308
  async function summarizeLargeFiles(diffs, options) {
8151
- const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter, metadata } = options;
8309
+ const { maxFileTokens, minTokensForSummary, maxConcurrent, maxTokens, fastPath, tokenizer, logger, chain, textSplitter, metadata, } = options;
8152
8310
  // Identify files that need summarization
8153
8311
  const filesToSummarize = [];
8154
8312
  const results = [...diffs];
@@ -8160,17 +8318,57 @@ async function summarizeLargeFiles(diffs, options) {
8160
8318
  if (filesToSummarize.length === 0) {
8161
8319
  return results;
8162
8320
  }
8163
- logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
8164
- // Process large files in waves
8165
- const summarizedFiles = await processInWaves$1(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer, logger, metadata }), maxConcurrent);
8166
- // Update results with summarized files
8167
- summarizedFiles.forEach((summarizedDiff, i) => {
8321
+ // Incremental termination (#861, PR 1). When the caller supplies a
8322
+ // budget, dispatch biggest-first and re-check the running total per
8323
+ // dispatch. Once earlier completions drop the total under maxTokens,
8324
+ // the remaining queued files skip the LLM and keep their raw diffs.
8325
+ // Mirrors the Phase 3 pattern in `summarizeDiffs.ts`. Without a
8326
+ // budget (undefined), behavior matches the prior path: every
8327
+ // eligible file is summarized regardless.
8328
+ filesToSummarize.sort((a, b) => b.diff.tokenCount - a.diff.tokenCount);
8329
+ const incrementalTermination = maxTokens !== undefined;
8330
+ let runningTotal = diffs.reduce((sum, diff) => sum + diff.tokenCount, 0);
8331
+ let summarizedCount = 0;
8332
+ let skippedCount = 0;
8333
+ logger.verbose(`Pre-summarizing up to ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
8334
+ const processed = await processInWaves$1(filesToSummarize, async ({ diff }) => {
8335
+ // Re-check the budget at dispatch time when the caller supplied
8336
+ // one. Earlier completions may have already dropped the total
8337
+ // under the cap; in that case skip the LLM call entirely and
8338
+ // keep the raw diff. Without a budget, every eligible file is
8339
+ // summarized (preserves the prior behavior).
8340
+ if (incrementalTermination && runningTotal <= maxTokens) {
8341
+ return { diff, summarized: false };
8342
+ }
8343
+ const summarized = await summarizeFileDiff(diff, {
8344
+ chain,
8345
+ textSplitter,
8346
+ tokenizer,
8347
+ logger,
8348
+ metadata,
8349
+ fastPath,
8350
+ });
8351
+ const delta = diff.tokenCount - summarized.tokenCount;
8352
+ if (delta > 0) {
8353
+ runningTotal -= delta;
8354
+ }
8355
+ return { diff: summarized, summarized: true };
8356
+ }, maxConcurrent);
8357
+ processed.forEach((entry, i) => {
8168
8358
  const originalIndex = filesToSummarize[i].index;
8359
+ if (!entry.summarized) {
8360
+ skippedCount++;
8361
+ return;
8362
+ }
8363
+ summarizedCount++;
8169
8364
  const originalTokens = results[originalIndex].tokenCount;
8170
- const newTokens = summarizedDiff.tokenCount;
8171
- logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
8172
- results[originalIndex] = summarizedDiff;
8365
+ const newTokens = entry.diff.tokenCount;
8366
+ logger.verbose(` - ${entry.diff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
8367
+ results[originalIndex] = entry.diff;
8173
8368
  });
8369
+ if (skippedCount > 0) {
8370
+ logger.verbose(`Skipped ${skippedCount} pre-summary call(s) — token budget already met after ${summarizedCount} earlier file(s)`, { color: 'cyan' });
8371
+ }
8174
8372
  return results;
8175
8373
  }
8176
8374
  /**
@@ -8436,7 +8634,7 @@ async function summarizeDiffs(rootDiffNode, { tokenizer, logger,
8436
8634
  // with the service defaults means a caller that omits
8437
8635
  // `maxTokens` doesn't accidentally fall into a tighter budget
8438
8636
  // than the rest of the system assumes.
8439
- maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
8637
+ maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, fastPath, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
8440
8638
  // Calculate maxFileTokens as 25% of maxTokens if not specified
8441
8639
  const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
8442
8640
  // PHASE 1: Directory grouping & assessment
@@ -8460,6 +8658,13 @@ maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, t
8460
8658
  maxFileTokens: effectiveMaxFileTokens,
8461
8659
  minTokensForSummary,
8462
8660
  maxConcurrent,
8661
+ // #861, PR 1: pass the overall budget so Phase 2 can short-circuit
8662
+ // once earlier completions drop the running total under the cap.
8663
+ maxTokens,
8664
+ // #861, angle 5: opt-in markdown fast path. Off by default; when
8665
+ // enabled, markdown modification diffs with structural signals
8666
+ // resolve via a templated extract instead of an LLM call.
8667
+ fastPath,
8463
8668
  tokenizer,
8464
8669
  logger,
8465
8670
  chain,
@@ -11437,7 +11642,7 @@ for (var i = 0; i < 256; i++) {
11437
11642
  simpleEscapeMap[i] = simpleEscapeSequence(i);
11438
11643
  }
11439
11644
 
11440
- async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, metadata, }, }) {
11645
+ async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, fastPath, metadata, }, }) {
11441
11646
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
11442
11647
  const summarizationChain = loadSummarizationChain(model, {
11443
11648
  type: 'map_reduce',
@@ -11469,6 +11674,7 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
11469
11674
  minTokensForSummary,
11470
11675
  maxFileTokens,
11471
11676
  maxConcurrent,
11677
+ fastPath,
11472
11678
  textSplitter,
11473
11679
  chain: summarizationChain,
11474
11680
  logger,
@@ -11488,6 +11694,7 @@ function createFileChangeParserOptions({ command, git, llm, logger, model, provi
11488
11694
  minTokensForSummary: service?.minTokensForSummary,
11489
11695
  maxFileTokens: service?.maxFileTokens,
11490
11696
  maxConcurrent: service?.maxConcurrent,
11697
+ fastPath: service?.fastPath,
11491
11698
  metadata: {
11492
11699
  command,
11493
11700
  provider,
@@ -12415,6 +12622,164 @@ const CommitSplitPlanSchema = objectType({
12415
12622
  }))
12416
12623
  .min(1),
12417
12624
  });
12625
+
12626
/**
 * Whole-file paths assigned to a plan group.
 *
 * @param {{ files?: string[] }} group - One group of a commit-split plan.
 * @returns {string[]} `group.files`, or an empty array when it is falsy.
 */
function getGroupFiles$1(group) {
    return group.files || [];
}
/**
 * Hunk IDs assigned to a plan group.
 *
 * @param {{ hunks?: string[] }} group - One group of a commit-split plan.
 * @returns {string[]} `group.hunks`, or an empty array when it is falsy.
 */
function getGroupHunks$1(group) {
    return group.hunks || [];
}
12628
/**
 * Compare a commit-split plan against the staged changes and collect every
 * way in which the plan is inconsistent with them.
 *
 * @param {{ groups: Array<{ files?: string[], hunks?: string[] }> }} plan -
 *   Plan whose groups assign whole files and/or hunk IDs.
 * @param {Array<{ filePath: string }>} staged - Staged changes.
 * @param {{ byId: Map<string, { id: string, filePath: string }>,
 *           byFile: Map<string, Array<{ id: string }>> } | undefined} hunkInventory -
 *   Lookup tables for staged hunks; may be omitted, in which case every
 *   referenced hunk ID is reported as unknown.
 * @returns {{ unknownFiles: string[], duplicateFiles: string[],
 *             unknownHunks: string[], duplicateHunks: string[],
 *             mixedFiles: string[], partiallyCoveredFiles: string[],
 *             missingFiles: string[] }} One list per issue category; all
 *   lists are empty for a consistent plan.
 */
function getPlanValidationIssues(plan, staged, hunkInventory) {
    const stagedFiles = new Set(staged.map((change) => change.filePath));
    const assignedFiles = new Set();
    const assignedHunks = new Set();
    const unknownFiles = [];
    const duplicateFiles = [];
    const unknownHunks = [];
    const duplicateHunks = [];
    for (const group of plan.groups) {
        for (const file of group.files || []) {
            if (!stagedFiles.has(file)) {
                unknownFiles.push(file);
            }
            else if (assignedFiles.has(file)) {
                duplicateFiles.push(file);
            }
            else {
                assignedFiles.add(file);
            }
        }
        for (const hunkId of group.hunks || []) {
            if (!hunkInventory?.byId.get(hunkId)) {
                unknownHunks.push(hunkId);
            }
            else if (assignedHunks.has(hunkId)) {
                duplicateHunks.push(hunkId);
            }
            else {
                assignedHunks.add(hunkId);
            }
        }
    }
    // Files reached through at least one validly assigned hunk.
    const hunkCoveredFiles = new Set();
    for (const hunkId of assignedHunks) {
        hunkCoveredFiles.add(hunkInventory?.byId.get(hunkId)?.filePath);
    }
    // Assigned as a whole file and through hunks at the same time.
    const mixedFiles = [...assignedFiles].filter((file) => hunkCoveredFiles.has(file));
    // Reached through hunks, but at least one hunk of the file is unassigned.
    const partiallyCoveredFiles = [];
    for (const file of hunkCoveredFiles) {
        if (!file) {
            continue;
        }
        const fileHunks = hunkInventory?.byFile.get(file) || [];
        if (fileHunks.some((hunk) => !assignedHunks.has(hunk.id))) {
            partiallyCoveredFiles.push(file);
        }
    }
    // Staged, but covered neither as a whole file nor through any hunk.
    const missingFiles = [...stagedFiles].filter((file) => !assignedFiles.has(file) && !hunkCoveredFiles.has(file));
    return {
        unknownFiles,
        duplicateFiles,
        unknownHunks,
        duplicateHunks,
        mixedFiles,
        partiallyCoveredFiles,
        missingFiles,
    };
}
12680
/**
 * Tell whether a validation report contains at least one problem.
 *
 * @param {Record<string, string[]>} issues - Report with one list per issue
 *   category (`unknownFiles`, `duplicateFiles`, `unknownHunks`,
 *   `duplicateHunks`, `mixedFiles`, `partiallyCoveredFiles`, `missingFiles`).
 * @returns {boolean} `true` when any category list is non-empty.
 */
function hasPlanValidationIssues(issues) {
    const categories = [
        'unknownFiles',
        'duplicateFiles',
        'unknownHunks',
        'duplicateHunks',
        'mixedFiles',
        'partiallyCoveredFiles',
        'missingFiles',
    ];
    return categories.some((category) => issues[category].length > 0);
}
12689
/**
 * Render a validation report as a compact, single-line error message.
 *
 * Each non-empty category becomes `<label>: <comma-separated entries>`;
 * the fragments are joined with `; ` in a fixed category order.
 *
 * @param {Record<string, string[]>} issues - Report with one list per issue
 *   category.
 * @returns {string} The joined message, or an empty string when every
 *   category is empty.
 */
function formatPlanValidationIssuesError(issues) {
    const labelledCategories = [
        ['unknown files', issues.unknownFiles],
        ['duplicate files', issues.duplicateFiles],
        ['unknown hunks', issues.unknownHunks],
        ['duplicate hunks', issues.duplicateHunks],
        ['files assigned both as whole files and hunks', issues.mixedFiles],
        ['files with only some hunks assigned', issues.partiallyCoveredFiles],
        ['missing files', issues.missingFiles],
    ];
    const fragments = [];
    for (const [label, entries] of labelledCategories) {
        if (entries.length) {
            fragments.push(`${label}: ${entries.join(', ')}`);
        }
    }
    return fragments.join('; ');
}
12710
/**
 * Render a validation report as a bulleted, natural-language list.
 *
 * Each non-empty category produces one `- `-prefixed line that explains
 * the rule that was broken and lists the offending entries; lines are
 * joined with newlines in a fixed category order.
 *
 * @param {Record<string, string[]>} issues - Report with one list per issue
 *   category.
 * @returns {string} The bulleted text, or an empty string when every
 *   category is empty.
 */
function formatPlanValidationFeedback(issues) {
    const explanations = [
        ['Files referenced that are NOT in the staged file inventory (remove or replace)', issues.unknownFiles],
        ['Files assigned to more than one group (each file may appear at most once)', issues.duplicateFiles],
        ['Hunk IDs referenced that are NOT in the staged hunk inventory', issues.unknownHunks],
        ['Hunk IDs assigned to more than one group (each hunk may appear at most once)', issues.duplicateHunks],
        ['Files assigned BOTH as whole files and via hunks (pick one mode per file)', issues.mixedFiles],
        ['Files with only some hunks assigned (every hunk for these files must be covered)', issues.partiallyCoveredFiles],
        ['Staged files missing from every group (must appear exactly once)', issues.missingFiles],
    ];
    const bullets = [];
    for (const [explanation, entries] of explanations) {
        if (entries.length) {
            bullets.push(`- ${explanation}: ${entries.join(', ')}`);
        }
    }
    return bullets.join('\n');
}
12735
+
12736
/**
 * Text placed in the prompt's `previous_attempt_feedback` slot on the first
 * attempt, when there is no validator feedback yet.
 */
const NO_PREVIOUS_FEEDBACK_PLACEHOLDER = 'None — this is the first attempt.';
/**
 * Default number of plan-generation attempts before giving up.
 */
const DEFAULT_MAX_PLAN_ATTEMPTS = 3;
12738
/**
 * Ask the model for a commit-split plan, retrying with validator feedback
 * until the plan is consistent with the staged changes.
 *
 * Every attempt calls `executeChainWithSchema` with `CommitSplitPlanSchema`
 * and the same prompt; the only variable that changes is
 * `previous_attempt_feedback`, which holds a placeholder on the first
 * attempt and the formatted issues of the preceding attempt afterwards.
 * The returned plan is checked with `getPlanValidationIssues`. Errors
 * thrown by `executeChainWithSchema` are not caught here and propagate to
 * the caller.
 *
 * @param {object} params
 * @param {*} params.llm - Model handle forwarded to `executeChainWithSchema`.
 * @param {*} params.prompt - Prompt template forwarded to `executeChainWithSchema`.
 * @param {object} params.variables - Prompt variables shared by all attempts.
 * @param {Array<{ filePath: string }>} params.staged - Staged changes to validate against.
 * @param {*} params.hunkInventory - Staged hunk lookup tables used by the validator.
 * @param {*} [params.logger] - Optional logger; `verbose` is called on retries.
 * @param {*} params.tokenizer - Tokenizer forwarded to `executeChainWithSchema`.
 * @param {object} [params.metadata={}] - Extra call metadata; merged after the
 *   default `task` and before `planAttempt`.
 * @param {number} [params.maxAttempts=DEFAULT_MAX_PLAN_ATTEMPTS] - Upper
 *   bound on attempts.
 * @returns {Promise<{ plan: object, attempts: number }>} The first valid
 *   plan and the 1-based attempt number that produced it.
 * @throws {Error} When no attempt yields a valid plan.
 */
async function generateValidatedCommitSplitPlan({ llm, prompt, variables, staged, hunkInventory, logger, tokenizer, metadata = {}, maxAttempts = DEFAULT_MAX_PLAN_ATTEMPTS, }) {
    let lastIssues = null;
    for (let completed = 0; completed < maxAttempts; completed++) {
        const attempt = completed + 1;
        // First attempt gets the placeholder; later ones get the prior complaints.
        const feedback = lastIssues === null
            ? NO_PREVIOUS_FEEDBACK_PLACEHOLDER
            : formatPlanValidationFeedback(lastIssues);
        const promptVariables = {
            ...variables,
            previous_attempt_feedback: feedback,
        };
        const callOptions = {
            logger,
            tokenizer,
            metadata: {
                task: 'commit-split-plan',
                ...metadata,
                planAttempt: attempt,
            },
        };
        const plan = await executeChainWithSchema(CommitSplitPlanSchema, llm, prompt, promptVariables, callOptions);
        const issues = getPlanValidationIssues(plan, staged, hunkInventory);
        if (hasPlanValidationIssues(issues)) {
            // Remember the complaints so the next attempt can address them.
            lastIssues = issues;
            if (logger) {
                logger.verbose(`Plan attempt ${attempt}/${maxAttempts} failed validation: ${formatPlanValidationIssuesError(issues)}`, { color: 'yellow' });
            }
            continue;
        }
        if (attempt > 1 && logger) {
            logger.verbose(`Plan validated after ${attempt} attempts.`, { color: 'green' });
        }
        return { plan, attempts: attempt };
    }
    if (lastIssues) {
        throw new Error(`Failed to produce a valid commit-split plan after ${maxAttempts} attempts. Final validator issues: ${formatPlanValidationIssuesError(lastIssues)}`);
    }
    throw new Error(`Failed to produce a valid commit-split plan after ${maxAttempts} attempts.`);
}
12782
+
12418
12783
  const COMMIT_SPLIT_PROMPT = PromptTemplate.fromTemplate(`You are helping split staged git changes into a small sequence of coherent commits.
12419
12784
 
12420
12785
  Return ONLY valid JSON matching this schema:
@@ -12431,14 +12796,13 @@ Return ONLY valid JSON matching this schema:
12431
12796
  }}
12432
12797
 
12433
12798
  Rules:
12434
- - Use each staged file exactly once.
12435
- - If a file has hunk IDs and contains unrelated changes, assign every hunk ID exactly once instead of assigning the whole file.
12436
- - Do not list the same file in "files" when assigning that file through "hunks".
12437
- - Only use file paths listed in the staged file inventory.
12438
- - Only use hunk IDs listed in the staged hunk inventory.
12799
+ - Every staged file MUST be assigned exactly once across all groups, either via "files" OR via every one of its hunk IDs (never both).
12800
+ - If you assign any hunk for a file, you MUST assign EVERY hunk for that file across the groups partial coverage is invalid.
12801
+ - Do not list the same file in "files" of more than one group, and do not assign the same hunk ID to more than one group.
12802
+ - Only use file paths listed in the staged file inventory. Do not invent files.
12803
+ - Only use hunk IDs listed in the staged hunk inventory. Do not invent hunk IDs.
12439
12804
  - Prefer 2-5 commits unless the changes are truly all one topic.
12440
12805
  - Keep commit titles concise and understandable.
12441
- - Do not invent files.
12442
12806
 
12443
12807
  Staged file inventory:
12444
12808
  {file_inventory}
@@ -12450,7 +12814,10 @@ Condensed staged diff:
12450
12814
  {summary}
12451
12815
 
12452
12816
  Additional context:
12453
- {additional_context}`);
12817
+ {additional_context}
12818
+
12819
+ Feedback on previous attempt (fix every item before responding):
12820
+ {previous_attempt_feedback}`);
12454
12821
  function isCommitSplitCommand(argv) {
12455
12822
  return Boolean(argv.split || argv.plan || argv.apply || argv._.includes('split'));
12456
12823
  }
@@ -12469,9 +12836,6 @@ function formatCommitSplitPlan(plan) {
12469
12836
  })
12470
12837
  .join('\n\n---\n\n');
12471
12838
  }
12472
- function getStagedFileSet(changes) {
12473
- return new Set(changes.map((change) => change.filePath));
12474
- }
12475
12839
  function getGroupFiles(group) {
12476
12840
  return group.files || [];
12477
12841
  }
@@ -12528,67 +12892,9 @@ function formatHunkInventory(inventory) {
12528
12892
  .join('\n');
12529
12893
  }
12530
12894
  function validatePlanForStagedFiles(plan, staged, hunkInventory) {
12531
- const stagedFiles = getStagedFileSet(staged);
12532
- const seen = new Set();
12533
- const seenHunks = new Set();
12534
- const unknown = [];
12535
- const duplicate = [];
12536
- const unknownHunks = [];
12537
- const duplicateHunks = [];
12538
- plan.groups.forEach((group) => {
12539
- getGroupFiles(group).forEach((file) => {
12540
- if (!stagedFiles.has(file)) {
12541
- unknown.push(file);
12542
- return;
12543
- }
12544
- if (seen.has(file)) {
12545
- duplicate.push(file);
12546
- return;
12547
- }
12548
- seen.add(file);
12549
- });
12550
- getGroupHunks(group).forEach((hunkId) => {
12551
- const hunk = hunkInventory?.byId.get(hunkId);
12552
- if (!hunk) {
12553
- unknownHunks.push(hunkId);
12554
- return;
12555
- }
12556
- if (seenHunks.has(hunkId)) {
12557
- duplicateHunks.push(hunkId);
12558
- return;
12559
- }
12560
- seenHunks.add(hunkId);
12561
- });
12562
- });
12563
- const hunkCoveredFiles = new Set([...seenHunks].map((hunkId) => hunkInventory?.byId.get(hunkId)?.filePath));
12564
- const mixedFiles = [...seen].filter((file) => hunkCoveredFiles.has(file));
12565
- const partiallyCoveredFiles = [...hunkCoveredFiles]
12566
- .filter((file) => Boolean(file))
12567
- .filter((file) => {
12568
- const fileHunks = hunkInventory?.byFile.get(file) || [];
12569
- return fileHunks.some((hunk) => !seenHunks.has(hunk.id));
12570
- });
12571
- const missing = [...stagedFiles].filter((file) => !seen.has(file) && !hunkCoveredFiles.has(file));
12572
- if (unknown.length ||
12573
- duplicate.length ||
12574
- unknownHunks.length ||
12575
- duplicateHunks.length ||
12576
- mixedFiles.length ||
12577
- partiallyCoveredFiles.length ||
12578
- missing.length) {
12579
- throw new Error([
12580
- unknown.length ? `unknown files: ${unknown.join(', ')}` : undefined,
12581
- duplicate.length ? `duplicate files: ${duplicate.join(', ')}` : undefined,
12582
- unknownHunks.length ? `unknown hunks: ${unknownHunks.join(', ')}` : undefined,
12583
- duplicateHunks.length ? `duplicate hunks: ${duplicateHunks.join(', ')}` : undefined,
12584
- mixedFiles.length ? `files assigned both as whole files and hunks: ${mixedFiles.join(', ')}` : undefined,
12585
- partiallyCoveredFiles.length
12586
- ? `files with only some hunks assigned: ${partiallyCoveredFiles.join(', ')}`
12587
- : undefined,
12588
- missing.length ? `missing files: ${missing.join(', ')}` : undefined,
12589
- ]
12590
- .filter(Boolean)
12591
- .join('; '));
12895
+ const issues = getPlanValidationIssues(plan, staged, hunkInventory);
12896
+ if (hasPlanValidationIssues(issues)) {
12897
+ throw new Error(formatPlanValidationIssuesError(issues));
12592
12898
  }
12593
12899
  }
12594
12900
  function assertNoUnstagedOverlap(plan, changes, hunkInventory) {
@@ -12692,22 +12998,26 @@ async function handleCommitSplit({ argv, config, git, logger, tokenizer, llm, })
12692
12998
  .map((change) => `- ${change.filePath}: ${change.status} - ${change.summary}`)
12693
12999
  .join('\n');
12694
13000
  const hunkInventoryText = formatHunkInventory(hunkInventory);
12695
- const plan = await executeChainWithSchema(CommitSplitPlanSchema, llm, COMMIT_SPLIT_PROMPT, {
12696
- file_inventory: fileInventory,
12697
- hunk_inventory: hunkInventoryText,
12698
- summary,
12699
- additional_context: argv.additional || '',
12700
- }, {
13001
+ const { plan } = await generateValidatedCommitSplitPlan({
13002
+ llm,
13003
+ prompt: COMMIT_SPLIT_PROMPT,
13004
+ variables: {
13005
+ file_inventory: fileInventory,
13006
+ hunk_inventory: hunkInventoryText,
13007
+ summary,
13008
+ additional_context: argv.additional || '',
13009
+ },
13010
+ staged: changes.staged,
13011
+ hunkInventory,
12701
13012
  logger,
12702
13013
  tokenizer,
12703
13014
  metadata: {
12704
- task: 'commit-split-plan',
12705
13015
  command: 'commit',
12706
13016
  provider: config.service.provider,
12707
13017
  model: String(config.service.model),
12708
13018
  },
13019
+ maxAttempts: DEFAULT_MAX_PLAN_ATTEMPTS,
12709
13020
  });
12710
- validatePlanForStagedFiles(plan, changes.staged, hunkInventory);
12711
13021
  if (argv.apply) {
12712
13022
  return await applyCommitSplitPlan({
12713
13023
  plan,