git-coco 0.43.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -78,7 +78,7 @@ var readline__namespace = /*#__PURE__*/_interopNamespaceDefault(readline);
78
78
  /**
79
79
  * Current build version from package.json
80
80
  */
81
- const BUILD_VERSION = "0.43.0";
81
+ const BUILD_VERSION = "0.45.0";
82
82
 
83
83
  const isInteractive = (config) => {
84
84
  return config?.mode === 'interactive' || !!config?.interactive;
@@ -1252,6 +1252,18 @@ const schema$1 = {
1252
1252
  "$ref": "#/definitions/DynamicModelPreference",
1253
1253
  "description": "Default dynamic routing preference when model is set to \"dynamic\".",
1254
1254
  "default": "balanced"
1255
+ },
1256
+ "fastPath": {
1257
+ "type": "object",
1258
+ "properties": {
1259
+ "markdown": {
1260
+ "type": "boolean",
1261
+ "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
1262
+ "default": false
1263
+ }
1264
+ },
1265
+ "additionalProperties": false,
1266
+ "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
1255
1267
  }
1256
1268
  },
1257
1269
  "required": [
@@ -1665,6 +1677,18 @@ const schema$1 = {
1665
1677
  "$ref": "#/definitions/DynamicModelPreference",
1666
1678
  "description": "Default dynamic routing preference when model is set to \"dynamic\".",
1667
1679
  "default": "balanced"
1680
+ },
1681
+ "fastPath": {
1682
+ "type": "object",
1683
+ "properties": {
1684
+ "markdown": {
1685
+ "type": "boolean",
1686
+ "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
1687
+ "default": false
1688
+ }
1689
+ },
1690
+ "additionalProperties": false,
1691
+ "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
1668
1692
  }
1669
1693
  },
1670
1694
  "required": [
@@ -1821,6 +1845,18 @@ const schema$1 = {
1821
1845
  "$ref": "#/definitions/DynamicModelPreference",
1822
1846
  "description": "Default dynamic routing preference when model is set to \"dynamic\".",
1823
1847
  "default": "balanced"
1848
+ },
1849
+ "fastPath": {
1850
+ "type": "object",
1851
+ "properties": {
1852
+ "markdown": {
1853
+ "type": "boolean",
1854
+ "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
1855
+ "default": false
1856
+ }
1857
+ },
1858
+ "additionalProperties": false,
1859
+ "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
1824
1860
  }
1825
1861
  },
1826
1862
  "required": [
@@ -7914,6 +7950,109 @@ async function summarize(documents$1, { chain, textSplitter, options, logger, to
7914
7950
  return res.text && res.text.trim();
7915
7951
  }
7916
7952
 
7953
+ /**
7954
+ * Markdown-aware fast path (#861, angle 5). For modification diffs to
7955
+ * `.md` / `.mdx` / `.markdown` files, build a templated summary from
7956
+ * the changed structure (added / removed / updated headings) instead
7957
+ * of paying for an LLM call. Mirrors `trivialDiff` from #845: a deterministic
7958
+ * skip when the diff's meaning is captured by its shape.
7959
+ *
7960
+ * Quality / cost trade-off, on purpose: LLM summaries of markdown edits
7961
+ * are wordier ("expanded the configuration section with new examples,
7962
+ * fixed typos in troubleshooting") but most of that detail isn't load-
7963
+ * bearing for a commit message. The templated summary names the
7964
+ * structural changes (sections added, removed, or updated) plus a +/- line count, and
7965
+ * defers to the LLM only when the diff has no clear structural signals
7966
+ * (paragraph-only edits, where a templated summary would actually drop
7967
+ * useful context).
7968
+ */
7969
+ const MARKDOWN_EXTENSIONS = ['.md', '.markdown', '.mdx'];
7970
+ const MAX_HEADINGS_PER_BUCKET = 6;
7971
+ function isMarkdownFile(path) {
7972
+ const lower = path.toLowerCase();
7973
+ return MARKDOWN_EXTENSIONS.some((ext) => lower.endsWith(ext));
7974
+ }
7975
+ function summarizeMarkdownDiff(fileDiff) {
7976
+ if (!isMarkdownFile(fileDiff.file))
7977
+ return undefined;
7978
+ const addedHeadings = new Set();
7979
+ const removedHeadings = new Set();
7980
+ let addedLines = 0;
7981
+ let removedLines = 0;
7982
+ for (const line of fileDiff.diff.split('\n')) {
7983
+ if (isHeaderLine$1(line))
7984
+ continue;
7985
+ if (line.startsWith('+')) {
7986
+ addedLines++;
7987
+ const heading = parseHeading(line.slice(1));
7988
+ if (heading)
7989
+ addedHeadings.add(heading);
7990
+ }
7991
+ else if (line.startsWith('-')) {
7992
+ removedLines++;
7993
+ const heading = parseHeading(line.slice(1));
7994
+ if (heading)
7995
+ removedHeadings.add(heading);
7996
+ }
7997
+ }
7998
+ // No content change → nothing to summarize. Caller falls through.
7999
+ if (addedLines === 0 && removedLines === 0)
8000
+ return undefined;
8001
+ // No structural signal → fall through to LLM. We only fast-path
8002
+ // when the diff has heading-level changes; pure paragraph edits go
8003
+ // to the LLM so the summary keeps its detail.
8004
+ if (addedHeadings.size === 0 && removedHeadings.size === 0) {
8005
+ return undefined;
8006
+ }
8007
+ // A heading that appears in both buckets is likely an update (kept
8008
+ // around but its body changed) rather than two distinct events.
8009
+ // The naive split-by-bucket diff format used by git emits the old
8010
+ // text under `-` and the new text under `+`; an unchanged heading
8011
+ // line shouldn't show up in either bucket via the standard hunk
8012
+ // path, but defensively de-dupe in case the diff producer emits
8013
+ // surrounding context as +/-.
8014
+ const updated = new Set([...addedHeadings].filter((h) => removedHeadings.has(h)));
8015
+ const purelyAdded = [...addedHeadings].filter((h) => !updated.has(h));
8016
+ const purelyRemoved = [...removedHeadings].filter((h) => !updated.has(h));
8017
+ const parts = [`Updated markdown \`${fileDiff.file}\``];
8018
+ if (purelyAdded.length) {
8019
+ parts.push(`new sections: ${formatHeadingList(purelyAdded)}`);
8020
+ }
8021
+ if (purelyRemoved.length) {
8022
+ parts.push(`removed sections: ${formatHeadingList(purelyRemoved)}`);
8023
+ }
8024
+ if (updated.size) {
8025
+ parts.push(`updated sections: ${formatHeadingList([...updated])}`);
8026
+ }
8027
+ parts.push(`+${addedLines}/-${removedLines} lines`);
8028
+ return `${parts.join('. ')}.`;
8029
+ }
8030
+ function formatHeadingList(headings) {
8031
+ if (headings.length <= MAX_HEADINGS_PER_BUCKET) {
8032
+ return headings.join(', ');
8033
+ }
8034
+ const shown = headings.slice(0, MAX_HEADINGS_PER_BUCKET);
8035
+ const remainder = headings.length - shown.length;
8036
+ return `${shown.join(', ')} (+${remainder} more)`;
8037
+ }
8038
+ function isHeaderLine$1(line) {
8039
+ return (line.startsWith('diff --git') ||
8040
+ line.startsWith('index ') ||
8041
+ line.startsWith('--- ') ||
8042
+ line.startsWith('+++ ') ||
8043
+ line.startsWith('@@') ||
8044
+ line.startsWith('new file mode') ||
8045
+ line.startsWith('deleted file mode') ||
8046
+ line.startsWith('similarity index') ||
8047
+ line.startsWith('rename from ') ||
8048
+ line.startsWith('rename to ') ||
8049
+ line.startsWith('Binary files '));
8050
+ }
8051
+ function parseHeading(line) {
8052
+ const match = line.match(/^#{1,6}\s+(.+?)\s*$/);
8053
+ return match ? match[1].trim() : undefined;
8054
+ }
8055
+
7917
8056
  /**
7918
8057
  * Inspect a unified-diff string and report its shape, or undefined
7919
8058
  * if the diff isn't trivial (mixed +/- lines, weird headers, etc.).
@@ -8051,7 +8190,7 @@ function isCacheEnabled$1() {
8051
8190
  * synthetic summaries usually drop the directory token totals under
8052
8191
  * budget so wave consolidation skips too.
8053
8192
  */
8054
- async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, }) {
8193
+ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, fastPath, }) {
8055
8194
  const trivialSummary = summarizeTrivialDiff(fileDiff);
8056
8195
  if (trivialSummary !== undefined) {
8057
8196
  logger.verbose(` - ${fileDiff.file}: trivial-shape skip (no LLM call)`, { color: 'gray' });
@@ -8061,6 +8200,25 @@ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, log
8061
8200
  tokenCount: tokenizer(trivialSummary),
8062
8201
  };
8063
8202
  }
8203
+ // Markdown fast path (#861, angle 5). Opt-in via `fastPath.markdown`
8204
+ // because it's a lossy optimization: the templated summary names
8205
+ // structural changes only and drops body-text detail that an LLM
8206
+ // summary would carry. Off by default; users who prefer summary
8207
+ // fidelity over speed (which is the safer default for commit-message
8208
+ // generation downstream) keep the LLM path. When the flag IS on, the
8209
+ // fast path still falls through to the LLM for paragraph-only edits
8210
+ // where a templated summary would lose useful context.
8211
+ if (fastPath?.markdown) {
8212
+ const markdownSummary = summarizeMarkdownDiff(fileDiff);
8213
+ if (markdownSummary !== undefined) {
8214
+ logger.verbose(` - ${fileDiff.file}: markdown fast-path skip (no LLM call)`, { color: 'gray' });
8215
+ return {
8216
+ ...fileDiff,
8217
+ diff: markdownSummary,
8218
+ tokenCount: tokenizer(markdownSummary),
8219
+ };
8220
+ }
8221
+ }
8064
8222
  // Cache lookup (#845, PR 5). Keyed on the file's literal diff
8065
8223
  // content + the active model + the summarization prompt hash.
8066
8224
  // A hit returns the prior summary instantly; on iterative
@@ -8172,7 +8330,7 @@ function createLimit$2(maxConcurrent) {
8172
8330
  * @returns Array of file diffs with large files summarized
8173
8331
  */
8174
8332
  async function summarizeLargeFiles(diffs, options) {
8175
- const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter, metadata } = options;
8333
+ const { maxFileTokens, minTokensForSummary, maxConcurrent, maxTokens, fastPath, tokenizer, logger, chain, textSplitter, metadata, } = options;
8176
8334
  // Identify files that need summarization
8177
8335
  const filesToSummarize = [];
8178
8336
  const results = [...diffs];
@@ -8184,17 +8342,57 @@ async function summarizeLargeFiles(diffs, options) {
8184
8342
  if (filesToSummarize.length === 0) {
8185
8343
  return results;
8186
8344
  }
8187
- logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
8188
- // Process large files in waves
8189
- const summarizedFiles = await processInWaves$1(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer, logger, metadata }), maxConcurrent);
8190
- // Update results with summarized files
8191
- summarizedFiles.forEach((summarizedDiff, i) => {
8345
+ // Incremental termination (#861, PR 1). When the caller supplies a
8346
+ // budget, dispatch biggest-first and re-check the running total per
8347
+ // dispatch. Once earlier completions drop the total under maxTokens,
8348
+ // the remaining queued files skip the LLM and keep their raw diffs.
8349
+ // Mirrors the Phase 3 pattern in `summarizeDiffs.ts`. Without a
8350
+ // budget (undefined), behavior matches the prior path: every
8351
+ // eligible file is summarized regardless.
8352
+ filesToSummarize.sort((a, b) => b.diff.tokenCount - a.diff.tokenCount);
8353
+ const incrementalTermination = maxTokens !== undefined;
8354
+ let runningTotal = diffs.reduce((sum, diff) => sum + diff.tokenCount, 0);
8355
+ let summarizedCount = 0;
8356
+ let skippedCount = 0;
8357
+ logger.verbose(`Pre-summarizing up to ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
8358
+ const processed = await processInWaves$1(filesToSummarize, async ({ diff }) => {
8359
+ // Re-check the budget at dispatch time when the caller supplied
8360
+ // one. Earlier completions may have already dropped the total
8361
+ // under the cap; in that case skip the LLM call entirely and
8362
+ // keep the raw diff. Without a budget, every eligible file is
8363
+ // summarized (preserves the prior behavior).
8364
+ if (incrementalTermination && runningTotal <= maxTokens) {
8365
+ return { diff, summarized: false };
8366
+ }
8367
+ const summarized = await summarizeFileDiff(diff, {
8368
+ chain,
8369
+ textSplitter,
8370
+ tokenizer,
8371
+ logger,
8372
+ metadata,
8373
+ fastPath,
8374
+ });
8375
+ const delta = diff.tokenCount - summarized.tokenCount;
8376
+ if (delta > 0) {
8377
+ runningTotal -= delta;
8378
+ }
8379
+ return { diff: summarized, summarized: true };
8380
+ }, maxConcurrent);
8381
+ processed.forEach((entry, i) => {
8192
8382
  const originalIndex = filesToSummarize[i].index;
8383
+ if (!entry.summarized) {
8384
+ skippedCount++;
8385
+ return;
8386
+ }
8387
+ summarizedCount++;
8193
8388
  const originalTokens = results[originalIndex].tokenCount;
8194
- const newTokens = summarizedDiff.tokenCount;
8195
- logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
8196
- results[originalIndex] = summarizedDiff;
8389
+ const newTokens = entry.diff.tokenCount;
8390
+ logger.verbose(` - ${entry.diff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
8391
+ results[originalIndex] = entry.diff;
8197
8392
  });
8393
+ if (skippedCount > 0) {
8394
+ logger.verbose(`Skipped ${skippedCount} pre-summary call(s) — token budget already met after ${summarizedCount} earlier file(s)`, { color: 'cyan' });
8395
+ }
8198
8396
  return results;
8199
8397
  }
8200
8398
  /**
@@ -8460,7 +8658,7 @@ async function summarizeDiffs(rootDiffNode, { tokenizer, logger,
8460
8658
  // with the service defaults means a caller that omits
8461
8659
  // `maxTokens` doesn't accidentally fall into a tighter budget
8462
8660
  // than the rest of the system assumes.
8463
- maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
8661
+ maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, fastPath, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
8464
8662
  // Calculate maxFileTokens as 25% of maxTokens if not specified
8465
8663
  const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
8466
8664
  // PHASE 1: Directory grouping & assessment
@@ -8484,6 +8682,13 @@ maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, t
8484
8682
  maxFileTokens: effectiveMaxFileTokens,
8485
8683
  minTokensForSummary,
8486
8684
  maxConcurrent,
8685
+ // #861, PR 1: pass the overall budget so Phase 2 can short-circuit
8686
+ // once earlier completions drop the running total under the cap.
8687
+ maxTokens,
8688
+ // #861, angle 5: opt-in markdown fast path. Off by default; when
8689
+ // enabled, markdown modification diffs with structural signals
8690
+ // resolve via a templated extract instead of an LLM call.
8691
+ fastPath,
8487
8692
  tokenizer,
8488
8693
  logger,
8489
8694
  chain,
@@ -11461,7 +11666,7 @@ for (var i = 0; i < 256; i++) {
11461
11666
  simpleEscapeMap[i] = simpleEscapeSequence(i);
11462
11667
  }
11463
11668
 
11464
- async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, metadata, }, }) {
11669
+ async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, fastPath, metadata, }, }) {
11465
11670
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
11466
11671
  const summarizationChain = loadSummarizationChain(model, {
11467
11672
  type: 'map_reduce',
@@ -11493,6 +11698,7 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
11493
11698
  minTokensForSummary,
11494
11699
  maxFileTokens,
11495
11700
  maxConcurrent,
11701
+ fastPath,
11496
11702
  textSplitter,
11497
11703
  chain: summarizationChain,
11498
11704
  logger,
@@ -11512,6 +11718,7 @@ function createFileChangeParserOptions({ command, git, llm, logger, model, provi
11512
11718
  minTokensForSummary: service?.minTokensForSummary,
11513
11719
  maxFileTokens: service?.maxFileTokens,
11514
11720
  maxConcurrent: service?.maxConcurrent,
11721
+ fastPath: service?.fastPath,
11515
11722
  metadata: {
11516
11723
  command,
11517
11724
  provider,
@@ -12439,6 +12646,164 @@ const CommitSplitPlanSchema = objectType({
12439
12646
  }))
12440
12647
  .min(1),
12441
12648
  });
12649
+
12650
+ const getGroupFiles$1 = (group) => group.files || [];
12651
+ const getGroupHunks$1 = (group) => group.hunks || [];
12652
+ function getPlanValidationIssues(plan, staged, hunkInventory) {
12653
+ const stagedFiles = new Set(staged.map((change) => change.filePath));
12654
+ const seen = new Set();
12655
+ const seenHunks = new Set();
12656
+ const unknownFiles = [];
12657
+ const duplicateFiles = [];
12658
+ const unknownHunks = [];
12659
+ const duplicateHunks = [];
12660
+ plan.groups.forEach((group) => {
12661
+ getGroupFiles$1(group).forEach((file) => {
12662
+ if (!stagedFiles.has(file)) {
12663
+ unknownFiles.push(file);
12664
+ return;
12665
+ }
12666
+ if (seen.has(file)) {
12667
+ duplicateFiles.push(file);
12668
+ return;
12669
+ }
12670
+ seen.add(file);
12671
+ });
12672
+ getGroupHunks$1(group).forEach((hunkId) => {
12673
+ const hunk = hunkInventory?.byId.get(hunkId);
12674
+ if (!hunk) {
12675
+ unknownHunks.push(hunkId);
12676
+ return;
12677
+ }
12678
+ if (seenHunks.has(hunkId)) {
12679
+ duplicateHunks.push(hunkId);
12680
+ return;
12681
+ }
12682
+ seenHunks.add(hunkId);
12683
+ });
12684
+ });
12685
+ const hunkCoveredFiles = new Set([...seenHunks].map((hunkId) => hunkInventory?.byId.get(hunkId)?.filePath));
12686
+ const mixedFiles = [...seen].filter((file) => hunkCoveredFiles.has(file));
12687
+ const partiallyCoveredFiles = [...hunkCoveredFiles]
12688
+ .filter((file) => Boolean(file))
12689
+ .filter((file) => {
12690
+ const fileHunks = hunkInventory?.byFile.get(file) || [];
12691
+ return fileHunks.some((hunk) => !seenHunks.has(hunk.id));
12692
+ });
12693
+ const missingFiles = [...stagedFiles].filter((file) => !seen.has(file) && !hunkCoveredFiles.has(file));
12694
+ return {
12695
+ unknownFiles,
12696
+ duplicateFiles,
12697
+ unknownHunks,
12698
+ duplicateHunks,
12699
+ mixedFiles,
12700
+ partiallyCoveredFiles,
12701
+ missingFiles,
12702
+ };
12703
+ }
12704
+ function hasPlanValidationIssues(issues) {
12705
+ return (issues.unknownFiles.length > 0 ||
12706
+ issues.duplicateFiles.length > 0 ||
12707
+ issues.unknownHunks.length > 0 ||
12708
+ issues.duplicateHunks.length > 0 ||
12709
+ issues.mixedFiles.length > 0 ||
12710
+ issues.partiallyCoveredFiles.length > 0 ||
12711
+ issues.missingFiles.length > 0);
12712
+ }
12713
+ function formatPlanValidationIssuesError(issues) {
12714
+ return [
12715
+ issues.unknownFiles.length ? `unknown files: ${issues.unknownFiles.join(', ')}` : undefined,
12716
+ issues.duplicateFiles.length
12717
+ ? `duplicate files: ${issues.duplicateFiles.join(', ')}`
12718
+ : undefined,
12719
+ issues.unknownHunks.length ? `unknown hunks: ${issues.unknownHunks.join(', ')}` : undefined,
12720
+ issues.duplicateHunks.length
12721
+ ? `duplicate hunks: ${issues.duplicateHunks.join(', ')}`
12722
+ : undefined,
12723
+ issues.mixedFiles.length
12724
+ ? `files assigned both as whole files and hunks: ${issues.mixedFiles.join(', ')}`
12725
+ : undefined,
12726
+ issues.partiallyCoveredFiles.length
12727
+ ? `files with only some hunks assigned: ${issues.partiallyCoveredFiles.join(', ')}`
12728
+ : undefined,
12729
+ issues.missingFiles.length ? `missing files: ${issues.missingFiles.join(', ')}` : undefined,
12730
+ ]
12731
+ .filter(Boolean)
12732
+ .join('; ');
12733
+ }
12734
+ function formatPlanValidationFeedback(issues) {
12735
+ const sections = [];
12736
+ if (issues.unknownFiles.length) {
12737
+ sections.push(`Files referenced that are NOT in the staged file inventory (remove or replace): ${issues.unknownFiles.join(', ')}`);
12738
+ }
12739
+ if (issues.duplicateFiles.length) {
12740
+ sections.push(`Files assigned to more than one group (each file may appear at most once): ${issues.duplicateFiles.join(', ')}`);
12741
+ }
12742
+ if (issues.unknownHunks.length) {
12743
+ sections.push(`Hunk IDs referenced that are NOT in the staged hunk inventory: ${issues.unknownHunks.join(', ')}`);
12744
+ }
12745
+ if (issues.duplicateHunks.length) {
12746
+ sections.push(`Hunk IDs assigned to more than one group (each hunk may appear at most once): ${issues.duplicateHunks.join(', ')}`);
12747
+ }
12748
+ if (issues.mixedFiles.length) {
12749
+ sections.push(`Files assigned BOTH as whole files and via hunks (pick one mode per file): ${issues.mixedFiles.join(', ')}`);
12750
+ }
12751
+ if (issues.partiallyCoveredFiles.length) {
12752
+ sections.push(`Files with only some hunks assigned (every hunk for these files must be covered): ${issues.partiallyCoveredFiles.join(', ')}`);
12753
+ }
12754
+ if (issues.missingFiles.length) {
12755
+ sections.push(`Staged files missing from every group (must appear exactly once): ${issues.missingFiles.join(', ')}`);
12756
+ }
12757
+ return sections.map((section) => `- ${section}`).join('\n');
12758
+ }
12759
+
12760
+ const NO_PREVIOUS_FEEDBACK_PLACEHOLDER = 'None — this is the first attempt.';
12761
+ const DEFAULT_MAX_PLAN_ATTEMPTS = 3;
12762
+ /**
12763
+ * Generate a commit-split plan with self-correcting retries on validator failures.
12764
+ *
12765
+ * The first attempt runs as normal. If `getPlanValidationIssues` reports problems with the result,
12766
+ * the validator's complaints are formatted as natural-language feedback and fed back
12767
+ * into the same prompt template (`previous_attempt_feedback` slot) so the model can
12768
+ * fix its own mistakes without re-running pre-processing.
12769
+ */
12770
+ async function generateValidatedCommitSplitPlan({ llm, prompt, variables, staged, hunkInventory, logger, tokenizer, metadata = {}, maxAttempts = DEFAULT_MAX_PLAN_ATTEMPTS, }) {
12771
+ let lastIssues = null;
12772
+ let attempt = 0;
12773
+ while (attempt < maxAttempts) {
12774
+ attempt++;
12775
+ const previousFeedback = lastIssues
12776
+ ? formatPlanValidationFeedback(lastIssues)
12777
+ : NO_PREVIOUS_FEEDBACK_PLACEHOLDER;
12778
+ const plan = await executeChainWithSchema(CommitSplitPlanSchema, llm, prompt, {
12779
+ ...variables,
12780
+ previous_attempt_feedback: previousFeedback,
12781
+ }, {
12782
+ logger,
12783
+ tokenizer,
12784
+ metadata: {
12785
+ task: 'commit-split-plan',
12786
+ ...metadata,
12787
+ planAttempt: attempt,
12788
+ },
12789
+ });
12790
+ const issues = getPlanValidationIssues(plan, staged, hunkInventory);
12791
+ if (!hasPlanValidationIssues(issues)) {
12792
+ if (attempt > 1 && logger) {
12793
+ logger.verbose(`Plan validated after ${attempt} attempts.`, { color: 'green' });
12794
+ }
12795
+ return { plan, attempts: attempt };
12796
+ }
12797
+ lastIssues = issues;
12798
+ if (logger) {
12799
+ logger.verbose(`Plan attempt ${attempt}/${maxAttempts} failed validation: ${formatPlanValidationIssuesError(issues)}`, { color: 'yellow' });
12800
+ }
12801
+ }
12802
+ throw new Error(lastIssues
12803
+ ? `Failed to produce a valid commit-split plan after ${maxAttempts} attempts. Final validator issues: ${formatPlanValidationIssuesError(lastIssues)}`
12804
+ : `Failed to produce a valid commit-split plan after ${maxAttempts} attempts.`);
12805
+ }
12806
+
12442
12807
  const COMMIT_SPLIT_PROMPT = prompts.PromptTemplate.fromTemplate(`You are helping split staged git changes into a small sequence of coherent commits.
12443
12808
 
12444
12809
  Return ONLY valid JSON matching this schema:
@@ -12455,14 +12820,13 @@ Return ONLY valid JSON matching this schema:
12455
12820
  }}
12456
12821
 
12457
12822
  Rules:
12458
- - Use each staged file exactly once.
12459
- - If a file has hunk IDs and contains unrelated changes, assign every hunk ID exactly once instead of assigning the whole file.
12460
- - Do not list the same file in "files" when assigning that file through "hunks".
12461
- - Only use file paths listed in the staged file inventory.
12462
- - Only use hunk IDs listed in the staged hunk inventory.
12823
+ - Every staged file MUST be assigned exactly once across all groups, either via "files" OR via every one of its hunk IDs (never both).
12824
+ - If you assign any hunk for a file, you MUST assign EVERY hunk for that file across the groups partial coverage is invalid.
12825
+ - Do not list the same file in "files" of more than one group, and do not assign the same hunk ID to more than one group.
12826
+ - Only use file paths listed in the staged file inventory. Do not invent files.
12827
+ - Only use hunk IDs listed in the staged hunk inventory. Do not invent hunk IDs.
12463
12828
  - Prefer 2-5 commits unless the changes are truly all one topic.
12464
12829
  - Keep commit titles concise and understandable.
12465
- - Do not invent files.
12466
12830
 
12467
12831
  Staged file inventory:
12468
12832
  {file_inventory}
@@ -12474,7 +12838,10 @@ Condensed staged diff:
12474
12838
  {summary}
12475
12839
 
12476
12840
  Additional context:
12477
- {additional_context}`);
12841
+ {additional_context}
12842
+
12843
+ Feedback on previous attempt (fix every item before responding):
12844
+ {previous_attempt_feedback}`);
12478
12845
  function isCommitSplitCommand(argv) {
12479
12846
  return Boolean(argv.split || argv.plan || argv.apply || argv._.includes('split'));
12480
12847
  }
@@ -12493,9 +12860,6 @@ function formatCommitSplitPlan(plan) {
12493
12860
  })
12494
12861
  .join('\n\n---\n\n');
12495
12862
  }
12496
- function getStagedFileSet(changes) {
12497
- return new Set(changes.map((change) => change.filePath));
12498
- }
12499
12863
  function getGroupFiles(group) {
12500
12864
  return group.files || [];
12501
12865
  }
@@ -12552,67 +12916,9 @@ function formatHunkInventory(inventory) {
12552
12916
  .join('\n');
12553
12917
  }
12554
12918
  function validatePlanForStagedFiles(plan, staged, hunkInventory) {
12555
- const stagedFiles = getStagedFileSet(staged);
12556
- const seen = new Set();
12557
- const seenHunks = new Set();
12558
- const unknown = [];
12559
- const duplicate = [];
12560
- const unknownHunks = [];
12561
- const duplicateHunks = [];
12562
- plan.groups.forEach((group) => {
12563
- getGroupFiles(group).forEach((file) => {
12564
- if (!stagedFiles.has(file)) {
12565
- unknown.push(file);
12566
- return;
12567
- }
12568
- if (seen.has(file)) {
12569
- duplicate.push(file);
12570
- return;
12571
- }
12572
- seen.add(file);
12573
- });
12574
- getGroupHunks(group).forEach((hunkId) => {
12575
- const hunk = hunkInventory?.byId.get(hunkId);
12576
- if (!hunk) {
12577
- unknownHunks.push(hunkId);
12578
- return;
12579
- }
12580
- if (seenHunks.has(hunkId)) {
12581
- duplicateHunks.push(hunkId);
12582
- return;
12583
- }
12584
- seenHunks.add(hunkId);
12585
- });
12586
- });
12587
- const hunkCoveredFiles = new Set([...seenHunks].map((hunkId) => hunkInventory?.byId.get(hunkId)?.filePath));
12588
- const mixedFiles = [...seen].filter((file) => hunkCoveredFiles.has(file));
12589
- const partiallyCoveredFiles = [...hunkCoveredFiles]
12590
- .filter((file) => Boolean(file))
12591
- .filter((file) => {
12592
- const fileHunks = hunkInventory?.byFile.get(file) || [];
12593
- return fileHunks.some((hunk) => !seenHunks.has(hunk.id));
12594
- });
12595
- const missing = [...stagedFiles].filter((file) => !seen.has(file) && !hunkCoveredFiles.has(file));
12596
- if (unknown.length ||
12597
- duplicate.length ||
12598
- unknownHunks.length ||
12599
- duplicateHunks.length ||
12600
- mixedFiles.length ||
12601
- partiallyCoveredFiles.length ||
12602
- missing.length) {
12603
- throw new Error([
12604
- unknown.length ? `unknown files: ${unknown.join(', ')}` : undefined,
12605
- duplicate.length ? `duplicate files: ${duplicate.join(', ')}` : undefined,
12606
- unknownHunks.length ? `unknown hunks: ${unknownHunks.join(', ')}` : undefined,
12607
- duplicateHunks.length ? `duplicate hunks: ${duplicateHunks.join(', ')}` : undefined,
12608
- mixedFiles.length ? `files assigned both as whole files and hunks: ${mixedFiles.join(', ')}` : undefined,
12609
- partiallyCoveredFiles.length
12610
- ? `files with only some hunks assigned: ${partiallyCoveredFiles.join(', ')}`
12611
- : undefined,
12612
- missing.length ? `missing files: ${missing.join(', ')}` : undefined,
12613
- ]
12614
- .filter(Boolean)
12615
- .join('; '));
12919
+ const issues = getPlanValidationIssues(plan, staged, hunkInventory);
12920
+ if (hasPlanValidationIssues(issues)) {
12921
+ throw new Error(formatPlanValidationIssuesError(issues));
12616
12922
  }
12617
12923
  }
12618
12924
  function assertNoUnstagedOverlap(plan, changes, hunkInventory) {
@@ -12716,22 +13022,26 @@ async function handleCommitSplit({ argv, config, git, logger, tokenizer, llm, })
12716
13022
  .map((change) => `- ${change.filePath}: ${change.status} - ${change.summary}`)
12717
13023
  .join('\n');
12718
13024
  const hunkInventoryText = formatHunkInventory(hunkInventory);
12719
- const plan = await executeChainWithSchema(CommitSplitPlanSchema, llm, COMMIT_SPLIT_PROMPT, {
12720
- file_inventory: fileInventory,
12721
- hunk_inventory: hunkInventoryText,
12722
- summary,
12723
- additional_context: argv.additional || '',
12724
- }, {
13025
+ const { plan } = await generateValidatedCommitSplitPlan({
13026
+ llm,
13027
+ prompt: COMMIT_SPLIT_PROMPT,
13028
+ variables: {
13029
+ file_inventory: fileInventory,
13030
+ hunk_inventory: hunkInventoryText,
13031
+ summary,
13032
+ additional_context: argv.additional || '',
13033
+ },
13034
+ staged: changes.staged,
13035
+ hunkInventory,
12725
13036
  logger,
12726
13037
  tokenizer,
12727
13038
  metadata: {
12728
- task: 'commit-split-plan',
12729
13039
  command: 'commit',
12730
13040
  provider: config.service.provider,
12731
13041
  model: String(config.service.model),
12732
13042
  },
13043
+ maxAttempts: DEFAULT_MAX_PLAN_ATTEMPTS,
12733
13044
  });
12734
- validatePlanForStagedFiles(plan, changes.staged, hunkInventory);
12735
13045
  if (argv.apply) {
12736
13046
  return await applyCommitSplitPlan({
12737
13047
  plan,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "git-coco",
3
- "version": "0.43.0",
3
+ "version": "0.45.0",
4
4
  "description": "zero-effort git commits with coco.",
5
5
  "author": "gfargo <ghfargo@gmail.com>",
6
6
  "license": "MIT",
@@ -85,7 +85,7 @@
85
85
  "ts-json-schema-generator": "^2.9.0",
86
86
  "ts-node": "^10.9.1",
87
87
  "tsx": "^4.16.5",
88
- "typescript": "^5.4.5"
88
+ "typescript": "^6.0.3"
89
89
  },
90
90
  "dependencies": {
91
91
  "@commitlint/core": "^20.5.0",