npm - git-coco - Versions diffs - 0.44.0 → 0.45.0 - Mend

git-coco 0.44.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -99,6 +99,35 @@ type BaseLLMService = {
      * @default 'balanced'
      */
     dynamicModelPreference?: DynamicModelPreference;
+    /**
+     * Opt-in fast paths that trade summary detail for speed. Each flag
+     * here replaces an LLM summary call with a deterministic templated
+     * extract for a specific file shape. Off by default — when enabled,
+     * you accept that final commit messages on those file shapes may be
+     * blander than LLM-generated summaries (the templated extract names
+     * structural changes only).
+     *
+     * Lossless optimizations (cache, trivial-shape skip on pure
+     * additions / deletions / renames / binary, sort discipline) ship
+     * default-on and are not configured here.
+     */
+    fastPath?: {
+        /**
+         * Replace the LLM summary with a templated heading extract for
+         * `.md` / `.mdx` / `.markdown` modification diffs that have clear
+         * heading-level structural changes. Diffs without structural
+         * signals (paragraph-only edits) still go to the LLM regardless
+         * of this flag.
+         *
+         * Bench impact (synthetic): collapses docs-update-shaped commits
+         * from ~24s cold to ~3ms (no LLM calls fire for the markdown
+         * files). Real-world wall-clock savings depend on per-call LLM
+         * latency.
+         *
+         * @default false
+         */
+        markdown?: boolean;
+    };
 };
 type Authentication = {
     type: 'None';
@@ -534,6 +563,19 @@ interface BaseParserOptions {
      * @default 6
      */
     maxConcurrent?: number;
+    /**
+     * Opt-in fast paths that trade summary detail for speed. Mirrors the
+     * `service.fastPath` shape. Off by default; lossless optimizations
+     * are not configured here.
+     */
+    fastPath?: {
+        /**
+         * Replace the LLM summary with a templated heading extract for
+         * markdown modification diffs with structural signals.
+         * @default false
+         */
+        markdown?: boolean;
+    };
     metadata?: Partial<LlmCallMetadata>;
 }
 interface BaseParserInput {

package/dist/index.esm.mjs CHANGED Viewed

@@ -54,7 +54,7 @@ import { pathToFileURL } from 'url';
 /**
  * Current build version from package.json
  */
-const BUILD_VERSION = "0.44.0";
+const BUILD_VERSION = "0.45.0";
 const isInteractive = (config) => {
     return config?.mode === 'interactive' || !!config?.interactive;
@@ -1228,6 +1228,18 @@ const schema$1 = {
                     "$ref": "#/definitions/DynamicModelPreference",
                     "description": "Default dynamic routing preference when model is set to \"dynamic\".",
                     "default": "balanced"
+                },
+                "fastPath": {
+                    "type": "object",
+                    "properties": {
+                        "markdown": {
+                            "type": "boolean",
+                            "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
+                            "default": false
+                        }
+                    },
+                    "additionalProperties": false,
+                    "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
                 }
             },
             "required": [
@@ -1641,6 +1653,18 @@ const schema$1 = {
                     "$ref": "#/definitions/DynamicModelPreference",
                     "description": "Default dynamic routing preference when model is set to \"dynamic\".",
                     "default": "balanced"
+                },
+                "fastPath": {
+                    "type": "object",
+                    "properties": {
+                        "markdown": {
+                            "type": "boolean",
+                            "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
+                            "default": false
+                        }
+                    },
+                    "additionalProperties": false,
+                    "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
                 }
             },
             "required": [
@@ -1797,6 +1821,18 @@ const schema$1 = {
                     "$ref": "#/definitions/DynamicModelPreference",
                     "description": "Default dynamic routing preference when model is set to \"dynamic\".",
                     "default": "balanced"
+                },
+                "fastPath": {
+                    "type": "object",
+                    "properties": {
+                        "markdown": {
+                            "type": "boolean",
+                            "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
+                            "default": false
+                        }
+                    },
+                    "additionalProperties": false,
+                    "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
                 }
             },
             "required": [
@@ -7890,6 +7926,109 @@ async function summarize(documents, { chain, textSplitter, options, logger, toke
     return res.text && res.text.trim();
 }
+/**
+ * Markdown-aware fast path (#861, angle 5). For modification diffs to
+ * `.md` / `.mdx` / `.markdown` files, build a templated summary from
+ * the changed structure (added / removed / updated headings) instead
+ * of paying for an LLM call. Mirrors `trivialDiff` from #845: a deterministic
+ * skip when the diff's meaning is captured by its shape.
+ *
+ * Quality / cost trade-off, on purpose: LLM summaries of markdown edits
+ * are wordier ("expanded the configuration section with new examples,
+ * fixed typos in troubleshooting") but most of that detail isn't load-
+ * bearing for a commit message. The templated summary names the
+ * structural changes (sections added, removed, or updated) plus a +/- line count, and
+ * defers to the LLM only when the diff has no clear structural signals
+ * (paragraph-only edits, where a templated summary would actually drop
+ * useful context).
+ */
+const MARKDOWN_EXTENSIONS = ['.md', '.markdown', '.mdx'];
+const MAX_HEADINGS_PER_BUCKET = 6;
+/**
+ * Report whether a path names a markdown file.
+ *
+ * The check is a case-insensitive suffix match against
+ * `MARKDOWN_EXTENSIONS`, so `README.MD` and `docs/guide.mdx` both qualify.
+ *
+ * @param path - File path as it appears in the diff.
+ * @returns `true` when the lower-cased path ends with a known markdown extension.
+ */
+function isMarkdownFile(path) {
+    const normalized = path.toLowerCase();
+    for (const extension of MARKDOWN_EXTENSIONS) {
+        if (normalized.endsWith(extension)) {
+            return true;
+        }
+    }
+    return false;
+}
+/**
+ * Build a deterministic, LLM-free summary for a markdown modification diff.
+ *
+ * @param fileDiff - Object exposing `file` (the path) and `diff` (unified-diff text).
+ * @returns A templated one-line summary naming added / removed / updated
+ *   headings followed by a `+N/-M lines` count, or `undefined` when the
+ *   file is not markdown, the diff carries no added/removed lines, or no
+ *   heading line changed. On `undefined` the caller falls through to its
+ *   regular summarization path.
+ */
+function summarizeMarkdownDiff(fileDiff) {
+    if (!isMarkdownFile(fileDiff.file))
+        return undefined;
+    const addedHeadings = new Set();
+    const removedHeadings = new Set();
+    let addedLines = 0;
+    let removedLines = 0;
+    // Becomes true once a hunk header (`@@`) has been seen for the current
+    // file section. Inside a hunk, a line starting with `--- ` or `+++ `
+    // is CONTENT (a removed line whose text starts with `-- `, or an added
+    // line whose text starts with `++ `), not a file header. Diffs that
+    // never contain `@@` keep the previous behaviour.
+    // NOTE(review): assumes git-style per-file diffs where file headers
+    // only precede the first hunk — confirm against the diff producer.
+    let inHunk = false;
+    for (const line of fileDiff.diff.split('\n')) {
+        if (line.startsWith('@@')) {
+            inHunk = true;
+            continue;
+        }
+        if (line.startsWith('diff --git')) {
+            inHunk = false;
+            continue;
+        }
+        const ambiguousFileHeader = line.startsWith('--- ') || line.startsWith('+++ ');
+        if (isHeaderLine$1(line) && !(inHunk && ambiguousFileHeader))
+            continue;
+        // NOTE(review): parseHeading runs on every +/- line, so `# ...`
+        // lines inside fenced code blocks (e.g. shell comments) are also
+        // reported as headings. Fence state is not tracked here because a
+        // hunk can start mid-fence.
+        if (line.startsWith('+')) {
+            addedLines++;
+            const heading = parseHeading(line.slice(1));
+            if (heading)
+                addedHeadings.add(heading);
+        }
+        else if (line.startsWith('-')) {
+            removedLines++;
+            const heading = parseHeading(line.slice(1));
+            if (heading)
+                removedHeadings.add(heading);
+        }
+    }
+    // No content change → nothing to summarize. Caller falls through.
+    if (addedLines === 0 && removedLines === 0)
+        return undefined;
+    // No structural signal → fall through to LLM. Only diffs with
+    // heading-level changes are fast-pathed; pure paragraph edits keep
+    // the detailed LLM summary.
+    if (addedHeadings.size === 0 && removedHeadings.size === 0) {
+        return undefined;
+    }
+    // A heading text present on both the `+` and `-` side is reported once
+    // as "updated" instead of as one addition plus one removal.
+    const updated = new Set([...addedHeadings].filter((h) => removedHeadings.has(h)));
+    const purelyAdded = [...addedHeadings].filter((h) => !updated.has(h));
+    const purelyRemoved = [...removedHeadings].filter((h) => !updated.has(h));
+    const parts = [`Updated markdown \`${fileDiff.file}\``];
+    if (purelyAdded.length) {
+        parts.push(`new sections: ${formatHeadingList(purelyAdded)}`);
+    }
+    if (purelyRemoved.length) {
+        parts.push(`removed sections: ${formatHeadingList(purelyRemoved)}`);
+    }
+    if (updated.size) {
+        parts.push(`updated sections: ${formatHeadingList([...updated])}`);
+    }
+    parts.push(`+${addedLines}/-${removedLines} lines`);
+    return `${parts.join('. ')}.`;
+}
+/**
+ * Join heading names for display, capping the list length.
+ *
+ * @param headings - Heading texts to render.
+ * @returns All headings joined with `, ` when there are at most
+ *   `MAX_HEADINGS_PER_BUCKET` of them; otherwise the first
+ *   `MAX_HEADINGS_PER_BUCKET` followed by ` (+K more)`.
+ */
+function formatHeadingList(headings) {
+    const overflow = headings.length - MAX_HEADINGS_PER_BUCKET;
+    if (overflow <= 0) {
+        return headings.join(', ');
+    }
+    const visible = headings.slice(0, MAX_HEADINGS_PER_BUCKET).join(', ');
+    return `${visible} (+${overflow} more)`;
+}
+/**
+ * Report whether a diff line starts with one of the known metadata
+ * prefixes (file headers, hunk headers, mode / rename / binary notices).
+ *
+ * The test is purely prefix-based and has no notion of hunk position.
+ *
+ * @param line - A single line of unified-diff text.
+ * @returns `true` when the line begins with any recognised header prefix.
+ */
+function isHeaderLine$1(line) {
+    const headerPrefixes = [
+        'diff --git',
+        'index ',
+        '--- ',
+        '+++ ',
+        '@@',
+        'new file mode',
+        'deleted file mode',
+        'similarity index',
+        'rename from ',
+        'rename to ',
+        'Binary files ',
+    ];
+    return headerPrefixes.some((prefix) => line.startsWith(prefix));
+}
+/**
+ * Extract the text of an ATX-style markdown heading (`#` … `######`).
+ *
+ * One to six leading `#` characters must be followed by whitespace and
+ * at least one character of content. An optional closing run of `#`
+ * characters is stripped when it is preceded by whitespace, so
+ * `## Title ##` yields `Title` while `# C#` keeps its trailing `#`.
+ *
+ * @param line - One line of markdown, without its diff marker.
+ * @returns The trimmed heading text, or `undefined` when the line is not a heading.
+ */
+function parseHeading(line) {
+    const match = line.match(/^#{1,6}\s+(.+?)(?:\s+#+)?\s*$/);
+    return match ? match[1].trim() : undefined;
+}
 /**
  * Inspect a unified-diff string and report its shape, or undefined
  * if the diff isn't trivial (mixed +/- lines, weird headers, etc.).
@@ -8027,7 +8166,7 @@ function isCacheEnabled$1() {
  * synthetic summaries usually drop the directory token totals under
  * budget so wave consolidation skips too.
  */
-async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, }) {
+async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, fastPath, }) {
     const trivialSummary = summarizeTrivialDiff(fileDiff);
     if (trivialSummary !== undefined) {
         logger.verbose(` - ${fileDiff.file}: trivial-shape skip (no LLM call)`, { color: 'gray' });
@@ -8037,6 +8176,25 @@ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, log
             tokenCount: tokenizer(trivialSummary),
         };
     }
+    // Markdown fast path (#861, angle 5). Opt-in via `fastPath.markdown`
+    // because it's a lossy optimization: the templated summary names
+    // structural changes only and drops body-text detail that an LLM
+    // summary would carry. Off by default; users who prefer summary
+    // fidelity over speed (which is the safer default for commit-message
+    // generation downstream) keep the LLM path. When the flag IS on, the
+    // fast path still falls through to the LLM for paragraph-only edits
+    // where a templated summary would lose useful context.
+    if (fastPath?.markdown) {
+        const markdownSummary = summarizeMarkdownDiff(fileDiff);
+        if (markdownSummary !== undefined) {
+            logger.verbose(` - ${fileDiff.file}: markdown fast-path skip (no LLM call)`, { color: 'gray' });
+            return {
+                ...fileDiff,
+                diff: markdownSummary,
+                tokenCount: tokenizer(markdownSummary),
+            };
+        }
+    }
     // Cache lookup (#845, PR 5). Keyed on the file's literal diff
     // content + the active model + the summarization prompt hash.
     // A hit returns the prior summary instantly; on iterative
@@ -8148,7 +8306,7 @@ function createLimit$2(maxConcurrent) {
  * @returns Array of file diffs with large files summarized
  */
 async function summarizeLargeFiles(diffs, options) {
-    const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter, metadata } = options;
+    const { maxFileTokens, minTokensForSummary, maxConcurrent, maxTokens, fastPath, tokenizer, logger, chain, textSplitter, metadata, } = options;
     // Identify files that need summarization
     const filesToSummarize = [];
     const results = [...diffs];
@@ -8160,17 +8318,57 @@ async function summarizeLargeFiles(diffs, options) {
     if (filesToSummarize.length === 0) {
         return results;
     }
-    logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
-    // Process large files in waves
-    const summarizedFiles = await processInWaves$1(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer, logger, metadata }), maxConcurrent);
-    // Update results with summarized files
-    summarizedFiles.forEach((summarizedDiff, i) => {
+    // Incremental termination (#861, PR 1). When the caller supplies a
+    // budget, dispatch biggest-first and re-check the running total per
+    // dispatch — once earlier completions drop the total under maxTokens,
+    // the remaining queued files skip the LLM and keep their raw diffs.
+    // Mirrors the Phase 3 pattern in `summarizeDiffs.ts`. Without a
+    // budget (undefined), behavior matches the prior path: every
+    // eligible file is summarized regardless.
+    filesToSummarize.sort((a, b) => b.diff.tokenCount - a.diff.tokenCount);
+    const incrementalTermination = maxTokens !== undefined;
+    let runningTotal = diffs.reduce((sum, diff) => sum + diff.tokenCount, 0);
+    let summarizedCount = 0;
+    let skippedCount = 0;
+    logger.verbose(`Pre-summarizing up to ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
+    const processed = await processInWaves$1(filesToSummarize, async ({ diff }) => {
+        // Re-check the budget at dispatch time when the caller supplied
+        // one. Earlier completions may have already dropped the total
+        // under the cap; in that case skip the LLM call entirely and
+        // keep the raw diff. Without a budget, every eligible file is
+        // summarized (preserves the prior behavior).
+        if (incrementalTermination && runningTotal <= maxTokens) {
+            return { diff, summarized: false };
+        }
+        const summarized = await summarizeFileDiff(diff, {
+            chain,
+            textSplitter,
+            tokenizer,
+            logger,
+            metadata,
+            fastPath,
+        });
+        const delta = diff.tokenCount - summarized.tokenCount;
+        if (delta > 0) {
+            runningTotal -= delta;
+        }
+        return { diff: summarized, summarized: true };
+    }, maxConcurrent);
+    processed.forEach((entry, i) => {
         const originalIndex = filesToSummarize[i].index;
+        if (!entry.summarized) {
+            skippedCount++;
+            return;
+        }
+        summarizedCount++;
         const originalTokens = results[originalIndex].tokenCount;
-        const newTokens = summarizedDiff.tokenCount;
-        logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
-        results[originalIndex] = summarizedDiff;
+        const newTokens = entry.diff.tokenCount;
+        logger.verbose(` - ${entry.diff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
+        results[originalIndex] = entry.diff;
     });
+    if (skippedCount > 0) {
+        logger.verbose(`Skipped ${skippedCount} pre-summary call(s) — token budget already met after ${summarizedCount} earlier file(s)`, { color: 'cyan' });
+    }
     return results;
 }
 /**
@@ -8436,7 +8634,7 @@ async function summarizeDiffs(rootDiffNode, { tokenizer, logger,
 // with the service defaults means a caller that omits
 // `maxTokens` doesn't accidentally fall into a tighter budget
 // than the rest of the system assumes.
-maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
+maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, fastPath, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
     // Calculate maxFileTokens as 25% of maxTokens if not specified
     const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
     // PHASE 1: Directory grouping & assessment
@@ -8460,6 +8658,13 @@ maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, t
         maxFileTokens: effectiveMaxFileTokens,
         minTokensForSummary,
         maxConcurrent,
+        // #861, PR 1: pass the overall budget so Phase 2 can short-circuit
+        // once earlier completions drop the running total under the cap.
+        maxTokens,
+        // #861, angle 5: opt-in markdown fast path. Off by default; when
+        // enabled, markdown modification diffs with structural signals
+        // resolve via a templated extract instead of an LLM call.
+        fastPath,
         tokenizer,
         logger,
         chain,
@@ -11437,7 +11642,7 @@ for (var i = 0; i < 256; i++) {
   simpleEscapeMap[i] = simpleEscapeSequence(i);
 }
-async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, metadata, }, }) {
+async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, fastPath, metadata, }, }) {
     const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
     const summarizationChain = loadSummarizationChain(model, {
         type: 'map_reduce',
@@ -11469,6 +11674,7 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
         minTokensForSummary,
         maxFileTokens,
         maxConcurrent,
+        fastPath,
         textSplitter,
         chain: summarizationChain,
         logger,
@@ -11488,6 +11694,7 @@ function createFileChangeParserOptions({ command, git, llm, logger, model, provi
         minTokensForSummary: service?.minTokensForSummary,
         maxFileTokens: service?.maxFileTokens,
         maxConcurrent: service?.maxConcurrent,
+        fastPath: service?.fastPath,
         metadata: {
             command,
             provider,

package/dist/index.js CHANGED Viewed

@@ -78,7 +78,7 @@ var readline__namespace = /*#__PURE__*/_interopNamespaceDefault(readline);
 /**
  * Current build version from package.json
  */
-const BUILD_VERSION = "0.44.0";
+const BUILD_VERSION = "0.45.0";
 const isInteractive = (config) => {
     return config?.mode === 'interactive' || !!config?.interactive;
@@ -1252,6 +1252,18 @@ const schema$1 = {
                     "$ref": "#/definitions/DynamicModelPreference",
                     "description": "Default dynamic routing preference when model is set to \"dynamic\".",
                     "default": "balanced"
+                },
+                "fastPath": {
+                    "type": "object",
+                    "properties": {
+                        "markdown": {
+                            "type": "boolean",
+                            "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
+                            "default": false
+                        }
+                    },
+                    "additionalProperties": false,
+                    "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
                 }
             },
             "required": [
@@ -1665,6 +1677,18 @@ const schema$1 = {
                     "$ref": "#/definitions/DynamicModelPreference",
                     "description": "Default dynamic routing preference when model is set to \"dynamic\".",
                     "default": "balanced"
+                },
+                "fastPath": {
+                    "type": "object",
+                    "properties": {
+                        "markdown": {
+                            "type": "boolean",
+                            "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
+                            "default": false
+                        }
+                    },
+                    "additionalProperties": false,
+                    "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
                 }
             },
             "required": [
@@ -1821,6 +1845,18 @@ const schema$1 = {
                     "$ref": "#/definitions/DynamicModelPreference",
                     "description": "Default dynamic routing preference when model is set to \"dynamic\".",
                     "default": "balanced"
+                },
+                "fastPath": {
+                    "type": "object",
+                    "properties": {
+                        "markdown": {
+                            "type": "boolean",
+                            "description": "Replace the LLM summary with a templated heading extract for `.md` / `.mdx` / `.markdown` modification diffs that have clear heading-level structural changes. Diffs without structural signals (paragraph-only edits) still go to the LLM regardless of this flag.\n\nBench impact (synthetic): collapses docs-update-shaped commits from ~24s cold to ~3ms (no LLM calls fire for the markdown files). Real-world wall-clock savings depend on per-call LLM latency.",
+                            "default": false
+                        }
+                    },
+                    "additionalProperties": false,
+                    "description": "Opt-in fast paths that trade summary detail for speed. Each flag here replaces an LLM summary call with a deterministic templated extract for a specific file shape. Off by default — when enabled, you accept that final commit messages on those file shapes may be blander than LLM-generated summaries (the templated extract names structural changes only).\n\nLossless optimizations (cache, trivial-shape skip on pure additions / deletions / renames / binary, sort discipline) ship default-on and are not configured here."
                 }
             },
             "required": [
@@ -7914,6 +7950,109 @@ async function summarize(documents$1, { chain, textSplitter, options, logger, to
     return res.text && res.text.trim();
 }
+/**
+ * Markdown-aware fast path (#861, angle 5). For modification diffs to
+ * `.md` / `.mdx` / `.markdown` files, build a templated summary from
+ * the changed structure (added / removed / updated headings) instead
+ * of paying for an LLM call. Mirrors `trivialDiff` from #845: a deterministic
+ * skip when the diff's meaning is captured by its shape.
+ *
+ * Quality / cost trade-off, on purpose: LLM summaries of markdown edits
+ * are wordier ("expanded the configuration section with new examples,
+ * fixed typos in troubleshooting") but most of that detail isn't load-
+ * bearing for a commit message. The templated summary names the
+ * structural changes (sections added, removed, or updated) plus a +/- line count, and
+ * defers to the LLM only when the diff has no clear structural signals
+ * (paragraph-only edits, where a templated summary would actually drop
+ * useful context).
+ */
+const MARKDOWN_EXTENSIONS = ['.md', '.markdown', '.mdx'];
+const MAX_HEADINGS_PER_BUCKET = 6;
+/**
+ * Report whether a path names a markdown file.
+ *
+ * The check is a case-insensitive suffix match against
+ * `MARKDOWN_EXTENSIONS`, so `README.MD` and `docs/guide.mdx` both qualify.
+ *
+ * @param path - File path as it appears in the diff.
+ * @returns `true` when the lower-cased path ends with a known markdown extension.
+ */
+function isMarkdownFile(path) {
+    const normalized = path.toLowerCase();
+    for (const extension of MARKDOWN_EXTENSIONS) {
+        if (normalized.endsWith(extension)) {
+            return true;
+        }
+    }
+    return false;
+}
+/**
+ * Build a deterministic, LLM-free summary for a markdown modification diff.
+ *
+ * @param fileDiff - Object exposing `file` (the path) and `diff` (unified-diff text).
+ * @returns A templated one-line summary naming added / removed / updated
+ *   headings followed by a `+N/-M lines` count, or `undefined` when the
+ *   file is not markdown, the diff carries no added/removed lines, or no
+ *   heading line changed. On `undefined` the caller falls through to its
+ *   regular summarization path.
+ */
+function summarizeMarkdownDiff(fileDiff) {
+    if (!isMarkdownFile(fileDiff.file))
+        return undefined;
+    const addedHeadings = new Set();
+    const removedHeadings = new Set();
+    let addedLines = 0;
+    let removedLines = 0;
+    // Becomes true once a hunk header (`@@`) has been seen for the current
+    // file section. Inside a hunk, a line starting with `--- ` or `+++ `
+    // is CONTENT (a removed line whose text starts with `-- `, or an added
+    // line whose text starts with `++ `), not a file header. Diffs that
+    // never contain `@@` keep the previous behaviour.
+    // NOTE(review): assumes git-style per-file diffs where file headers
+    // only precede the first hunk — confirm against the diff producer.
+    let inHunk = false;
+    for (const line of fileDiff.diff.split('\n')) {
+        if (line.startsWith('@@')) {
+            inHunk = true;
+            continue;
+        }
+        if (line.startsWith('diff --git')) {
+            inHunk = false;
+            continue;
+        }
+        const ambiguousFileHeader = line.startsWith('--- ') || line.startsWith('+++ ');
+        if (isHeaderLine$1(line) && !(inHunk && ambiguousFileHeader))
+            continue;
+        // NOTE(review): parseHeading runs on every +/- line, so `# ...`
+        // lines inside fenced code blocks (e.g. shell comments) are also
+        // reported as headings. Fence state is not tracked here because a
+        // hunk can start mid-fence.
+        if (line.startsWith('+')) {
+            addedLines++;
+            const heading = parseHeading(line.slice(1));
+            if (heading)
+                addedHeadings.add(heading);
+        }
+        else if (line.startsWith('-')) {
+            removedLines++;
+            const heading = parseHeading(line.slice(1));
+            if (heading)
+                removedHeadings.add(heading);
+        }
+    }
+    // No content change → nothing to summarize. Caller falls through.
+    if (addedLines === 0 && removedLines === 0)
+        return undefined;
+    // No structural signal → fall through to LLM. Only diffs with
+    // heading-level changes are fast-pathed; pure paragraph edits keep
+    // the detailed LLM summary.
+    if (addedHeadings.size === 0 && removedHeadings.size === 0) {
+        return undefined;
+    }
+    // A heading text present on both the `+` and `-` side is reported once
+    // as "updated" instead of as one addition plus one removal.
+    const updated = new Set([...addedHeadings].filter((h) => removedHeadings.has(h)));
+    const purelyAdded = [...addedHeadings].filter((h) => !updated.has(h));
+    const purelyRemoved = [...removedHeadings].filter((h) => !updated.has(h));
+    const parts = [`Updated markdown \`${fileDiff.file}\``];
+    if (purelyAdded.length) {
+        parts.push(`new sections: ${formatHeadingList(purelyAdded)}`);
+    }
+    if (purelyRemoved.length) {
+        parts.push(`removed sections: ${formatHeadingList(purelyRemoved)}`);
+    }
+    if (updated.size) {
+        parts.push(`updated sections: ${formatHeadingList([...updated])}`);
+    }
+    parts.push(`+${addedLines}/-${removedLines} lines`);
+    return `${parts.join('. ')}.`;
+}
+/**
+ * Join heading names for display, capping the list length.
+ *
+ * @param headings - Heading texts to render.
+ * @returns All headings joined with `, ` when there are at most
+ *   `MAX_HEADINGS_PER_BUCKET` of them; otherwise the first
+ *   `MAX_HEADINGS_PER_BUCKET` followed by ` (+K more)`.
+ */
+function formatHeadingList(headings) {
+    const overflow = headings.length - MAX_HEADINGS_PER_BUCKET;
+    if (overflow <= 0) {
+        return headings.join(', ');
+    }
+    const visible = headings.slice(0, MAX_HEADINGS_PER_BUCKET).join(', ');
+    return `${visible} (+${overflow} more)`;
+}
+/**
+ * Report whether a diff line starts with one of the known metadata
+ * prefixes (file headers, hunk headers, mode / rename / binary notices).
+ *
+ * The test is purely prefix-based and has no notion of hunk position.
+ *
+ * @param line - A single line of unified-diff text.
+ * @returns `true` when the line begins with any recognised header prefix.
+ */
+function isHeaderLine$1(line) {
+    const headerPrefixes = [
+        'diff --git',
+        'index ',
+        '--- ',
+        '+++ ',
+        '@@',
+        'new file mode',
+        'deleted file mode',
+        'similarity index',
+        'rename from ',
+        'rename to ',
+        'Binary files ',
+    ];
+    return headerPrefixes.some((prefix) => line.startsWith(prefix));
+}
+/**
+ * Extract the text of an ATX-style markdown heading (`#` … `######`).
+ *
+ * One to six leading `#` characters must be followed by whitespace and
+ * at least one character of content. An optional closing run of `#`
+ * characters is stripped when it is preceded by whitespace, so
+ * `## Title ##` yields `Title` while `# C#` keeps its trailing `#`.
+ *
+ * @param line - One line of markdown, without its diff marker.
+ * @returns The trimmed heading text, or `undefined` when the line is not a heading.
+ */
+function parseHeading(line) {
+    const match = line.match(/^#{1,6}\s+(.+?)(?:\s+#+)?\s*$/);
+    return match ? match[1].trim() : undefined;
+}
 /**
  * Inspect a unified-diff string and report its shape, or undefined
  * if the diff isn't trivial (mixed +/- lines, weird headers, etc.).
@@ -8051,7 +8190,7 @@ function isCacheEnabled$1() {
  * synthetic summaries usually drop the directory token totals under
  * budget so wave consolidation skips too.
  */
-async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, }) {
+async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, fastPath, }) {
     const trivialSummary = summarizeTrivialDiff(fileDiff);
     if (trivialSummary !== undefined) {
         logger.verbose(` - ${fileDiff.file}: trivial-shape skip (no LLM call)`, { color: 'gray' });
@@ -8061,6 +8200,25 @@ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, log
             tokenCount: tokenizer(trivialSummary),
         };
     }
+    // Markdown fast path (#861, angle 5). Opt-in via `fastPath.markdown`
+    // because it's a lossy optimization: the templated summary names
+    // structural changes only and drops body-text detail that an LLM
+    // summary would carry. Off by default; users who prefer summary
+    // fidelity over speed (which is the safer default for commit-message
+    // generation downstream) keep the LLM path. When the flag IS on, the
+    // fast path still falls through to the LLM for paragraph-only edits
+    // where a templated summary would lose useful context.
+    if (fastPath?.markdown) {
+        const markdownSummary = summarizeMarkdownDiff(fileDiff);
+        if (markdownSummary !== undefined) {
+            logger.verbose(` - ${fileDiff.file}: markdown fast-path skip (no LLM call)`, { color: 'gray' });
+            return {
+                ...fileDiff,
+                diff: markdownSummary,
+                tokenCount: tokenizer(markdownSummary),
+            };
+        }
+    }
     // Cache lookup (#845, PR 5). Keyed on the file's literal diff
     // content + the active model + the summarization prompt hash.
     // A hit returns the prior summary instantly; on iterative
@@ -8172,7 +8330,7 @@ function createLimit$2(maxConcurrent) {
  * @returns Array of file diffs with large files summarized
  */
 async function summarizeLargeFiles(diffs, options) {
-    const { maxFileTokens, minTokensForSummary, maxConcurrent, tokenizer, logger, chain, textSplitter, metadata } = options;
+    const { maxFileTokens, minTokensForSummary, maxConcurrent, maxTokens, fastPath, tokenizer, logger, chain, textSplitter, metadata, } = options;
     // Identify files that need summarization
     const filesToSummarize = [];
     const results = [...diffs];
@@ -8184,17 +8342,57 @@ async function summarizeLargeFiles(diffs, options) {
     if (filesToSummarize.length === 0) {
         return results;
     }
-    logger.verbose(`Pre-summarizing ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
-    // Process large files in waves
-    const summarizedFiles = await processInWaves$1(filesToSummarize, async ({ diff }) => summarizeFileDiff(diff, { chain, textSplitter, tokenizer, logger, metadata }), maxConcurrent);
-    // Update results with summarized files
-    summarizedFiles.forEach((summarizedDiff, i) => {
+    // Incremental termination (#861, PR 1). When the caller supplies a
+    // budget, dispatch biggest-first and re-check the running total per
+    // dispatch — once earlier completions drop the total under maxTokens,
+    // the remaining queued files skip the LLM and keep their raw diffs.
+    // Mirrors the Phase 3 pattern in `summarizeDiffs.ts`. Without a
+    // budget (undefined), behavior matches the prior path: every
+    // eligible file is summarized regardless.
+    filesToSummarize.sort((a, b) => b.diff.tokenCount - a.diff.tokenCount);
+    const incrementalTermination = maxTokens !== undefined;
+    let runningTotal = diffs.reduce((sum, diff) => sum + diff.tokenCount, 0);
+    let summarizedCount = 0;
+    let skippedCount = 0;
+    logger.verbose(`Pre-summarizing up to ${filesToSummarize.length} large file(s)...`, { color: 'blue' });
+    const processed = await processInWaves$1(filesToSummarize, async ({ diff }) => {
+        // Re-check the budget at dispatch time when the caller supplied
+        // one. Earlier completions may have already dropped the total
+        // under the cap; in that case skip the LLM call entirely and
+        // keep the raw diff. Without a budget, every eligible file is
+        // summarized (preserves the prior behavior).
+        if (incrementalTermination && runningTotal <= maxTokens) {
+            return { diff, summarized: false };
+        }
+        const summarized = await summarizeFileDiff(diff, {
+            chain,
+            textSplitter,
+            tokenizer,
+            logger,
+            metadata,
+            fastPath,
+        });
+        const delta = diff.tokenCount - summarized.tokenCount;
+        if (delta > 0) {
+            runningTotal -= delta;
+        }
+        return { diff: summarized, summarized: true };
+    }, maxConcurrent);
+    processed.forEach((entry, i) => {
         const originalIndex = filesToSummarize[i].index;
+        if (!entry.summarized) {
+            skippedCount++;
+            return;
+        }
+        summarizedCount++;
         const originalTokens = results[originalIndex].tokenCount;
-        const newTokens = summarizedDiff.tokenCount;
-        logger.verbose(` - ${summarizedDiff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
-        results[originalIndex] = summarizedDiff;
+        const newTokens = entry.diff.tokenCount;
+        logger.verbose(` - ${entry.diff.file}: ${originalTokens} -> ${newTokens} tokens`, { color: 'magenta' });
+        results[originalIndex] = entry.diff;
     });
+    if (skippedCount > 0) {
+        logger.verbose(`Skipped ${skippedCount} pre-summary call(s) — token budget already met after ${summarizedCount} earlier file(s)`, { color: 'cyan' });
+    }
     return results;
 }
 /**
@@ -8460,7 +8658,7 @@ async function summarizeDiffs(rootDiffNode, { tokenizer, logger,
 // with the service defaults means a caller that omits
 // `maxTokens` doesn't accidentally fall into a tighter budget
 // than the rest of the system assumes.
-maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
+maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, fastPath, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
     // Calculate maxFileTokens as 25% of maxTokens if not specified
     const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
     // PHASE 1: Directory grouping & assessment
@@ -8484,6 +8682,13 @@ maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, t
         maxFileTokens: effectiveMaxFileTokens,
         minTokensForSummary,
         maxConcurrent,
+        // #861, PR 1: pass the overall budget so Phase 2 can short-circuit
+        // once earlier completions drop the running total under the cap.
+        maxTokens,
+        // #861, angle 5: opt-in markdown fast path. Off by default; when
+        // enabled, markdown modification diffs with structural signals
+        // resolve via a templated extract instead of an LLM call.
+        fastPath,
         tokenizer,
         logger,
         chain,
@@ -11461,7 +11666,7 @@ for (var i = 0; i < 256; i++) {
   simpleEscapeMap[i] = simpleEscapeSequence(i);
 }
-async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, metadata, }, }) {
+async function fileChangeParser({ changes, commit, options: { tokenizer, git, llm: model, logger, maxTokens, minTokensForSummary, maxFileTokens, maxConcurrent, fastPath, metadata, }, }) {
     const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 10000, chunkOverlap: 250 });
     const summarizationChain = loadSummarizationChain(model, {
         type: 'map_reduce',
@@ -11493,6 +11698,7 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
         minTokensForSummary,
         maxFileTokens,
         maxConcurrent,
+        fastPath,
         textSplitter,
         chain: summarizationChain,
         logger,
@@ -11512,6 +11718,7 @@ function createFileChangeParserOptions({ command, git, llm, logger, model, provi
         minTokensForSummary: service?.minTokensForSummary,
         maxFileTokens: service?.maxFileTokens,
         maxConcurrent: service?.maxConcurrent,
+        fastPath: service?.fastPath,
         metadata: {
             command,
             provider,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "git-coco",
-  "version": "0.44.0",
+  "version": "0.45.0",
   "description": "zero-effort git commits with coco.",
   "author": "gfargo <ghfargo@gmail.com>",
   "license": "MIT",
@@ -85,7 +85,7 @@
     "ts-json-schema-generator": "^2.9.0",
     "ts-node": "^10.9.1",
     "tsx": "^4.16.5",
-    "typescript": "^5.4.5"
+    "typescript": "^6.0.3"
   },
   "dependencies": {
     "@commitlint/core": "^20.5.0",