npm - git-coco - Versions diffs - 0.42.0 → 0.43.0 - Mend

git-coco 0.42.0 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.esm.mjs CHANGED Viewed

@@ -5,6 +5,8 @@ import yargs from 'yargs';
 import chalk from 'chalk';
 import * as fs from 'fs';
 import fs__default, { promises, existsSync, readFileSync, readdirSync } from 'fs';
+import * as crypto from 'node:crypto';
+import { createHash } from 'node:crypto';
 import * as ini from 'ini';
 import * as os from 'os';
 import os__default, { tmpdir } from 'os';
@@ -14,11 +16,12 @@ import Ajv from 'ajv';
 import ora from 'ora';
 import now from 'performance-now';
 import prettyMilliseconds from 'pretty-ms';
+import * as fs$1 from 'node:fs';
+import * as os$1 from 'node:os';
+import * as path$1 from 'node:path';
 import { ChatAnthropic } from '@langchain/anthropic';
 import { ChatOllama } from '@langchain/ollama';
 import { ChatOpenAI } from '@langchain/openai';
-import * as fs$1 from 'node:fs';
-import * as path$1 from 'node:path';
 import { StructuredOutputParser, BaseOutputParser, StringOutputParser } from '@langchain/core/output_parsers';
 import { minimatch } from 'minimatch';
 import { simpleGit, GitError } from 'simple-git';
@@ -40,8 +43,6 @@ import '@langchain/core/utils/async_caller';
 import { encoding_for_model } from 'tiktoken';
 import { spawn, exec, execFile } from 'child_process';
 import { spawnSync } from 'node:child_process';
-import * as os$1 from 'node:os';
-import * as crypto from 'node:crypto';
 import * as readline from 'readline';
 import readline__default from 'readline';
 import { promisify } from 'util';
@@ -53,7 +54,7 @@ import { pathToFileURL } from 'url';
 /**
  * Current build version from package.json
  */
-const BUILD_VERSION = "0.42.0";
+const BUILD_VERSION = "0.43.0";
 const isInteractive = (config) => {
     return config?.mode === 'interactive' || !!config?.interactive;
@@ -229,6 +230,19 @@ const SUMMARIZE_PROMPT = new PromptTemplate({
     inputVariables: inputVariables$4,
     template: template$4,
 });
+/**
+ * Stable fingerprint of the active summarization template (#845, PR 5).
+ *
+ * The diff-summary cache keys include this hash so any prompt edit
+ * invalidates prior cache entries automatically — no manual bumps,
+ * no stale outputs that no longer reflect the current prompt's voice
+ * or rules. Only the template body matters; whitespace differences
+ * still re-key the cache, which is the safe default.
+ */
+const SUMMARIZE_PROMPT_HASH = createHash('sha256')
+    .update(template$4)
+    .digest('hex')
+    .slice(0, 16);
 /**
  * Base class for all LangChain-related errors
@@ -412,10 +426,21 @@ function getDefaultServiceApiKey(config) {
 }
 const DEFAULT_OPENAI_LLM_SERVICE = {
     provider: 'openai',
-    model: 'gpt-4o-mini',
+    // Bumped from `gpt-4o-mini` to `gpt-4.1-nano` (#854). Diff
+    // condensing is bounded summarization — the cheaper / faster
+    // tier is the right default for it; quality is on par for this
+    // class of task. Users who want the older 4o-mini can still
+    // override via service config.
+    model: 'gpt-4.1-nano',
     tokenLimit: 4096,
     temperature: 0.32,
-    maxConcurrent: 12,
+    // Bumped 12 → 24 (#845, PR 3). The OpenAI fast tier comfortably
+    // handles ~30 concurrent on the per-key default rate limit; 24
+    // leaves headroom for retries while still doubling throughput.
+    // The summarize chain has a 429-aware backoff (`summarize`
+    // helper) so a temporary rate-limit hit no longer kills the
+    // whole pipeline.
+    maxConcurrent: 24,
     minTokensForSummary: 800,
     maxFileTokens: 2000,
     authentication: {
@@ -427,10 +452,20 @@ const DEFAULT_OPENAI_LLM_SERVICE = {
 };
 const DEFAULT_ANTHROPIC_LLM_SERVICE = {
     provider: 'anthropic',
-    model: 'claude-3-5-sonnet-20240620',
+    // Bumped from `claude-3-5-sonnet-20240620` to
+    // `claude-haiku-4-5-20251001` (#854). The Sonnet 3.5 default
+    // was nearly two model generations stale; Haiku 4.5 is the
+    // current fast tier and the right fit for diff summarization.
+    // Users who want Sonnet for quality-sensitive runs can still
+    // override via service config (recommended: `claude-sonnet-4-6`).
+    model: 'claude-haiku-4-5-20251001',
     temperature: 0.32,
     tokenLimit: 4096,
-    maxConcurrent: 12,
+    // Bumped 12 → 24 (#845, PR 3). Matches the OpenAI default;
+    // Anthropic's per-key concurrency on Haiku is generous enough
+    // that 24 stays under the rate ceiling for typical fast-model
+    // request shapes. Backoff in `summarize` handles spikes.
+    maxConcurrent: 24,
     minTokensForSummary: 800,
     maxFileTokens: 2000,
     authentication: {
@@ -1414,6 +1449,10 @@ const schema$1 = {
         "AnthropicModel": {
             "type": "string",
             "enum": [
+                "claude-sonnet-4-6",
+                "claude-haiku-4-5-20251001",
+                "claude-haiku-4-5",
+                "claude-opus-4-7",
                 "claude-sonnet-4-0",
                 "claude-3-7-sonnet-latest",
                 "claude-3-5-haiku-latest",
@@ -1947,6 +1986,44 @@ function parseServiceConfig(service) {
     }
 }
+/**
+ * Ensure the canonical default ignore lists are always present in
+ * the resolved config (#851). User-provided `ignoredFiles` /
+ * `ignoredExtensions` arrays from XDG / git / project / env config
+ * sources used to *replace* the defaults wholesale via the shallow
+ * spread in each loader, which silently dropped lockfile + node_modules
+ * entries the moment a user provided their own list. The reported
+ * symptom: `pnpm-lock.yaml` reaching the diff-condensing pipeline
+ * after a user added `.coco.config.json` for unrelated overrides.
+ *
+ * Now: user values are *unioned* with the defaults. Order is preserved
+ * (defaults first, then user-only additions in their original order).
+ * Duplicates are de-duped. The defaults can no longer be opted out of —
+ * the cost of accidentally summarizing a lockfile (minutes of LLM time
+ * per commit) outweighs the niche case of intentionally excluding a
+ * default lockfile pattern.
+ */
+function unionPreservingOrder(base, extras) {
+    if (!extras || extras.length === 0)
+        return [...base];
+    const seen = new Set(base);
+    const merged = [...base];
+    for (const value of extras) {
+        if (!seen.has(value)) {
+            seen.add(value);
+            merged.push(value);
+        }
+    }
+    return merged;
+}
+function mergeIgnoreLists(config) {
+    return {
+        ...config,
+        ignoredFiles: unionPreservingOrder(DEFAULT_IGNORED_FILES$1, config.ignoredFiles),
+        ignoredExtensions: unionPreservingOrder(DEFAULT_IGNORED_EXTENSIONS$1, config.ignoredExtensions),
+    };
+}
 /**
  * Tracked config sources populated during the last loadConfig call.
  * Useful for diagnostics (e.g. `coco doctor`).
@@ -1997,6 +2074,13 @@ function loadConfig(argv = {}) {
     config = envConfig;
     if (envActive)
         sources.push({ source: 'env' });
+    // Re-apply the canonical default ignore lists after every loader has
+    // had a chance to override (#851). Each loader replaces ignoredFiles
+    // / ignoredExtensions wholesale via shallow spread, which used to
+    // silently drop the lockfile + node_modules defaults the moment a
+    // user provided their own list. The merge is a union — defaults first,
+    // user-only entries appended.
+    config = mergeIgnoreLists(config);
     _lastConfigSources = sources;
     return { ...config, ...argv };
 }
@@ -2171,6 +2255,232 @@ function commandExecutor(handler) {
     };
 }
+const command$8 = 'cache <subcommand>';
+const builder$8 = (yargs) => {
+    return yargs
+        .positional('subcommand', {
+        describe: 'Cache action to run (clear, info)',
+        type: 'string',
+        choices: ['clear', 'info'],
+    })
+        .usage(getCommandUsageHeader(command$8));
+};
+/**
+ * Per-repo disk cache of LLM-summarized diffs (#845, PR 5). On a
+ * re-run of `coco commit` after a small change, most files have
+ * unchanged content and unchanged diffs — caching their summaries
+ * by content hash means the second run skips the LLM entirely for
+ * those files and only pays for what's actually different.
+ *
+ * Strict best-effort: read failures fall back to "no cache" (the
+ * pipeline runs the LLM as before), and write failures are
+ * swallowed silently. The cache is never load-bearing.
+ *
+ * Repos are keyed by a short hash of their absolute path. No PII
+ * in the cache filename, and re-creating a repo at the same path
+ * keeps the same cache.
+ *
+ * Cache key: `sha256(diff + '\x1f' + model + '\x1f' + promptHash)` (hex digest).
+ *   - diff: the literal diff text being summarized
+ *   - model: switching models invalidates (different summaries)
+ *   - promptHash: editing the SUMMARIZE_PROMPT template invalidates
+ *
+ * Cap: 500 entries per repo. LRU eviction on overflow keeps the
+ * cache file under ~500 KB on a typical repo (each entry is a
+ * sha256 hash + 200-500-byte summary).
+ */
+const CACHE_SCHEMA_VERSION$1 = 1;
+const CACHE_DIR_NAME$1 = 'diff-summaries';
+const CACHE_ENTRY_HARD_CAP = 500;
+function resolveCacheDir$4() {
+    const xdg = process.env.XDG_CACHE_HOME;
+    if (xdg && xdg.trim().length > 0) {
+        return path$1.join(xdg, 'coco', CACHE_DIR_NAME$1);
+    }
+    return path$1.join(os$1.homedir(), '.cache', 'coco', CACHE_DIR_NAME$1);
+}
+function repoKey$3(repoPath) {
+    // sha256 here is a non-security cache-key derivation — deterministic
+    // short identifier for the cache filename so two repos at different
+    // paths never collide. We truncate to 16 chars; collision-resistance
+    // against an adversary is not required.
+    return crypto.createHash('sha256').update(repoPath).digest('hex').slice(0, 16);
+}
+function getDiffSummaryCachePath(repoPath) {
+    return path$1.join(resolveCacheDir$4(), `summaries.${repoKey$3(repoPath)}.json`);
+}
+/**
+ * Build the cache key for a (diff, model, prompt) tuple. sha256
+ * because we want a strong content-hash; the per-entry storage cost
+ * is dominated by the summary text anyway.
+ */
+function diffSummaryKey(diff, model, promptHash) {
+    return crypto
+        .createHash('sha256')
+        .update(`${diff}\x1f${model}\x1f${promptHash}`)
+        .digest('hex');
+}
+function readEnvelope(filePath) {
+    try {
+        const raw = fs$1.readFileSync(filePath, 'utf8');
+        const parsed = JSON.parse(raw);
+        if (parsed.version !== CACHE_SCHEMA_VERSION$1)
+            return undefined;
+        if (!parsed.entries || typeof parsed.entries !== 'object')
+            return undefined;
+        return parsed;
+    }
+    catch {
+        return undefined;
+    }
+}
+function readDiffSummary(repoPath, key) {
+    const envelope = readEnvelope(getDiffSummaryCachePath(repoPath));
+    if (!envelope)
+        return undefined;
+    const entry = envelope.entries[key];
+    if (!entry)
+        return undefined;
+    return entry;
+}
+function writeDiffSummary(repoPath, key, entry) {
+    const filePath = getDiffSummaryCachePath(repoPath);
+    const existing = readEnvelope(filePath) || {
+        version: CACHE_SCHEMA_VERSION$1,
+        savedAt: new Date().toISOString(),
+        entries: {},
+    };
+    existing.entries[key] = { ...entry, lastAccessedAt: new Date().toISOString() };
+    existing.savedAt = new Date().toISOString();
+    const evictedEntries = enforceHardCap(existing.entries);
+    if (evictedEntries.length > 0) {
+        for (const evicted of evictedEntries) {
+            delete existing.entries[evicted];
+        }
+    }
+    try {
+        fs$1.mkdirSync(path$1.dirname(filePath), { recursive: true });
+        fs$1.writeFileSync(filePath, JSON.stringify(existing));
+    }
+    catch {
+        // Best-effort persistence; swallow.
+    }
+}
+/**
+ * Touch an existing entry's lastAccessedAt so LRU eviction prefers
+ * dropping older / unused entries. Caller is expected to know the
+ * entry exists (read returned a hit).
+ */
+function touchDiffSummary(repoPath, key) {
+    const filePath = getDiffSummaryCachePath(repoPath);
+    const envelope = readEnvelope(filePath);
+    if (!envelope || !envelope.entries[key])
+        return;
+    envelope.entries[key] = {
+        ...envelope.entries[key],
+        lastAccessedAt: new Date().toISOString(),
+    };
+    envelope.savedAt = new Date().toISOString();
+    try {
+        fs$1.writeFileSync(filePath, JSON.stringify(envelope));
+    }
+    catch {
+        // Swallow.
+    }
+}
+function enforceHardCap(entries) {
+    const keys = Object.keys(entries);
+    if (keys.length <= CACHE_ENTRY_HARD_CAP)
+        return [];
+    // Sort by lastAccessedAt ascending (oldest first), drop the
+    // oldest (keys.length - CACHE_ENTRY_HARD_CAP) entries.
+    const sorted = keys
+        .map((key) => ({ key, accessed: Date.parse(entries[key].lastAccessedAt) || 0 }))
+        .sort((a, b) => a.accessed - b.accessed);
+    const toEvict = sorted.slice(0, keys.length - CACHE_ENTRY_HARD_CAP).map((entry) => entry.key);
+    return toEvict;
+}
+/** Remove the entire cache file for the repo. Used by `coco cache clear`. */
+function clearDiffSummaryCache(repoPath) {
+    const filePath = getDiffSummaryCachePath(repoPath);
+    if (!fs$1.existsSync(filePath)) {
+        return { ok: true, removed: false };
+    }
+    try {
+        fs$1.unlinkSync(filePath);
+        return { ok: true, removed: true };
+    }
+    catch {
+        return { ok: false, removed: false };
+    }
+}
+function readEnvelopeOrUndefined(filePath) {
+    try {
+        if (!fs$1.existsSync(filePath))
+            return undefined;
+        const raw = fs$1.readFileSync(filePath, 'utf8');
+        return JSON.parse(raw);
+    }
+    catch {
+        return undefined;
+    }
+}
+function formatBytes(bytes) {
+    if (bytes < 1024)
+        return `${bytes} B`;
+    if (bytes < 1024 * 1024)
+        return `${(bytes / 1024).toFixed(1)} KB`;
+    return `${(bytes / 1024 / 1024).toFixed(2)} MB`;
+}
+const handler$8 = async (argv, logger) => {
+    const subcommand = argv.subcommand;
+    const repoPath = process.cwd();
+    const cachePath = getDiffSummaryCachePath(repoPath);
+    if (subcommand === 'clear') {
+        const result = clearDiffSummaryCache(repoPath);
+        if (!result.ok) {
+            logger.log(chalk.red(`Failed to clear diff-summary cache at ${cachePath}`));
+            process.exitCode = 1;
+            return;
+        }
+        if (result.removed) {
+            logger.log(chalk.green(`Cleared diff-summary cache at ${cachePath}`));
+        }
+        else {
+            logger.log(chalk.dim(`No diff-summary cache to clear (${cachePath})`));
+        }
+        return;
+    }
+    if (subcommand === 'info') {
+        const envelope = readEnvelopeOrUndefined(cachePath);
+        if (!envelope) {
+            logger.log(chalk.dim(`No diff-summary cache for this repo (${cachePath})`));
+            return;
+        }
+        const stat = fs$1.statSync(cachePath);
+        const entryCount = Object.keys(envelope.entries).length;
+        const totalSummaryTokens = Object.values(envelope.entries).reduce((sum, entry) => sum + entry.tokens, 0);
+        logger.log(chalk.bold('Diff-summary cache') + ` ${chalk.dim(cachePath)}`);
+        logger.log(`  ${chalk.green('entries')}            ${entryCount}`);
+        logger.log(`  ${chalk.green('on-disk size')}       ${formatBytes(stat.size)}`);
+        logger.log(`  ${chalk.green('summary tokens')}     ${totalSummaryTokens}`);
+        logger.log(`  ${chalk.green('last saved')}         ${envelope.savedAt}`);
+        return;
+    }
+    logger.log(chalk.red(`Unknown cache subcommand: ${subcommand}`));
+    logger.log(chalk.dim('Use one of: clear, info'));
+    process.exitCode = 1;
+};
+var cache = {
+    command: command$8,
+    desc: 'Manage the diff-summary cache (clear, info)',
+    builder: builder$8,
+    handler: commandExecutor(handler$8),
+};
 var util;
 (function (util) {
     util.assertEqual = (_) => { };
@@ -7505,6 +7815,56 @@ function getPathFromFilePath(filePath) {
     return filePath.split('/').slice(0, -1).join('/');
 }
+/**
+ * Adaptive backoff (#845, PR 3). Wraps the chain invocation so a
+ * transient 429 (rate limit) or 5xx no longer kills the whole
+ * pipeline — instead we wait briefly and retry up to N times
+ * before surfacing the failure.
+ *
+ * Cap is intentionally short. Diff condensing fans out to many
+ * concurrent calls; if rate limits hit hard, queueing requests
+ * indefinitely just makes the user wait longer for a result the
+ * pipeline ultimately handles via fewer concurrent passes anyway.
+ * 3 retries with 1s/2s/4s waits trade ~7s of worst-case extra
+ * latency for resilience to brief rate-limit blips.
+ */
+const BACKOFF_RETRIES = 3;
+const BACKOFF_BASE_MS = 1000;
+const BACKOFF_CAP_MS = 5000;
+function isRetryableError(error) {
+    if (!error || typeof error !== 'object')
+        return false;
+    const err = error;
+    if (err.status === 429 || err.status === 503 || err.status === 502 || err.status === 504) {
+        return true;
+    }
+    if (err.code === 429 || err.code === 'rate_limit_exceeded' || err.code === 'ECONNRESET' || err.code === 'ETIMEDOUT') {
+        return true;
+    }
+    if (typeof err.message === 'string' && /(rate.?limit|429|too many requests|timeout|temporarily unavailable)/i.test(err.message)) {
+        return true;
+    }
+    return false;
+}
+async function invokeWithBackoff(chain, input, logger) {
+    let lastError;
+    for (let attempt = 0; attempt <= BACKOFF_RETRIES; attempt++) {
+        try {
+            return await chain.invoke(input);
+        }
+        catch (error) {
+            lastError = error;
+            if (!isRetryableError(error) || attempt === BACKOFF_RETRIES) {
+                throw error;
+            }
+            const wait = Math.min(BACKOFF_CAP_MS, BACKOFF_BASE_MS * Math.pow(2, attempt));
+            logger?.verbose(`[summarize] retryable error (attempt ${attempt + 1}/${BACKOFF_RETRIES}); backing off ${wait}ms`, { color: 'yellow' });
+            await new Promise((resolve) => setTimeout(resolve, wait));
+        }
+    }
+    // Unreachable — the loop either returns or rethrows above.
+    throw lastError;
+}
 async function summarize(documents, { chain, textSplitter, options, logger, tokenizer, metadata }) {
     const { returnIntermediateSteps = false } = options || {};
     const docs = await textSplitter.splitDocuments(documents.map((doc) => new Document(doc)));
@@ -7512,10 +7872,10 @@ async function summarize(documents, { chain, textSplitter, options, logger, toke
         ? docs.reduce((sum, doc) => sum + tokenizer(doc.pageContent), 0)
         : undefined;
     const startedAt = Date.now();
-    const res = await chain.invoke({
+    const res = await invokeWithBackoff(chain, {
         input_documents: docs,
         returnIntermediateSteps,
-    });
+    }, logger);
     const elapsedMs = Date.now() - startedAt;
     logLlmCall(logger, {
         task: 'summarize',
@@ -7530,10 +7890,175 @@ async function summarize(documents, { chain, textSplitter, options, logger, toke
     return res.text && res.text.trim();
 }
+/**
+ * Inspect a unified-diff string and report its shape, or undefined
+ * if the diff isn't trivial (mixed +/- lines, weird headers, etc.).
+ *
+ * Detection rules (cheap on purpose — we're called per-file and the
+ * goal is to skip work, not be exhaustive):
+ *
+ *   - `Binary files ... differ` header → 'binary'
+ *   - `rename from`/`rename to` headers and no `+`/`-` content
+ *     lines → 'rename'
+ *   - All content lines are `+` (and at least one is) → 'addition'
+ *   - All content lines are `-` (and at least one is) → 'deletion'
+ *   - Otherwise → undefined (let the LLM handle it)
+ */
+function detectTrivialDiffShape(diff) {
+    if (!diff)
+        return undefined;
+    // Binary marker is unambiguous and short-circuits early.
+    if (/^Binary files .+ and .+ differ$/m.test(diff)) {
+        return 'binary';
+    }
+    // Pure rename: git emits `rename from` / `rename to` and no body.
+    // We require BOTH markers AND no `+`/`-` content lines. Some
+    // renames-with-edit show rename headers AND a hunk; those should
+    // fall through to the LLM path.
+    const hasRenameFrom = /^rename from /m.test(diff);
+    const hasRenameTo = /^rename to /m.test(diff);
+    if (hasRenameFrom && hasRenameTo) {
+        const hasContentChange = diff
+            .split('\n')
+            .some((line) => isContentChangeLine(line));
+        if (!hasContentChange) {
+            return 'rename';
+        }
+    }
+    // Walk the body once classifying content lines. We skip header
+    // lines (diff --git, index, ---, +++, @@, etc.) and only inspect
+    // the lines that represent actual change content.
+    let plus = 0;
+    let minus = 0;
+    for (const line of diff.split('\n')) {
+        if (isHeaderLine(line))
+            continue;
+        if (line.startsWith('+'))
+            plus++;
+        else if (line.startsWith('-'))
+            minus++;
+        // Context lines (' ' prefix) are ignored for shape classification,
+        // so a hunk that only inserts lines into an existing file is also
+        // classified as 'addition' (and one that only removes, 'deletion');
+        // `git diff` for a brand-new file typically has no context lines.
+    }
+    if (plus > 0 && minus === 0)
+        return 'addition';
+    if (minus > 0 && plus === 0)
+        return 'deletion';
+    return undefined;
+}
+/**
+ * Build a deterministic summary string for a trivial diff. Returns
+ * undefined when the shape can't be templated (caller should fall
+ * back to the LLM path).
+ */
+function summarizeTrivialDiff(fileDiff) {
+    const shape = detectTrivialDiffShape(fileDiff.diff);
+    if (!shape)
+        return undefined;
+    const lineCount = countContentLines(fileDiff.diff, shape);
+    switch (shape) {
+        case 'addition':
+            return `Added \`${fileDiff.file}\` (${lineCount} line${lineCount === 1 ? '' : 's'}).`;
+        case 'deletion':
+            return `Removed \`${fileDiff.file}\` (${lineCount} line${lineCount === 1 ? '' : 's'}).`;
+        case 'rename': {
+            const oldPath = extractRenameOldPath(fileDiff.diff);
+            return oldPath
+                ? `Renamed \`${oldPath}\` → \`${fileDiff.file}\`.`
+                : `Renamed file to \`${fileDiff.file}\`.`;
+        }
+        case 'binary':
+            return `Updated binary file \`${fileDiff.file}\`.`;
+    }
+}
+function isHeaderLine(line) {
+    return (line.startsWith('diff --git') ||
+        line.startsWith('index ') ||
+        line.startsWith('--- ') ||
+        line.startsWith('+++ ') ||
+        line.startsWith('@@') ||
+        line.startsWith('new file mode') ||
+        line.startsWith('deleted file mode') ||
+        line.startsWith('similarity index') ||
+        line.startsWith('rename from ') ||
+        line.startsWith('rename to ') ||
+        line.startsWith('Binary files '));
+}
+function isContentChangeLine(line) {
+    if (isHeaderLine(line))
+        return false;
+    return line.startsWith('+') || line.startsWith('-');
+}
+function countContentLines(diff, shape) {
+    if (shape === 'binary' || shape === 'rename')
+        return 0;
+    const prefix = shape === 'addition' ? '+' : '-';
+    let count = 0;
+    for (const line of diff.split('\n')) {
+        if (isHeaderLine(line))
+            continue;
+        if (line.startsWith(prefix))
+            count++;
+    }
+    return count;
+}
+function extractRenameOldPath(diff) {
+    const match = diff.match(/^rename from (.+)$/m);
+    return match ? match[1].trim() : undefined;
+}
+/**
+ * Cache opt-out: COCO_NO_CACHE=1 disables both reads and writes
+ * for the diff-summary cache (#845, PR 5). Default is enabled.
+ */
+function isCacheEnabled$1() {
+    return !process.env.COCO_NO_CACHE || process.env.COCO_NO_CACHE === '0';
+}
 /**
  * Summarize a single file diff that exceeds the token threshold.
+ *
+ * Trivial-shape short-circuit (#845, PR 2): pure additions / deletions
+ * / renames / binary changes have no information content beyond the
+ * diff's shape, so we summarize them from a template instead of paying for
+ * an LLM call. On initial-commit fixtures (lots of pure adds) this
+ * collapses the per-file summary phase entirely; the resulting tiny
+ * synthetic summaries usually drop the directory token totals under
+ * budget so wave consolidation skips too.
  */
 async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, logger, metadata, }) {
+    const trivialSummary = summarizeTrivialDiff(fileDiff);
+    if (trivialSummary !== undefined) {
+        logger.verbose(` - ${fileDiff.file}: trivial-shape skip (no LLM call)`, { color: 'gray' });
+        return {
+            ...fileDiff,
+            diff: trivialSummary,
+            tokenCount: tokenizer(trivialSummary),
+        };
+    }
+    // Cache lookup (#845, PR 5). Keyed on the file's literal diff
+    // content + the active model + the summarization prompt hash.
+    // A hit returns the prior summary instantly; on iterative
+    // `coco commit` re-runs after small edits, the unchanged files
+    // never go to the LLM.
+    const cacheModel = typeof metadata?.model === 'string' ? metadata.model : undefined;
+    const cacheRepo = process.cwd();
+    const cacheKey = isCacheEnabled$1() && cacheModel
+        ? diffSummaryKey(fileDiff.diff, cacheModel, SUMMARIZE_PROMPT_HASH)
+        : undefined;
+    if (cacheKey) {
+        const cached = readDiffSummary(cacheRepo, cacheKey);
+        if (cached) {
+            logger.verbose(` - ${fileDiff.file}: cache hit (skipped LLM, ${cached.tokens} tokens)`, { color: 'cyan' });
+            touchDiffSummary(cacheRepo, cacheKey);
+            return {
+                ...fileDiff,
+                diff: cached.summary,
+                tokenCount: cached.tokens,
+            };
+        }
+    }
     try {
         const fileSummary = await summarize([
             {
@@ -7557,6 +8082,13 @@ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, log
             },
         });
         const newTokenCount = tokenizer(fileSummary);
+        if (cacheKey && cacheModel) {
+            writeDiffSummary(cacheRepo, cacheKey, {
+                summary: fileSummary,
+                model: cacheModel,
+                tokens: newTokenCount,
+            });
+        }
         return {
             ...fileDiff,
             diff: fileSummary,
@@ -7570,16 +8102,41 @@ async function summarizeFileDiff(fileDiff, { chain, textSplitter, tokenizer, log
     }
 }
 /**
- * Process files in waves to respect concurrency limits.
+ * Continuous-queue scheduler (#845, PR 4). Mirrors the directory-
+ * level scheduler in `summarizeDiffs.ts` and replaces the previous
+ * fixed-wave Promise.all loop, which made the slowest call in
+ * each wave block the next wave from starting. With realistic LLM
+ * tail variance, that wave-locking adds dead time at every wave
+ * boundary; continuous queue fills slots as in-flight calls
+ * resolve, so the wall-clock tracks the slowest *call*, not the
+ * sum of slowest-per-wave.
  */
 async function processInWaves$1(items, processor, maxConcurrent) {
-    const results = [];
-    for (let i = 0; i < items.length; i += maxConcurrent) {
-        const wave = items.slice(i, i + maxConcurrent);
-        const waveResults = await Promise.all(wave.map(processor));
-        results.push(...waveResults);
-    }
-    return results;
+    const limit = createLimit$2(maxConcurrent);
+    return Promise.all(items.map((item) => limit(() => processor(item))));
+}
+function createLimit$2(maxConcurrent) {
+    const limit = Math.max(1, maxConcurrent);
+    let active = 0;
+    const queue = [];
+    const runNext = () => {
+        active--;
+        const next = queue.shift();
+        if (next)
+            next();
+    };
+    return async (operation) => {
+        if (active >= limit) {
+            await new Promise((resolve) => queue.push(resolve));
+        }
+        active++;
+        try {
+            return await operation();
+        }
+        finally {
+            runNext();
+        }
+    };
 }
 /**
  * Pre-summarize individual files that exceed the maxFileTokens threshold.
@@ -7644,6 +8201,13 @@ async function preprocessLargeFiles(rootNode, options) {
     return rebuildNode(rootNode);
 }
+/**
+ * Cache opt-out: COCO_NO_CACHE=1 disables both reads and writes
+ * for the diff-summary cache (#845, PR 5). Default is enabled.
+ */
+function isCacheEnabled() {
+    return !process.env.COCO_NO_CACHE || process.env.COCO_NO_CACHE === '0';
+}
 /**
  * Create groups from a given node info.
  * @param {DiffNode} node - The node info to start grouping.
@@ -7669,6 +8233,32 @@ function createDirectoryDiffs(node) {
  * Summarize a directory diff asynchronously.
  */
 async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenizer, logger, metadata }) {
+    // Cache lookup (#845, PR 5). Joined per-file diffs become the
+    // payload signature; if every file in the directory is unchanged
+    // since the last run (and the model + prompt match), the prior
+    // directory-level summary is reused instead of paying for another
+    // map_reduce pass.
+    const cacheModel = typeof metadata?.model === 'string' ? metadata.model : undefined;
+    const cacheRepo = process.cwd();
+    const cachePayload = directory.diffs
+        .map((diff) => `${diff.file}\x1e${diff.diff}`)
+        .join('\x1d');
+    const cacheKey = isCacheEnabled() && cacheModel
+        ? diffSummaryKey(cachePayload, cacheModel, SUMMARIZE_PROMPT_HASH)
+        : undefined;
+    if (cacheKey) {
+        const cached = readDiffSummary(cacheRepo, cacheKey);
+        if (cached) {
+            logger?.verbose?.(` • Cache hit for "/${directory.path}" (skipped LLM, ${cached.tokens} tokens)`, { color: 'cyan' });
+            touchDiffSummary(cacheRepo, cacheKey);
+            return {
+                diffs: directory.diffs,
+                path: directory.path,
+                summary: cached.summary,
+                tokenCount: cached.tokens,
+            };
+        }
+    }
     try {
         const directorySummary = await summarize(directory.diffs.map((diff) => ({
             pageContent: diff.diff,
@@ -7690,6 +8280,13 @@ async function summarizeDirectoryDiff(directory, { chain, textSplitter, tokenize
             },
         });
         const newTokenTotal = tokenizer(directorySummary);
+        if (cacheKey && cacheModel) {
+            writeDiffSummary(cacheRepo, cacheKey, {
+                summary: directorySummary,
+                model: cacheModel,
+                tokens: newTokenTotal,
+            });
+        }
         return {
             diffs: directory.diffs,
             path: directory.path,
@@ -7724,66 +8321,99 @@ const defaultOutputCallback = (group) => {
     return output;
 };
 /**
- * Process directory summarization in waves to respect concurrency limits
- * while maintaining predictable behavior.
+ * Continuous-queue scheduler for the directory summarization pass
+ * (#845, PR 4). The previous wave-by-wave Promise.all forced the
+ * scheduler to wait for the slowest call in a wave before starting
+ * the next wave; on a fixture like `refactor` (20 directories, mixed
+ * sizes) one big directory could pin the wave at ~its own latency
+ * even though the other 19 calls finished long before.
+ *
+ * The continuous queue dispatches all eligible directories through
+ * a `createLimit(maxConcurrent)` semaphore — same primitive
+ * `collectDiffs` already uses. As soon as any in-flight summary
+ * resolves, the next eligible directory takes its slot. Each
+ * scheduled call also re-checks the budget at the moment it would
+ * fire; if the budget is already met (because earlier completions
+ * dropped the total under maxTokens), it returns the original
+ * directory without an LLM call. So the work scales with what's
+ * actually needed, not with the worst-case wave count.
+ *
+ * Order discipline is preserved: directories are sorted by token
+ * count descending and dispatched in that order. The biggest
+ * candidates land in the first batch of in-flight calls; as smaller
+ * candidates reach the queue front, the budget is more likely to
+ * already be met and they short-circuit.
  */
 async function summarizeInWaves(directories, options) {
     const { totalTokenCount: initialTotal, maxTokens, minTokensForSummary, maxConcurrent, logger, chain, textSplitter, tokenizer, metadata, } = options;
     let totalTokenCount = initialTotal;
     const results = [...directories];
-    // Create sorted indices by token count (descending) for prioritized processing
-    const sortedIndices = directories
+    // Pick eligible directories upfront, sorted big-first: at least minTokensForSummary tokens and no summary yet.
+    const eligibleIndices = directories
         .map((d, i) => ({ index: i, tokens: d.tokenCount }))
-        .sort((a, b) => b.tokens - a.tokens);
-    let cursor = 0;
-    while (totalTokenCount > maxTokens && cursor < sortedIndices.length) {
-        // Select wave candidates: directories that exceed minTokensForSummary
-        const wave = [];
-        for (let i = cursor; i < sortedIndices.length && wave.length < maxConcurrent; i++) {
-            const { index, tokens } = sortedIndices[i];
-            // Skip directories below the minimum threshold
-            if (tokens < minTokensForSummary) {
-                cursor = i + 1;
-                continue;
-            }
-            // Skip directories that have already been summarized
-            if (results[index].summary) {
-                cursor = i + 1;
-                continue;
-            }
-            wave.push(index);
-            cursor = i + 1;
-        }
-        // No more eligible candidates
-        if (wave.length === 0) {
-            break;
+        .filter((entry) => entry.tokens >= minTokensForSummary && !results[entry.index].summary)
+        .sort((a, b) => b.tokens - a.tokens)
+        .map((entry) => entry.index); // keep only the positions into `results`
+    if (eligibleIndices.length === 0 || totalTokenCount <= maxTokens) {
+        return { directories: results, totalTokenCount }; // no candidates, or already within budget: return the unmodified copy
+    }
+    const limit = createLimit$1(maxConcurrent); // caps how many summarize calls are in flight at once
+    logger.verbose(`\nProcessing ${eligibleIndices.length} directories with continuous queue (concurrency ${maxConcurrent})...`, { color: 'blue' });
+    await Promise.all(eligibleIndices.map((idx) => limit(async () => { // every task is handed to the limiter here, in big-first order
+        // Re-check the budget at dispatch time. Earlier completions
+        // may have already dropped the total under the cap; in that
+        // case skip the LLM call entirely.
+        if (totalTokenCount <= maxTokens) {
+            return; // results[idx] keeps the original, unsummarized directory
         }
-        logger.verbose(`\nProcessing wave of ${wave.length} directories...`, { color: 'blue' });
-        // Process wave in parallel
-        const waveResults = await Promise.all(wave.map((idx) => summarizeDirectoryDiff(results[idx], { chain, textSplitter, tokenizer, logger, metadata })));
-        // Update results and recalculate total
-        waveResults.forEach((result, i) => {
-            const idx = wave[i];
-            const originalTokens = results[idx].tokenCount;
-            const newTokens = result.tokenCount;
-            const reduction = originalTokens - newTokens;
-            totalTokenCount -= reduction;
-            results[idx] = result;
-            logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
-                color: 'magenta',
-            });
+        const result = await summarizeDirectoryDiff(results[idx], {
+            chain,
+            textSplitter,
+            tokenizer,
+            logger,
+            metadata,
         });
-        logger.verbose(`Total token count: ${totalTokenCount}`, {
-            color: totalTokenCount > maxTokens ? 'yellow' : 'green',
+        const originalTokens = results[idx].tokenCount; // read before results[idx] is overwritten below
+        const newTokens = result.tokenCount;
+        totalTokenCount -= (originalTokens - newTokens); // apply this directory's savings to the shared running total
+        results[idx] = result; // replace the entry at its original position, so input order is kept
+        logger.verbose(` • Summarized "/${result.path}": ${originalTokens} -> ${newTokens} tokens`, {
+            color: 'magenta',
         });
-        // Check if we're now under budget
-        if (totalTokenCount <= maxTokens) {
-            logger.verbose(`Under token budget, stopping summarization.`, { color: 'green' });
-            break;
-        }
-    }
+    })));
+    logger.verbose(`Total token count after continuous queue: ${totalTokenCount}`, {
+        color: totalTokenCount > maxTokens ? 'yellow' : 'green', // yellow means the total is still over maxTokens
+    });
     return { directories: results, totalTokenCount };
 }
+/**
+ * Tiny semaphore mirroring `collectDiffs.createLimit` (kept private here to avoid a cross-module import for one helper). Schedules at most `maxConcurrent` operations concurrently; the rest queue FIFO.
+ * @param maxConcurrent requested cap, raised to at least 1 by `Math.max`. NOTE(review): `undefined`/NaN makes the cap NaN, so `active >= limit` is never true and nothing is throttled; confirm callers always pass a number.
+ * @returns an async function `(operation)` that waits for a free slot, calls `operation()`, and settles with its result or error.
+ */
+function createLimit$1(maxConcurrent) {
+    const limit = Math.max(1, maxConcurrent);
+    let active = 0; // number of operations currently holding a slot
+    const queue = []; // resolve callbacks of callers waiting for a slot, oldest first
+    const runNext = () => { // give back the finished caller's slot and wake the oldest waiter, if any
+        active--;
+        const next = queue.shift();
+        if (next)
+            next();
+    };
+    return async (operation) => {
+        if (active >= limit) { // every slot is taken: park until runNext() resolves this promise
+            await new Promise((resolve) => queue.push(resolve));
+        }
+        active++; // NOTE(review): a woken waiter claims its slot only here, one async hop after the release; a call arriving in between would also pass the check above. Confirm all calls are issued up front.
+        try {
+            return await operation();
+        }
+        finally { // release the slot whether operation() fulfilled or threw
+            runNext();
+        }
+    };
+}
 /**
  * Summarize diffs using a three-phase approach:
  *
@@ -7797,7 +8427,16 @@ async function summarizeInWaves(directories, options) {
  * - Efficient parallel processing with predictable behavior
  * - Early exit when under token budget
  */
-async function summarizeDiffs(rootDiffNode, { tokenizer, logger, maxTokens = 2048, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
+async function summarizeDiffs(rootDiffNode, { tokenizer, logger,
+// Default raised to 4096 (#845) so the budget matches the
+// canonical service configs in `langchain/utils.ts`. The
+// previous 2048 default came from an earlier era when 4k
+// context was a stretch for fast models; today every shipped
+// service overrides it to 4096 anyway. Keeping this in sync
+// with the service defaults means a caller that omits
+// `maxTokens` doesn't accidentally fall into a tighter budget
+// than the rest of the system assumes.
+maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6, textSplitter, chain, metadata, handleOutput = defaultOutputCallback, }) {
     // Calculate maxFileTokens as 25% of maxTokens if not specified
     const effectiveMaxFileTokens = maxFileTokens ?? Math.floor(maxTokens * 0.25);
     // PHASE 1: Directory grouping & assessment
@@ -10816,10 +11455,17 @@ async function fileChangeParser({ changes, commit, options: { tokenizer, git, ll
     // 1. Pre-process large files to prevent bias
     // 2. Group by directory and assess token count
     // 3. Wave-based parallel summarization until under budget
+    //
+    // The 4096 fallback (#845) matches the default service configs
+    // for openai / anthropic / ollama (`langchain/utils.ts`). It's a
+    // safety net for users with custom service definitions that omit
+    // `tokenLimit` — without it those users hit a degenerate 2048
+    // budget that triggers needless pre-summarization on diffs the
+    // model could absorb whole.
     logger.startTimer();
     const summary = await summarizeDiffs(diffs, {
         tokenizer,
-        maxTokens: maxTokens || 2048,
+        maxTokens: maxTokens || 4096,
         minTokensForSummary,
         maxFileTokens,
         maxConcurrent,
@@ -29235,6 +29881,7 @@ y.command(init.command, init.desc, init.builder, init.handler);
 y.command(doctor.command, doctor.desc, doctor.builder, doctor.handler);
 y.command(log.command, log.desc, log.builder, log.handler);
 y.command(ui.command, ui.desc, ui.builder, ui.handler);
+y.command(cache.command, cache.desc, cache.builder, cache.handler);
 y.help().parse(process.argv.slice(2));
 /**
@@ -29686,4 +30333,4 @@ var commitValidationHandler = /*#__PURE__*/Object.freeze({
   handleValidationErrors: handleValidationErrors
 });
-export { changelog, commit, doctor, init, log, recap, types, ui };
+export { cache, changelog, commit, doctor, init, log, recap, types, ui };