npm - diffdoc - Versions diffs - 0.1.1 → 0.3.0 - Mend

diffdoc 0.1.1 → 0.3.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (12)

package/.diffdocrc.example +4 -1
package/README.md +83 -10
package/dist/commands/embed.js +110 -24
package/dist/commands/query.js +18 -64
package/dist/commands/summarize.js +264 -37
package/dist/config.js +24 -0
package/dist/index.js +19 -2
package/dist/mcp.js +133 -0
package/dist/services/retrieval.js +86 -0
package/dist/types/artifacts.js +5 -0
package/dist/utils/git.js +1 -5
package/package.json +6 -3

package/.diffdocrc.example CHANGED

@@ -8,5 +8,8 @@
   "cloudLlmEndpoint": "https://api.openai.com/v1",
   "cloudChatModel": "gpt-4o-mini",
   "cloudEmbedModel": "text-embedding-3-small",
-  "openaiApiKey": ""
+  "openaiApiKey": "",
+  "includeGlobs": [],
+  "excludeGlobs": [],
+  "ignoreFile": ".diffdocignore"
 }

package/README.md CHANGED

@@ -2,7 +2,7 @@
 ## Project Description
-DiffDoc turns source code into searchable, plain-English project context. It scans repository files, asks an OpenAI-compatible chat model to summarize the business behavior in each file, stores those summaries in a portable JSON manifest, embeds the manifest into a local Vectra index, and answers questions using the indexed results as retrieval context.
+DiffDoc turns source code into searchable, plain-English project context. It scans repository files, asks an OpenAI-compatible chat model to summarize the business behavior in each file, stores the summaries as portable per-hash JSON assets, embeds those assets into a local Vectra index, and answers questions using the indexed results as retrieval context.
 The project is designed for teams that need fast codebase comprehension without requiring every stakeholder to read implementation details. It can run against local model servers such as Ollama, LM Studio, or vLLM, or against cloud OpenAI-compatible APIs.
@@ -37,7 +37,8 @@ Package scripts can call the installed binary:
     "diffdoc:summarize": "diffdoc summarize",
     "diffdoc:embed": "diffdoc embed",
     "diffdoc:search": "diffdoc search",
-    "diffdoc:query": "diffdoc query"
+    "diffdoc:query": "diffdoc query",
+    "diffdoc:mcp": "diffdoc-mcp"
   }
 }
 ```
@@ -71,31 +72,42 @@ Example config with all supported keys:
   "cloudLlmEndpoint": "https://api.openai.com/v1",
   "cloudChatModel": "gpt-4o-mini",
   "cloudEmbedModel": "text-embedding-3-small",
-  "openaiApiKey": ""
+  "openaiApiKey": "",
+  "includeGlobs": [],
+  "excludeGlobs": [],
+  "ignoreFile": ".diffdocignore"
 }
 ```
-Supported environment fallbacks use the uppercase names for the same settings, including `AI_PROVIDER`, `DIFFDOC_BASE_DIR`, `LOCAL_LLM_ENDPOINT`, `LOCAL_EMBED_ENDPOINT`, `LOCAL_CHAT_MODEL`, `LOCAL_EMBED_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, `CLOUD_EMBED_MODEL`, and `OPENAI_API_KEY`.
+Supported environment fallbacks use the uppercase names for the same settings, including `AI_PROVIDER`, `DIFFDOC_BASE_DIR`, `LOCAL_LLM_ENDPOINT`, `LOCAL_EMBED_ENDPOINT`, `LOCAL_CHAT_MODEL`, `LOCAL_EMBED_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, `CLOUD_EMBED_MODEL`, `OPENAI_API_KEY`, `DIFFDOC_INCLUDE_GLOBS`, `DIFFDOC_EXCLUDE_GLOBS`, and `DIFFDOC_IGNORE_FILE`.
 ## Manifest-First Design
-DiffDoc separates summarization from embedding. The `summarize` command writes all generated file summaries to `manifest.json` under `baseDir`, usually `./.diffdoc/manifest.json`.
+DiffDoc separates summarization from embedding. The `summarize` command writes file-to-hash mappings to `manifest.json` and stores each summary in an independent hash-addressed JSON file under `./.diffdoc/summaries/`.
 The manifest is plain JSON and contains one entry per tracked file:
 ```json
 {
+  "schemaVersion": 2,
   "lastSyncedCommit": "string-hash",
   "files": {
-    "src/example.ts": {
-      "hash": "md5-string",
-      "summaryText": "Plain-English explanation text here.",
-      "rawCodeSnapshot": "Full code text here..."
-    }
+    "src/example.ts": "md5-string"
   }
 }
 ```
+Example summary asset at `./.diffdoc/summaries/<hash>.json`:
+```json
+{
+  "schemaVersion": 1,
+  "content_hash": "md5-string",
+  "summary": "Plain-English explanation text here.",
+  "raw_code_snapshot": "Optional code text when --include-code-snapshot is enabled"
+}
+```
 Because the summaries are stored independently, users do not have to embed immediately. They can review, archive, transform, or embed the manifest later using their preferred vectorization model and storage solution.
 DiffDoc includes `diffdoc embed` as a built-in convenience path for creating a local Vectra index, but the manifest can also be consumed by other tools such as custom OpenAI-compatible embedding pipelines, hosted vector databases, local search systems, or internal documentation workflows.
@@ -114,12 +126,30 @@ Summarize only changed Git files using the existing manifest state:
 diffdoc summarize --path . --mode delta
 ```
+Store raw code snapshots in summary assets:
+```bash
+diffdoc summarize --path . --mode all --include-code-snapshot
+```
+Add include/exclude filters at runtime:
+```bash
+diffdoc summarize --path . --mode all --include-glob "src/**/*.ts" --exclude-glob "**/*.test.ts"
+```
 Embed the manifest into a local Vectra index at `./.diffdoc/vectra`:
 ```bash
 diffdoc embed
 ```
+Force full index rebuild:
+```bash
+diffdoc embed --rebuild
+```
 Search the local Vectra index and print raw matches:
 ```bash
@@ -180,10 +210,53 @@ diffdoc summarize --path . --mode delta
 diffdoc embed
 ```
+## GitHub Actions
+This repository includes a workflow at `.github/workflows/diffdoc-summarize.yml` that runs on pushes to `main`. It installs the project, builds the CLI, runs delta summarization, and commits `.diffdoc/manifest.json` back to the branch when the manifest changes.
+The workflow intentionally ignores `.diffdoc/manifest.json` and `.diffdoc/vectra/**` changes as triggers so the bot commit does not create a loop.
+Configure the same values used by the CLI as GitHub Actions variables or secrets, such as `AI_PROVIDER`, `LOCAL_LLM_ENDPOINT`, `LOCAL_CHAT_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, and `OPENAI_API_KEY`. The workflow uses the environment-variable fallback path in DiffDoc, so no `.diffdocrc` file is required in CI.
+## MCP Server
+DiffDoc also ships a local MCP stdio server as `diffdoc-mcp`. This lets MCP-compatible agents search or answer questions against the local Vectra index directly.
+Run it manually with the same config style as the CLI:
+```bash
+diffdoc-mcp --config ./.diffdocrc
+```
+Example MCP client configuration:
+```json
+{
+  "mcpServers": {
+    "diffdoc": {
+      "command": "npx",
+      "args": ["diffdoc-mcp", "--config", "./.diffdocrc"]
+    }
+  }
+}
+```
+If DiffDoc is installed as a project dev dependency, the same `npx diffdoc-mcp` command will resolve the local package binary.
+Available MCP tools:
+- `diffdoc_search`: searches the local Vectra index and returns raw file matches, summaries, scores, hashes, and optional code snapshots.
+- `diffdoc_answer`: retrieves relevant index context and asks the configured chat model to answer the question.
+- `diffdoc_index_stats`: returns the Vectra index path, whether it exists, and the indexed item count.
+Run `diffdoc summarize` and `diffdoc embed` before using the MCP server, otherwise the search and answer tools will not have a local index to query.
 ## Notes
 - Node.js `>=22` is required because Vectra requires it.
 - This repository ignores `.diffdoc/vectra` and `.diffdocrc`; add similar entries to your project's `.gitignore` if you do not want generated indexes or local config committed. The manifest at `.diffdoc/manifest.json` is not ignored by this repository.
+- Summary assets are written to `.diffdoc/summaries/*.json`.
+- Manifest schema is currently `schemaVersion: 2`; older manifest shapes are not auto-migrated.
 - Commit `.diffdoc/manifest.json` when using delta workflows. Delta summarization reads the previous manifest state to decide which changed files need fresh summaries.
 - `summarize` requires a configured chat model.
 - `embed` requires a configured embedding model.

package/dist/commands/embed.js CHANGED

@@ -8,56 +8,142 @@ exports.runEmbed = runEmbed;
 const promises_1 = __importDefault(require("node:fs/promises"));
 const node_path_1 = __importDefault(require("node:path"));
 const vectra_1 = require("vectra");
+const artifacts_1 = require("../types/artifacts");
 const llm_1 = require("../utils/llm");
 const paths_1 = require("../utils/paths");
 const VECTRA_INDEX_DIR = "vectra";
 function getVectraIndexPath(config) {
     return node_path_1.default.resolve((0, paths_1.getDiffdocBaseDir)(config.baseDir), VECTRA_INDEX_DIR);
 }
+function getSummaryDir(manifestPath) {
+    return node_path_1.default.resolve(node_path_1.default.dirname(manifestPath), "summaries");
+}
+function getSummaryPath(summaryDir, hash) {
+    return node_path_1.default.resolve(summaryDir, `${hash}.json`);
+}
+async function readManifest(manifestPath) {
+    const parsed = JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
+    if (parsed.schemaVersion !== artifacts_1.MANIFEST_SCHEMA_VERSION) {
+        throw new Error(`Unsupported manifest schema in ${manifestPath}. Expected schemaVersion ${artifacts_1.MANIFEST_SCHEMA_VERSION}.`);
+    }
+    return {
+        schemaVersion: artifacts_1.MANIFEST_SCHEMA_VERSION,
+        lastSyncedCommit: typeof parsed.lastSyncedCommit === "string" ? parsed.lastSyncedCommit : "",
+        files: parsed.files && typeof parsed.files === "object" ? parsed.files : {}
+    };
+}
+async function readSummaryAsset(summaryPath) {
+    const parsed = JSON.parse(await promises_1.default.readFile(summaryPath, "utf8"));
+    if (parsed.schemaVersion !== artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION) {
+        throw new Error(`Unsupported summary schema in ${summaryPath}. Expected schemaVersion ${artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION}.`);
+    }
+    if (typeof parsed.content_hash !== "string") {
+        throw new Error(`Invalid summary hash in ${summaryPath}.`);
+    }
+    if (typeof parsed.summary !== "string") {
+        throw new Error(`Invalid summary text in ${summaryPath}.`);
+    }
+    return {
+        schemaVersion: artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION,
+        content_hash: parsed.content_hash,
+        summary: parsed.summary,
+        raw_code_snapshot: typeof parsed.raw_code_snapshot === "string" ? parsed.raw_code_snapshot : undefined
+    };
+}
 function buildDocument(filePath, summaryText, rawCodeSnapshot) {
-    return `File: ${filePath}\n` +
-        `Summary: ${summaryText}\n\n` +
-        `Code Snapshot:\n\`\`\`\n${rawCodeSnapshot}\n\`\`\``;
+    let output = `File: ${filePath}\nSummary: ${summaryText}`;
+    if (rawCodeSnapshot) {
+        output += `\n\nCode Snapshot:\n\`\`\`\n${rawCodeSnapshot}\n\`\`\``;
+    }
+    return output;
 }
 async function runEmbed(options, config) {
     const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.manifest, config.baseDir);
-    const manifest = JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
+    const manifest = await readManifest(manifestPath);
     const entries = Object.entries(manifest.files);
+    const summaryDir = getSummaryDir(manifestPath);
     const indexPath = getVectraIndexPath(config);
     const index = new vectra_1.LocalIndex(indexPath);
-    await index.createIndex({
-        version: 1,
-        deleteIfExists: true,
-        metadata_config: {
-            indexed: ["filePath", "hash"]
+    if (options.rebuild) {
+        await index.createIndex({
+            version: 1,
+            deleteIfExists: true,
+            metadata_config: {
+                indexed: ["filePath", "hash"]
+            }
+        });
+    }
+    else if (!await index.isIndexCreated()) {
+        await index.createIndex({
+            version: 1,
+            deleteIfExists: false,
+            metadata_config: {
+                indexed: ["filePath", "hash"]
+            }
+        });
+    }
+    const existingItems = await index.listItems();
+    const existingByPath = new Map(existingItems.map((item) => [item.id, item]));
+    const toUpsert = [];
+    for (const [filePath, hash] of entries) {
+        const existing = existingByPath.get(filePath);
+        if (existing?.metadata.hash === hash) {
+            continue;
+        }
+        const summaryPath = getSummaryPath(summaryDir, hash);
+        const summaryAsset = await readSummaryAsset(summaryPath);
+        if (summaryAsset.content_hash !== hash) {
+            throw new Error(`Hash mismatch in summary asset ${summaryPath}.`);
         }
-    });
-    if (entries.length === 0) {
-        console.log(`Created empty Vectra index at ${indexPath}.`);
+        toUpsert.push({
+            filePath,
+            hash,
+            summaryText: summaryAsset.summary,
+            rawCodeSnapshot: summaryAsset.raw_code_snapshot,
+            document: buildDocument(filePath, summaryAsset.summary, summaryAsset.raw_code_snapshot)
+        });
+    }
+    const activePathSet = new Set(entries.map(([filePath]) => filePath));
+    const toDelete = existingItems
+        .map((item) => item.id)
+        .filter((id) => Boolean(id) && !activePathSet.has(id));
+    if (toUpsert.length === 0 && toDelete.length === 0) {
+        console.log(`Index is already up to date at ${indexPath}.`);
         return;
     }
-    const documents = entries.map(([filePath, file]) => buildDocument(filePath, file.summaryText, file.rawCodeSnapshot));
-    const embeddings = await (0, llm_1.generateEmbeddings)(documents, config.embeddings);
+    const embeddings = toUpsert.length > 0
+        ? await (0, llm_1.generateEmbeddings)(toUpsert.map((item) => item.document), config.embeddings)
+        : [];
     await index.beginUpdate();
     try {
-        for (let i = 0; i < entries.length; i += 1) {
-            const [filePath, file] = entries[i];
+        for (let i = 0; i < toUpsert.length; i += 1) {
+            const item = toUpsert[i];
+            const metadata = item.rawCodeSnapshot
+                ? {
+                    filePath: item.filePath,
+                    hash: item.hash,
+                    summaryText: item.summaryText,
+                    rawCodeSnapshot: item.rawCodeSnapshot
+                }
+                : {
+                    filePath: item.filePath,
+                    hash: item.hash,
+                    summaryText: item.summaryText
+                };
             await index.upsertItem({
-                id: filePath,
+                id: item.filePath,
                 vector: embeddings[i],
-                metadata: {
-                    filePath,
-                    hash: file.hash,
-                    summaryText: file.summaryText,
-                    rawCodeSnapshot: file.rawCodeSnapshot
-                }
+                metadata
             });
         }
+        for (const itemId of toDelete) {
+            await index.deleteItem(itemId);
+        }
         await index.endUpdate();
     }
     catch (error) {
         index.cancelUpdate();
         throw error;
     }
-    console.log(`Embedded ${entries.length} summaries into Vectra index at ${indexPath}.`);
+    console.log(`Embedded ${toUpsert.length} summaries and pruned ${toDelete.length} items in ${indexPath}.`);
 }

package/dist/commands/query.js CHANGED

@@ -2,95 +2,49 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.runQuery = runQuery;
 exports.runSearch = runSearch;
-const vectra_1 = require("vectra");
-const llm_1 = require("../utils/llm");
-const embed_1 = require("./embed");
-const CODE_QUERY_PREFIX = "Represent this query for searching relevant code: ";
-function parseTopK(value) {
-    const topK = Number.parseInt(value, 10);
-    if (!Number.isInteger(topK) || topK < 1) {
-        throw new Error("Invalid --top value. Expected a positive integer.");
-    }
-    return topK;
-}
-function trimForDisplay(text, maxLength) {
-    if (text.length <= maxLength) {
-        return text;
-    }
-    return `${text.slice(0, maxLength).trimEnd()}...`;
-}
-function buildAnswerPrompt(question, results) {
-    const context = results.map((result, indexPosition) => {
-        const metadata = result.item.metadata;
-        return [
-            `Result ${indexPosition + 1}`,
-            `File: ${metadata.filePath}`,
-            `Score: ${result.score}`,
-            `Summary:\n${metadata.summaryText}`,
-            `Code Snapshot:\n${metadata.rawCodeSnapshot}`
-        ].join("\n");
-    }).join("\n\n---\n\n");
-    return `Answer the user's question using only the retrieved DiffDoc results below. If the results do not contain enough information, say what is missing. Prefer a direct answer first, then cite the relevant file paths. Keep the explanation appropriate to the question: summarize when asked for a summary, explain implementation details when asked how something works, and avoid unsupported claims.\n\nUser question:\n${question}\n\nRetrieved results:\n${context}`;
-}
+const retrieval_1 = require("../services/retrieval");
 async function runQuery(message, options, config) {
-    const topK = parseTopK(options.top);
-    const indexPath = (0, embed_1.getVectraIndexPath)(config);
-    const index = new vectra_1.LocalIndex(indexPath);
-    if (!await index.isIndexCreated()) {
-        throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
-    }
-    const [queryVector] = await (0, llm_1.generateEmbeddings)([`${CODE_QUERY_PREFIX}${message}`], config.embeddings);
-    const results = await index.queryItems(queryVector, message, topK);
-    if (results.length === 0) {
-        console.log("No matching embedded summaries found.");
+    const topK = (0, retrieval_1.parseTopK)(options.top);
+    const answerResult = await (0, retrieval_1.answerFromIndex)(message, topK, config);
+    console.log(answerResult.answer);
+    if (answerResult.sources.length === 0) {
         return;
     }
-    const answer = await (0, llm_1.promptLlm)(buildAnswerPrompt(message, results), config.chat);
-    console.log(answer);
     console.log("\nSources:");
-    for (const [indexPosition, result] of results.entries()) {
-        const metadata = result.item.metadata;
-        console.log(`${indexPosition + 1}. ${metadata.filePath} (${result.score.toFixed(4)})`);
+    for (const [indexPosition, source] of answerResult.sources.entries()) {
+        console.log(`${indexPosition + 1}. ${source.filePath} (${source.score.toFixed(4)})`);
     }
     if (!options.code) {
         return;
     }
-    for (const [indexPosition, result] of results.entries()) {
-        const metadata = result.item.metadata;
-        console.log(`\n#${indexPosition + 1} ${metadata.filePath}`);
+    for (const [indexPosition, result] of answerResult.results.entries()) {
+        console.log(`\n#${indexPosition + 1} ${result.filePath}`);
         console.log(`Score: ${result.score.toFixed(4)}`);
-        console.log(`Hash: ${metadata.hash}`);
+        console.log(`Hash: ${result.hash}`);
         console.log("Summary:");
-        console.log(trimForDisplay(metadata.summaryText, 1200));
+        console.log((0, retrieval_1.trimForDisplay)(result.summaryText, 1200));
         if (options.code) {
             console.log("Code Snapshot:");
-            console.log(trimForDisplay(metadata.rawCodeSnapshot, 2000));
+            console.log((0, retrieval_1.trimForDisplay)(result.rawCodeSnapshot || "(not stored)", 2000));
         }
     }
 }
 async function runSearch(message, options, config) {
-    const topK = parseTopK(options.top);
-    const indexPath = (0, embed_1.getVectraIndexPath)(config);
-    const index = new vectra_1.LocalIndex(indexPath);
-    if (!await index.isIndexCreated()) {
-        throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
-    }
-    const [queryVector] = await (0, llm_1.generateEmbeddings)([`${CODE_QUERY_PREFIX}${message}`], config.embeddings);
-    const results = await index.queryItems(queryVector, message, topK);
+    const topK = (0, retrieval_1.parseTopK)(options.top);
+    const results = await (0, retrieval_1.searchIndex)(message, topK, config);
     if (results.length === 0) {
         console.log("No matching embedded summaries found.");
         return;
     }
     for (const [indexPosition, result] of results.entries()) {
-        const metadata = result.item.metadata;
-        console.log(`\n#${indexPosition + 1} ${metadata.filePath}`);
+        console.log(`\n#${indexPosition + 1} ${result.filePath}`);
         console.log(`Score: ${result.score.toFixed(4)}`);
-        console.log(`Hash: ${metadata.hash}`);
+        console.log(`Hash: ${result.hash}`);
         console.log("Summary:");
-        console.log(trimForDisplay(metadata.summaryText, 1200));
+        console.log((0, retrieval_1.trimForDisplay)(result.summaryText, 1200));
         if (options.code) {
             console.log("Code Snapshot:");
-            console.log(trimForDisplay(metadata.rawCodeSnapshot, 2000));
+            console.log((0, retrieval_1.trimForDisplay)(result.rawCodeSnapshot || "(not stored)", 2000));
         }
     }
 }

package/dist/commands/summarize.js CHANGED

@@ -6,60 +6,240 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.runSummarize = runSummarize;
 const promises_1 = __importDefault(require("node:fs/promises"));
 const node_path_1 = __importDefault(require("node:path"));
+const artifacts_1 = require("../types/artifacts");
 const git_1 = require("../utils/git");
 const hashing_1 = require("../utils/hashing");
 const llm_1 = require("../utils/llm");
 const paths_1 = require("../utils/paths");
-const TARGET_EXTENSIONS = new Set([".ts", ".js", ".cs", ".py"]);
-const IGNORED_DIRECTORIES = new Set([".git", "node_modules", "dist"]);
-const IGNORED_FILES = new Set(["package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb"]);
 function normalizeRelativePath(filePath) {
     return filePath.split(node_path_1.default.sep).join("/");
 }
-function isTargetCodeFile(filePath) {
-    return TARGET_EXTENSIONS.has(node_path_1.default.extname(filePath)) && !IGNORED_FILES.has(node_path_1.default.basename(filePath));
+function getSummaryDir(manifestPath) {
+    return node_path_1.default.resolve(node_path_1.default.dirname(manifestPath), "summaries");
+}
+function getSummaryPath(summaryDir, hash) {
+    return node_path_1.default.resolve(summaryDir, `${hash}.json`);
+}
+function normalizeGlobPattern(pattern) {
+    return pattern.split(node_path_1.default.sep).join("/");
+}
+function escapeRegex(value) {
+    return value.replace(/[|\\{}()[\]^$+?.]/g, "\\$&");
+}
+function globToRegExp(pattern) {
+    const normalized = normalizeGlobPattern(pattern);
+    let regexBody = "";
+    for (let i = 0; i < normalized.length; i += 1) {
+        const char = normalized[i];
+        const next = normalized[i + 1];
+        if (char === "*" && next === "*") {
+            regexBody += ".*";
+            i += 1;
+            continue;
+        }
+        if (char === "*") {
+            regexBody += "[^/]*";
+            continue;
+        }
+        if (char === "?") {
+            regexBody += "[^/]";
+            continue;
+        }
+        regexBody += escapeRegex(char);
+    }
+    return new RegExp(`^${regexBody}$`);
+}
+function compileGlobs(patterns) {
+    return patterns.filter(Boolean).map(globToRegExp);
+}
+function matchesAny(filePath, patterns) {
+    return patterns.some((pattern) => pattern.test(filePath));
+}
+function shouldIncludeFile(filePath, includeGlobs, excludeGlobs, ignoreGlobs) {
+    if (includeGlobs.length > 0 && !matchesAny(filePath, includeGlobs)) {
+        return false;
+    }
+    if (excludeGlobs.length > 0 && matchesAny(filePath, excludeGlobs)) {
+        return false;
+    }
+    if (ignoreGlobs.length > 0 && matchesAny(filePath, ignoreGlobs)) {
+        return false;
+    }
+    return true;
+}
+async function fileExists(filePath) {
+    try {
+        await promises_1.default.access(filePath);
+        return true;
+    }
+    catch {
+        return false;
+    }
+}
+async function atomicWriteUtf8(targetPath, content) {
+    await promises_1.default.mkdir(node_path_1.default.dirname(targetPath), { recursive: true });
+    const tempPath = `${targetPath}.${process.pid}.${Date.now()}.tmp`;
+    const handle = await promises_1.default.open(tempPath, "w");
+    try {
+        await handle.writeFile(content, "utf8");
+        await handle.sync();
+    }
+    finally {
+        await handle.close();
+    }
+    await promises_1.default.rename(tempPath, targetPath);
+}
+async function writeManifest(manifestPath, manifest) {
+    await atomicWriteUtf8(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`);
+}
+async function writeSummaryAsset(summaryPath, summary) {
+    await atomicWriteUtf8(summaryPath, `${JSON.stringify(summary, null, 2)}\n`);
 }
 async function readManifest(manifestPath) {
     try {
-        return JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
+        const parsed = JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
+        if (parsed.schemaVersion !== artifacts_1.MANIFEST_SCHEMA_VERSION) {
+            throw new Error(`Unsupported manifest schema in ${manifestPath}. Expected schemaVersion ${artifacts_1.MANIFEST_SCHEMA_VERSION}.`);
+        }
+        return {
+            schemaVersion: artifacts_1.MANIFEST_SCHEMA_VERSION,
+            lastSyncedCommit: typeof parsed.lastSyncedCommit === "string" ? parsed.lastSyncedCommit : "",
+            files: parsed.files && typeof parsed.files === "object" ? parsed.files : {}
+        };
     }
     catch (error) {
         const nodeError = error;
         if (nodeError.code === "ENOENT") {
-            return { lastSyncedCommit: "", files: {} };
+            return {
+                schemaVersion: artifacts_1.MANIFEST_SCHEMA_VERSION,
+                lastSyncedCommit: "",
+                files: {}
+            };
         }
         throw error;
     }
 }
-async function writeManifest(manifestPath, manifest) {
-    await promises_1.default.mkdir(node_path_1.default.dirname(manifestPath), { recursive: true });
-    await promises_1.default.writeFile(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
+async function readIgnorePatterns(repoPath, ignoreFilePath) {
+    const absolutePath = node_path_1.default.isAbsolute(ignoreFilePath)
+        ? ignoreFilePath
+        : node_path_1.default.resolve(repoPath, ignoreFilePath);
+    try {
+        const raw = await promises_1.default.readFile(absolutePath, "utf8");
+        return raw
+            .split(/\r?\n/)
+            .map((line) => line.trim())
+            .filter((line) => line.length > 0 && !line.startsWith("#"))
+            .map(normalizeGlobPattern);
+    }
+    catch (error) {
+        const nodeError = error;
+        if (nodeError.code === "ENOENT") {
+            return [];
+        }
+        throw error;
+    }
 }
-async function walkCodeFiles(rootPath, currentPath = rootPath) {
+async function walkCodeFiles(rootPath, includeGlobs, excludeGlobs, ignoreGlobs, currentPath = rootPath) {
     const entries = await promises_1.default.readdir(currentPath, { withFileTypes: true });
     const files = [];
     for (const entry of entries) {
         const entryPath = node_path_1.default.join(currentPath, entry.name);
         if (entry.isDirectory()) {
-            if (!IGNORED_DIRECTORIES.has(entry.name)) {
-                files.push(...await walkCodeFiles(rootPath, entryPath));
-            }
+            files.push(...await walkCodeFiles(rootPath, includeGlobs, excludeGlobs, ignoreGlobs, entryPath));
             continue;
         }
-        if (entry.isFile() && isTargetCodeFile(entry.name)) {
-            files.push(normalizeRelativePath(node_path_1.default.relative(rootPath, entryPath)));
+        if (entry.isFile()) {
+            const relativePath = normalizeRelativePath(node_path_1.default.relative(rootPath, entryPath));
+            if (shouldIncludeFile(relativePath, includeGlobs, excludeGlobs, ignoreGlobs)) {
+                files.push(relativePath);
+            }
         }
     }
     return files.sort();
 }
-async function summarizeFile(rootPath, relativePath, config) {
-    const absolutePath = node_path_1.default.join(rootPath, relativePath);
-    const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
-    return {
-        hash: (0, hashing_1.hashFileContent)(rawCodeSnapshot),
-        summaryText: await (0, llm_1.generateFunctionalSummary)(relativePath, rawCodeSnapshot, config.chat),
-        rawCodeSnapshot
+function countHashRefs(files) {
+    const refs = new Map();
+    for (const hash of Object.values(files)) {
+        refs.set(hash, (refs.get(hash) || 0) + 1);
+    }
+    return refs;
+}
+async function deleteSummaryIfUnreferenced(summaryDir, hash, refs) {
+    if ((refs.get(hash) || 0) > 0) {
+        return;
+    }
+    const summaryPath = getSummaryPath(summaryDir, hash);
+    try {
+        await promises_1.default.unlink(summaryPath);
+    }
+    catch (error) {
+        const nodeError = error;
+        if (nodeError.code !== "ENOENT") {
+            throw error;
+        }
+    }
+}
+async function setManifestPathHash(filePath, newHash, manifest, manifestPath, summaryDir, refs) {
+    const previousHash = manifest.files[filePath];
+    if (previousHash === newHash) {
+        return;
+    }
+    if (previousHash) {
+        refs.set(previousHash, Math.max((refs.get(previousHash) || 1) - 1, 0));
+    }
+    manifest.files[filePath] = newHash;
+    refs.set(newHash, (refs.get(newHash) || 0) + 1);
+    await writeManifest(manifestPath, manifest);
+    if (previousHash) {
+        await deleteSummaryIfUnreferenced(summaryDir, previousHash, refs);
+    }
+}
+async function removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs) {
+    const previousHash = manifest.files[filePath];
+    if (!previousHash) {
+        return;
+    }
+    delete manifest.files[filePath];
+    refs.set(previousHash, Math.max((refs.get(previousHash) || 1) - 1, 0));
+    await writeManifest(manifestPath, manifest);
+    await deleteSummaryIfUnreferenced(summaryDir, previousHash, refs);
+}
+async function ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, includeCodeSnapshot) {
+    const summaryPath = getSummaryPath(summaryDir, hash);
+    if (await fileExists(summaryPath)) {
+        return;
+    }
+    const summary = {
+        schemaVersion: artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION,
+        content_hash: hash,
+        summary: summaryText,
+        raw_code_snapshot: includeCodeSnapshot ? rawCodeSnapshot : undefined
     };
+    await writeSummaryAsset(summaryPath, summary);
+}
+async function pruneOrphanedSummaries(summaryDir, manifest) {
+    const activeHashes = new Set(Object.values(manifest.files));
+    let entries = [];
+    try {
+        entries = await promises_1.default.readdir(summaryDir);
+    }
+    catch (error) {
+        const nodeError = error;
+        if (nodeError.code === "ENOENT") {
+            return;
+        }
+        throw error;
+    }
+    for (const entry of entries) {
+        if (!entry.endsWith(".json")) {
+            continue;
+        }
+        const hash = entry.slice(0, -5);
+        if (activeHashes.has(hash)) {
+            continue;
+        }
+        await promises_1.default.unlink(node_path_1.default.resolve(summaryDir, entry));
+    }
 }
 async function runSummarize(options, config) {
     if (options.mode !== "all" && options.mode !== "delta") {
@@ -68,46 +248,93 @@ async function runSummarize(options, config) {
     const commandCwd = process.cwd();
     const repoPath = node_path_1.default.resolve(commandCwd, options.path);
     const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.out, config.baseDir);
-    const manifest = options.mode === "delta" ? await readManifest(manifestPath) : { lastSyncedCommit: "", files: {} };
+    const summaryDir = getSummaryDir(manifestPath);
+    const manifest = await readManifest(manifestPath);
+    const refs = countHashRefs(manifest.files);
+    const includePatterns = compileGlobs((options.includeGlobs && options.includeGlobs.length > 0)
+        ? options.includeGlobs.map(normalizeGlobPattern)
+        : config.summarize.includeGlobs.map(normalizeGlobPattern));
+    const excludePatterns = compileGlobs((options.excludeGlobs && options.excludeGlobs.length > 0)
+        ? options.excludeGlobs.map(normalizeGlobPattern)
+        : config.summarize.excludeGlobs.map(normalizeGlobPattern));
+    const ignoreFile = options.ignoreFile || config.summarize.ignoreFile;
+    const ignorePatterns = compileGlobs(await readIgnorePatterns(repoPath, ignoreFile));
+    const failures = [];
     if (options.mode === "all") {
-        const files = await walkCodeFiles(repoPath);
         manifest.files = {};
+        refs.clear();
+        await writeManifest(manifestPath, manifest);
+        const files = await walkCodeFiles(repoPath, includePatterns, excludePatterns, ignorePatterns);
         for (const filePath of files) {
-            manifest.files[filePath] = await summarizeFile(repoPath, filePath, config);
-            console.log(`Summarized ${filePath}`);
+            try {
+                const absolutePath = node_path_1.default.join(repoPath, filePath);
+                const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
+                const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
+                const summaryPath = getSummaryPath(summaryDir, hash);
+                if (!await fileExists(summaryPath)) {
+                    const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
+                    await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
+                }
+                manifest.files[filePath] = hash;
+                refs.set(hash, (refs.get(hash) || 0) + 1);
+                await writeManifest(manifestPath, manifest);
+                console.log(`Summarized ${filePath}`);
+            }
+            catch (error) {
+                const message = error instanceof Error ? error.message : String(error);
+                failures.push({ filePath, message });
+                console.error(`Failed ${filePath}: ${message}`);
+            }
         }
     }
     else {
         const deltas = await (0, git_1.getGitDeltas)(repoPath, manifest.lastSyncedCommit);
         for (const deletedPath of deltas.deleted) {
-            delete manifest.files[deletedPath];
+            await removeManifestPath(deletedPath, manifest, manifestPath, summaryDir, refs);
             console.log(`Pruned ${deletedPath}`);
         }
         for (const filePath of deltas.modifiedOrAdded) {
-            const absolutePath = node_path_1.default.join(repoPath, filePath);
             try {
+                if (!shouldIncludeFile(filePath, includePatterns, excludePatterns, ignorePatterns)) {
+                    await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
+                    continue;
+                }
+                const previousHash = manifest.files[filePath];
+                const absolutePath = node_path_1.default.join(repoPath, filePath);
                 const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
                 const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
-                if (manifest.files[filePath]?.hash === hash)
+                if (previousHash === hash) {
                     continue;
-                manifest.files[filePath] = {
-                    hash,
-                    summaryText: await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat),
-                    rawCodeSnapshot
-                };
+                }
+                const summaryPath = getSummaryPath(summaryDir, hash);
+                if (!await fileExists(summaryPath)) {
+                    const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
+                    await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
+                }
+                await setManifestPathHash(filePath, hash, manifest, manifestPath, summaryDir, refs);
                 console.log(`Updated ${filePath}`);
             }
             catch (error) {
                 const nodeError = error;
                 if (nodeError.code === "ENOENT") {
-                    delete manifest.files[filePath];
+                    await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
                     continue;
                 }
-                throw error;
+                const message = error instanceof Error ? error.message : String(error);
+                failures.push({ filePath, message });
+                console.error(`Failed ${filePath}: ${message}`);
             }
         }
     }
     manifest.lastSyncedCommit = await (0, git_1.getCurrentCommit)(repoPath);
     await writeManifest(manifestPath, manifest);
+    await pruneOrphanedSummaries(summaryDir, manifest);
     console.log(`Wrote manifest to ${manifestPath}`);
+    if (failures.length > 0) {
+        console.error(`\n${failures.length} file(s) failed during summarization:`);
+        for (const failure of failures) {
+            console.error(`- ${failure.filePath}: ${failure.message}`);
+        }
+        throw new Error("Summarization completed with failures.");
+    }
 }

package/dist/config.js CHANGED Viewed

@@ -9,6 +9,22 @@ const node_path_1 = __importDefault(require("node:path"));
 function readOption(value, envName, fallback = "") {
     return value || process.env[envName] || fallback;
 }
+function parseCsv(value) {
+    return value.split(",").map((item) => item.trim()).filter(Boolean);
+}
+function readListOption(value, envName, fallback = []) {
+    if (Array.isArray(value)) {
+        return value.flatMap((item) => parseCsv(item)).filter(Boolean);
+    }
+    if (typeof value === "string" && value.trim()) {
+        return parseCsv(value);
+    }
+    const envValue = process.env[envName];
+    if (envValue && envValue.trim()) {
+        return parseCsv(envValue);
+    }
+    return fallback;
+}
 function loadRcFile(configPath) {
     const resolvedPath = node_path_1.default.resolve(process.cwd(), configPath || ".diffdocrc");
     if (!node_fs_1.default.existsSync(resolvedPath)) {
@@ -41,6 +57,9 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
     const mergedOptions = mergeConfigOptions(options);
     const provider = readProvider(mergedOptions.aiProvider);
     const apiKey = readOption(mergedOptions.openaiApiKey, "OPENAI_API_KEY", provider === "local" ? "local-key" : "");
+    const includeGlobs = readListOption(mergedOptions.includeGlobs, "DIFFDOC_INCLUDE_GLOBS");
+    const excludeGlobs = readListOption(mergedOptions.excludeGlobs, "DIFFDOC_EXCLUDE_GLOBS");
+    const ignoreFile = readOption(mergedOptions.ignoreFile, "DIFFDOC_IGNORE_FILE", ".diffdocignore");
     const chatBaseURL = provider === "cloud"
         ? readOption(mergedOptions.cloudLlmEndpoint, "CLOUD_LLM_ENDPOINT", "https://api.openai.com/v1")
         : readOption(mergedOptions.localLlmEndpoint, "LOCAL_LLM_ENDPOINT");
@@ -80,6 +99,11 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
             apiKey,
             baseURL: embedBaseURL,
             model: embedModel
+        },
+        summarize: {
+            includeGlobs,
+            excludeGlobs,
+            ignoreFile
         }
     };
 }

package/dist/index.js CHANGED Viewed

@@ -8,6 +8,10 @@ const query_1 = require("./commands/query");
 const summarize_1 = require("./commands/summarize");
 const llm_1 = require("./utils/llm");
 const program = new commander_1.Command();
+function collectOption(value, previous) {
+    previous.push(value);
+    return previous;
+}
 function addBaseOptions(command) {
     return command
         .option("--config <path>", "path to .diffdocrc JSON config file")
@@ -43,10 +47,22 @@ addChatOptions(addBaseOptions(program
     .option("--path <path>", "repository or code path to scan", ".")
     .option("--out <path>", "manifest output path under --base-dir", "manifest.json")
     .option("--mode <mode>", "summarization mode: all or delta", "all")
+    .option("--include-code-snapshot", "store raw code in summary assets", false)
+    .option("--include-glob <pattern>", "include glob pattern (repeatable)", collectOption, [])
+    .option("--exclude-glob <pattern>", "exclude glob pattern (repeatable)", collectOption, [])
+    .option("--ignore-file <path>", "path to ignore pattern file relative to --path")
     .action(async (options) => {
     try {
         const config = (0, config_1.buildRuntimeConfig)(options, { chat: true });
-        await (0, summarize_1.runSummarize)({ path: options.path, out: options.out, mode: options.mode }, config);
+        await (0, summarize_1.runSummarize)({
+            path: options.path,
+            out: options.out,
+            mode: options.mode,
+            includeCodeSnapshot: options.includeCodeSnapshot,
+            includeGlobs: options.includeGlob,
+            excludeGlobs: options.excludeGlob,
+            ignoreFile: options.ignoreFile
+        }, config);
     }
     catch (error) {
         console.error(error instanceof Error ? error.message : error);
@@ -104,10 +120,11 @@ addCloudEndpointAndKeyOptions(addEmbeddingOptions(addBaseOptions(program
     .command("embed"))))
     .description("Embed manifest summaries into a local Vectra index")
     .option("--manifest <path>", "manifest input path under --base-dir", "manifest.json")
+    .option("--rebuild", "rebuild local index from scratch", false)
     .action(async (options) => {
     try {
         const config = (0, config_1.buildRuntimeConfig)(options, { embeddings: true });
-        await (0, embed_1.runEmbed)({ manifest: options.manifest }, config);
+        await (0, embed_1.runEmbed)({ manifest: options.manifest, rebuild: options.rebuild }, config);
     }
     catch (error) {
         console.error(error instanceof Error ? error.message : error);

package/dist/mcp.js ADDED Viewed

@@ -0,0 +1,133 @@
+#!/usr/bin/env node
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
+const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
+const zod_1 = require("zod");
+const config_1 = require("./config");
+const retrieval_1 = require("./services/retrieval");
+const MCP_SERVER_VERSION = "0.1.1";
+function readCliOptions(argv) {
+    const options = {};
+    for (let i = 0; i < argv.length; i += 1) {
+        const arg = argv[i];
+        if (!arg.startsWith("--"))
+            continue;
+        const key = arg.slice(2);
+        const nextValue = argv[i + 1];
+        if (!nextValue || nextValue.startsWith("--")) {
+            throw new Error(`Missing value for --${key}.`);
+        }
+        i += 1;
+        switch (key) {
+            case "config":
+                options.config = nextValue;
+                break;
+            case "base-dir":
+                options.baseDir = nextValue;
+                break;
+            case "ai-provider":
+                options.aiProvider = nextValue;
+                break;
+            case "local-llm-endpoint":
+                options.localLlmEndpoint = nextValue;
+                break;
+            case "local-chat-model":
+                options.localChatModel = nextValue;
+                break;
+            case "local-embed-endpoint":
+                options.localEmbedEndpoint = nextValue;
+                break;
+            case "local-embed-model":
+                options.localEmbedModel = nextValue;
+                break;
+            case "cloud-llm-endpoint":
+                options.cloudLlmEndpoint = nextValue;
+                break;
+            case "cloud-chat-model":
+                options.cloudChatModel = nextValue;
+                break;
+            case "cloud-embed-model":
+                options.cloudEmbedModel = nextValue;
+                break;
+            case "openai-api-key":
+                options.openaiApiKey = nextValue;
+                break;
+            default:
+                throw new Error(`Unknown MCP option: --${key}.`);
+        }
+    }
+    return options;
+}
+function buildConfig(options, needs) {
+    return (0, config_1.buildRuntimeConfig)(options, needs);
+}
+function jsonText(data) {
+    return {
+        content: [
+            {
+                type: "text",
+                text: JSON.stringify(data, null, 2)
+            }
+        ]
+    };
+}
+async function main() {
+    const runtimeOptions = readCliOptions(process.argv.slice(2));
+    const server = new mcp_js_1.McpServer({
+        name: "diffdoc",
+        version: MCP_SERVER_VERSION
+    });
+    const toolServer = server;
+    toolServer.registerTool("diffdoc_search", {
+        title: "Search DiffDoc Index",
+        description: "Search the local DiffDoc Vectra index and return raw matching files, summaries, and optional code snapshots.",
+        inputSchema: {
+            query: zod_1.z.string().min(1).describe("Natural-language search query."),
+            top: zod_1.z.number().int().positive().optional().describe("Number of matches to return."),
+            includeCode: zod_1.z.boolean().optional().describe("Include raw code snapshots in the returned results.")
+        }
+    }, async ({ query, top = 5, includeCode = false }) => {
+        const config = buildConfig(runtimeOptions, { embeddings: true });
+        const results = await (0, retrieval_1.searchIndex)(String(query), (0, retrieval_1.parseTopK)(top), config);
+        return jsonText({
+            results: results.map((result) => ({
+                filePath: result.filePath,
+                score: result.score,
+                hash: result.hash,
+                summaryText: result.summaryText,
+                rawCodeSnapshot: Boolean(includeCode) ? result.rawCodeSnapshot : undefined
+            }))
+        });
+    });
+    toolServer.registerTool("diffdoc_answer", {
+        title: "Answer From DiffDoc Index",
+        description: "Answer a question using retrieved DiffDoc index context and the configured chat model.",
+        inputSchema: {
+            question: zod_1.z.string().min(1).describe("Question to answer using indexed DiffDoc context."),
+            top: zod_1.z.number().int().positive().optional().describe("Number of matches to retrieve before answering."),
+            includeResults: zod_1.z.boolean().optional().describe("Include full retrieved results in addition to answer and sources.")
+        }
+    }, async ({ question, top = 5, includeResults = false }) => {
+        const config = buildConfig(runtimeOptions, { chat: true, embeddings: true });
+        const answer = await (0, retrieval_1.answerFromIndex)(String(question), (0, retrieval_1.parseTopK)(top), config);
+        return jsonText({
+            answer: answer.answer,
+            sources: answer.sources,
+            results: Boolean(includeResults) ? answer.results : undefined
+        });
+    });
+    toolServer.registerTool("diffdoc_index_stats", {
+        title: "DiffDoc Index Stats",
+        description: "Return the local DiffDoc Vectra index path, existence status, and indexed item count.",
+        inputSchema: {}
+    }, async () => {
+        const config = buildConfig(runtimeOptions, { embeddings: false, chat: false });
+        return jsonText(await (0, retrieval_1.getIndexStats)(config));
+    });
+    await server.connect(new stdio_js_1.StdioServerTransport());
+}
+main().catch((error) => {
+    console.error(error instanceof Error ? error.message : error);
+    process.exit(1);
+});

package/dist/services/retrieval.js ADDED Viewed

@@ -0,0 +1,86 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.parseTopK = parseTopK;
+exports.trimForDisplay = trimForDisplay;
+exports.searchIndex = searchIndex;
+exports.answerFromIndex = answerFromIndex;
+exports.getIndexStats = getIndexStats;
+const vectra_1 = require("vectra");
+const embed_1 = require("../commands/embed");
+const llm_1 = require("../utils/llm");
+const CODE_QUERY_PREFIX = "Represent this query for searching relevant code: ";
+function parseTopK(value) {
+    const topK = typeof value === "number" ? value : Number.parseInt(value, 10);
+    if (!Number.isInteger(topK) || topK < 1) {
+        throw new Error("Invalid top value. Expected a positive integer.");
+    }
+    return topK;
+}
+function trimForDisplay(text, maxLength) {
+    if (text.length <= maxLength) {
+        return text;
+    }
+    return `${text.slice(0, maxLength).trimEnd()}...`;
+}
+function mapSearchResult(result) {
+    const metadata = result.item.metadata;
+    return {
+        filePath: metadata.filePath,
+        score: result.score,
+        hash: metadata.hash,
+        summaryText: metadata.summaryText,
+        rawCodeSnapshot: metadata.rawCodeSnapshot
+    };
+}
+function buildAnswerPrompt(question, results) {
+    const context = results.map((result, indexPosition) => {
+        return [
+            `Result ${indexPosition + 1}`,
+            `File: ${result.filePath}`,
+            `Score: ${result.score}`,
+            `Summary:\n${result.summaryText}`,
+            `Code Snapshot:\n${result.rawCodeSnapshot || "(not stored)"}`
+        ].join("\n");
+    }).join("\n\n---\n\n");
+    return `Answer the user's question using only the retrieved DiffDoc results below. If the results do not contain enough information, say what is missing. Prefer a direct answer first, then cite the relevant file paths. Keep the explanation appropriate to the question: summarize when asked for a summary, explain implementation details when asked how something works, and avoid unsupported claims.\n\nUser question:\n${question}\n\nRetrieved results:\n${context}`;
+}
+async function getExistingIndex(config) {
+    const indexPath = (0, embed_1.getVectraIndexPath)(config);
+    const index = new vectra_1.LocalIndex(indexPath);
+    if (!await index.isIndexCreated()) {
+        throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
+    }
+    return index;
+}
+async function searchIndex(query, topK, config) {
+    const index = await getExistingIndex(config);
+    const [queryVector] = await (0, llm_1.generateEmbeddings)([`${CODE_QUERY_PREFIX}${query}`], config.embeddings);
+    const results = await index.queryItems(queryVector, query, topK);
+    return results.map(mapSearchResult);
+}
+async function answerFromIndex(question, topK, config) {
+    const results = await searchIndex(question, topK, config);
+    if (results.length === 0) {
+        return {
+            answer: "No matching embedded summaries found.",
+            sources: [],
+            results: []
+        };
+    }
+    const answer = await (0, llm_1.promptLlm)(buildAnswerPrompt(question, results), config.chat);
+    return {
+        answer,
+        sources: results.map((result) => ({ filePath: result.filePath, score: result.score })),
+        results
+    };
+}
+async function getIndexStats(config) {
+    const indexPath = (0, embed_1.getVectraIndexPath)(config);
+    const index = new vectra_1.LocalIndex(indexPath);
+    const exists = await index.isIndexCreated();
+    if (!exists) {
+        return { indexPath, exists: false, items: 0 };
+    }
+    const stats = await index.getIndexStats();
+    return { indexPath, exists: true, items: stats.items };
+}

package/dist/types/artifacts.js ADDED Viewed

@@ -0,0 +1,5 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.SUMMARY_ASSET_SCHEMA_VERSION = exports.MANIFEST_SCHEMA_VERSION = void 0;
+exports.MANIFEST_SCHEMA_VERSION = 2;
+exports.SUMMARY_ASSET_SCHEMA_VERSION = 1;

package/dist/utils/git.js CHANGED Viewed

@@ -7,16 +7,12 @@ exports.getGitDeltas = getGitDeltas;
 exports.getCurrentCommit = getCurrentCommit;
 const node_path_1 = __importDefault(require("node:path"));
 const simple_git_1 = __importDefault(require("simple-git"));
-const TARGET_EXTENSIONS = new Set([".ts", ".js", ".cs", ".py"]);
 function normalizePath(filePath) {
     return filePath.split(node_path_1.default.sep).join("/");
 }
-function isTargetCodeFile(filePath) {
-    return TARGET_EXTENSIONS.has(node_path_1.default.extname(filePath));
-}
 function addUnique(target, filePath) {
     const normalized = normalizePath(filePath.trim());
-    if (normalized && isTargetCodeFile(normalized)) {
+    if (normalized) {
         target.add(normalized);
     }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "diffdoc",
-  "version": "0.1.1",
+  "version": "0.3.0",
   "description": "Translate repository code shifts into plain-English business context",
   "license": "MIT",
   "author": "Christopher Sullivan",
@@ -15,7 +15,8 @@
   "type": "commonjs",
   "main": "dist/index.js",
   "bin": {
-    "diffdoc": "./dist/index.js"
+    "diffdoc": "./dist/index.js",
+    "diffdoc-mcp": "./dist/mcp.js"
   },
   "files": [
     "dist",
@@ -33,10 +34,12 @@
     "prepare": "npm run build"
   },
   "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.29.0",
     "commander": "^12.0.0",
     "openai": "^4.28.0",
     "simple-git": "^3.24.0",
-    "vectra": "^0.14.0"
+    "vectra": "^0.14.0",
+    "zod": "^3.25.76"
   },
   "devDependencies": {
     "@types/node": "^20.19.41",