diffdoc 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,7 +37,8 @@ Package scripts can call the installed binary:
37
37
  "diffdoc:summarize": "diffdoc summarize",
38
38
  "diffdoc:embed": "diffdoc embed",
39
39
  "diffdoc:search": "diffdoc search",
40
- "diffdoc:query": "diffdoc query"
40
+ "diffdoc:query": "diffdoc query",
41
+ "diffdoc:mcp": "diffdoc-mcp"
41
42
  }
42
43
  }
43
44
  ```
@@ -180,6 +181,47 @@ diffdoc summarize --path . --mode delta
180
181
  diffdoc embed
181
182
  ```
182
183
 
184
+ ## GitHub Actions
185
+
186
+ This repository includes a workflow at `.github/workflows/diffdoc-summarize.yml` that runs on pushes to `main`. It installs the project, builds the CLI, runs delta summarization, and commits `.diffdoc/manifest.json` back to the branch when the manifest changes.
187
+
188
+ The workflow intentionally ignores `.diffdoc/manifest.json` and `.diffdoc/vectra/**` changes as triggers so the bot commit does not create a loop.
189
+
190
+ Configure the same values used by the CLI as GitHub Actions variables or secrets, such as `AI_PROVIDER`, `LOCAL_LLM_ENDPOINT`, `LOCAL_CHAT_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, and `OPENAI_API_KEY`. The workflow uses the environment-variable fallback path in DiffDoc, so no `.diffdocrc` file is required in CI.
191
+
192
+ ## MCP Server
193
+
194
+ DiffDoc also ships a local MCP stdio server as `diffdoc-mcp`. This lets MCP-compatible agents search or answer questions against the local Vectra index directly.
195
+
196
+ Run it manually with the same config style as the CLI:
197
+
198
+ ```bash
199
+ diffdoc-mcp --config ./.diffdocrc
200
+ ```
201
+
202
+ Example MCP client configuration:
203
+
204
+ ```json
205
+ {
206
+ "mcpServers": {
207
+ "diffdoc": {
208
+ "command": "npx",
209
+ "args": ["diffdoc-mcp", "--config", "./.diffdocrc"]
210
+ }
211
+ }
212
+ }
213
+ ```
214
+
215
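+ If DiffDoc is installed as a project dev dependency, the same `npx diffdoc-mcp` command will resolve the local package binary. Without a local install, use `npx --package diffdoc diffdoc-mcp` instead: the binary name differs from the package name, so a bare `npx diffdoc-mcp` would look for a package called `diffdoc-mcp`.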
216
+
217
+ Available MCP tools:
218
+
219
+ - `diffdoc_search`: searches the local Vectra index and returns raw file matches, summaries, scores, hashes, and optional code snapshots.
220
+ - `diffdoc_answer`: retrieves relevant index context and asks the configured chat model to answer the question.
221
+ - `diffdoc_index_stats`: returns the Vectra index path, whether it exists, and the indexed item count.
222
+
223
+ Run `diffdoc summarize` and `diffdoc embed` before using the MCP server, otherwise the search and answer tools will not have a local index to query.
224
+
183
225
  ## Notes
184
226
 
185
227
  - Node.js `>=22` is required because Vectra requires it.
@@ -2,95 +2,49 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.runQuery = runQuery;
4
4
  exports.runSearch = runSearch;
5
- const vectra_1 = require("vectra");
6
- const llm_1 = require("../utils/llm");
7
- const embed_1 = require("./embed");
8
- const CODE_QUERY_PREFIX = "Represent this query for searching relevant code: ";
9
- function parseTopK(value) {
10
- const topK = Number.parseInt(value, 10);
11
- if (!Number.isInteger(topK) || topK < 1) {
12
- throw new Error("Invalid --top value. Expected a positive integer.");
13
- }
14
- return topK;
15
- }
16
- function trimForDisplay(text, maxLength) {
17
- if (text.length <= maxLength) {
18
- return text;
19
- }
20
- return `${text.slice(0, maxLength).trimEnd()}...`;
21
- }
22
- function buildAnswerPrompt(question, results) {
23
- const context = results.map((result, indexPosition) => {
24
- const metadata = result.item.metadata;
25
- return [
26
- `Result ${indexPosition + 1}`,
27
- `File: ${metadata.filePath}`,
28
- `Score: ${result.score}`,
29
- `Summary:\n${metadata.summaryText}`,
30
- `Code Snapshot:\n${metadata.rawCodeSnapshot}`
31
- ].join("\n");
32
- }).join("\n\n---\n\n");
33
- return `Answer the user's question using only the retrieved DiffDoc results below. If the results do not contain enough information, say what is missing. Prefer a direct answer first, then cite the relevant file paths. Keep the explanation appropriate to the question: summarize when asked for a summary, explain implementation details when asked how something works, and avoid unsupported claims.\n\nUser question:\n${question}\n\nRetrieved results:\n${context}`;
34
- }
5
+ const retrieval_1 = require("../services/retrieval");
35
6
  async function runQuery(message, options, config) {
36
- const topK = parseTopK(options.top);
37
- const indexPath = (0, embed_1.getVectraIndexPath)(config);
38
- const index = new vectra_1.LocalIndex(indexPath);
39
- if (!await index.isIndexCreated()) {
40
- throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
41
- }
42
- const [queryVector] = await (0, llm_1.generateEmbeddings)([`${CODE_QUERY_PREFIX}${message}`], config.embeddings);
43
- const results = await index.queryItems(queryVector, message, topK);
44
- if (results.length === 0) {
45
- console.log("No matching embedded summaries found.");
7
+ const topK = (0, retrieval_1.parseTopK)(options.top);
8
+ const answerResult = await (0, retrieval_1.answerFromIndex)(message, topK, config);
9
+ console.log(answerResult.answer);
10
+ if (answerResult.sources.length === 0) {
46
11
  return;
47
12
  }
48
- const answer = await (0, llm_1.promptLlm)(buildAnswerPrompt(message, results), config.chat);
49
- console.log(answer);
50
13
  console.log("\nSources:");
51
- for (const [indexPosition, result] of results.entries()) {
52
- const metadata = result.item.metadata;
53
- console.log(`${indexPosition + 1}. ${metadata.filePath} (${result.score.toFixed(4)})`);
14
+ for (const [indexPosition, source] of answerResult.sources.entries()) {
15
+ console.log(`${indexPosition + 1}. ${source.filePath} (${source.score.toFixed(4)})`);
54
16
  }
55
17
  if (!options.code) {
56
18
  return;
57
19
  }
58
- for (const [indexPosition, result] of results.entries()) {
59
- const metadata = result.item.metadata;
60
- console.log(`\n#${indexPosition + 1} ${metadata.filePath}`);
20
+ for (const [indexPosition, result] of answerResult.results.entries()) {
21
+ console.log(`\n#${indexPosition + 1} ${result.filePath}`);
61
22
  console.log(`Score: ${result.score.toFixed(4)}`);
62
- console.log(`Hash: ${metadata.hash}`);
23
+ console.log(`Hash: ${result.hash}`);
63
24
  console.log("Summary:");
64
- console.log(trimForDisplay(metadata.summaryText, 1200));
25
+ console.log((0, retrieval_1.trimForDisplay)(result.summaryText, 1200));
65
26
  if (options.code) {
66
27
  console.log("Code Snapshot:");
67
- console.log(trimForDisplay(metadata.rawCodeSnapshot, 2000));
28
+ console.log((0, retrieval_1.trimForDisplay)(result.rawCodeSnapshot, 2000));
68
29
  }
69
30
  }
70
31
  }
71
32
  async function runSearch(message, options, config) {
72
- const topK = parseTopK(options.top);
73
- const indexPath = (0, embed_1.getVectraIndexPath)(config);
74
- const index = new vectra_1.LocalIndex(indexPath);
75
- if (!await index.isIndexCreated()) {
76
- throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
77
- }
78
- const [queryVector] = await (0, llm_1.generateEmbeddings)([`${CODE_QUERY_PREFIX}${message}`], config.embeddings);
79
- const results = await index.queryItems(queryVector, message, topK);
33
+ const topK = (0, retrieval_1.parseTopK)(options.top);
34
+ const results = await (0, retrieval_1.searchIndex)(message, topK, config);
80
35
  if (results.length === 0) {
81
36
  console.log("No matching embedded summaries found.");
82
37
  return;
83
38
  }
84
39
  for (const [indexPosition, result] of results.entries()) {
85
- const metadata = result.item.metadata;
86
- console.log(`\n#${indexPosition + 1} ${metadata.filePath}`);
40
+ console.log(`\n#${indexPosition + 1} ${result.filePath}`);
87
41
  console.log(`Score: ${result.score.toFixed(4)}`);
88
- console.log(`Hash: ${metadata.hash}`);
42
+ console.log(`Hash: ${result.hash}`);
89
43
  console.log("Summary:");
90
- console.log(trimForDisplay(metadata.summaryText, 1200));
44
+ console.log((0, retrieval_1.trimForDisplay)(result.summaryText, 1200));
91
45
  if (options.code) {
92
46
  console.log("Code Snapshot:");
93
- console.log(trimForDisplay(metadata.rawCodeSnapshot, 2000));
47
+ console.log((0, retrieval_1.trimForDisplay)(result.rawCodeSnapshot, 2000));
94
48
  }
95
49
  }
96
50
  }
package/dist/mcp.js ADDED
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
5
+ const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
6
+ const zod_1 = require("zod");
7
+ const config_1 = require("./config");
8
+ const retrieval_1 = require("./services/retrieval");
9
// Version string passed to the McpServer constructor in main().
// Keep this in sync with the "version" field of package.json; it was left at
// the previous release ("0.1.1") when the package was bumped to 0.2.0.
const MCP_SERVER_VERSION = "0.2.0";
10
+ function readCliOptions(argv) {
11
+ const options = {};
12
+ for (let i = 0; i < argv.length; i += 1) {
13
+ const arg = argv[i];
14
+ if (!arg.startsWith("--"))
15
+ continue;
16
+ const key = arg.slice(2);
17
+ const nextValue = argv[i + 1];
18
+ if (!nextValue || nextValue.startsWith("--")) {
19
+ throw new Error(`Missing value for --${key}.`);
20
+ }
21
+ i += 1;
22
+ switch (key) {
23
+ case "config":
24
+ options.config = nextValue;
25
+ break;
26
+ case "base-dir":
27
+ options.baseDir = nextValue;
28
+ break;
29
+ case "ai-provider":
30
+ options.aiProvider = nextValue;
31
+ break;
32
+ case "local-llm-endpoint":
33
+ options.localLlmEndpoint = nextValue;
34
+ break;
35
+ case "local-chat-model":
36
+ options.localChatModel = nextValue;
37
+ break;
38
+ case "local-embed-endpoint":
39
+ options.localEmbedEndpoint = nextValue;
40
+ break;
41
+ case "local-embed-model":
42
+ options.localEmbedModel = nextValue;
43
+ break;
44
+ case "cloud-llm-endpoint":
45
+ options.cloudLlmEndpoint = nextValue;
46
+ break;
47
+ case "cloud-chat-model":
48
+ options.cloudChatModel = nextValue;
49
+ break;
50
+ case "cloud-embed-model":
51
+ options.cloudEmbedModel = nextValue;
52
+ break;
53
+ case "openai-api-key":
54
+ options.openaiApiKey = nextValue;
55
+ break;
56
+ default:
57
+ throw new Error(`Unknown MCP option: --${key}.`);
58
+ }
59
+ }
60
+ return options;
61
+ }
62
/**
 * Builds the runtime configuration for one tool invocation by delegating to
 * the shared config module.
 *
 * @param {object} options - Options parsed from the command line.
 * @param {object} needs - Flags (e.g. `chat`, `embeddings`) forwarded as-is.
 * @returns {object} Whatever `buildRuntimeConfig` returns.
 */
function buildConfig(options, needs) {
  const { buildRuntimeConfig } = config_1;
  return buildRuntimeConfig(options, needs);
}
65
/**
 * Wraps a value in a tool result holding one text part whose text is the
 * value pretty-printed as JSON with two-space indentation.
 *
 * @param {*} data - Value to serialise.
 * @returns {{ content: Array<{ type: string, text: string }> }}
 */
function jsonText(data) {
  const serialized = JSON.stringify(data, null, 2);
  const textPart = { type: "text", text: serialized };
  return { content: [textPart] };
}
75
/**
 * Entry point of the `diffdoc-mcp` binary: parses CLI flags, registers the
 * three DiffDoc tools on an McpServer and connects it to a stdio transport.
 *
 * Configuration is rebuilt inside every tool call, requesting only the
 * capabilities (`chat`, `embeddings`) that the tool needs.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { parseTopK, searchIndex, answerFromIndex, getIndexStats } = retrieval_1;
  const { z } = zod_1;
  const runtimeOptions = readCliOptions(process.argv.slice(2));
  const server = new mcp_js_1.McpServer({ name: "diffdoc", version: MCP_SERVER_VERSION });

  const searchTool = {
    title: "Search DiffDoc Index",
    description: "Search the local DiffDoc Vectra index and return raw matching files, summaries, and optional code snapshots.",
    inputSchema: {
      query: z.string().min(1).describe("Natural-language search query."),
      top: z.number().int().positive().optional().describe("Number of matches to return."),
      includeCode: z.boolean().optional().describe("Include raw code snapshots in the returned results.")
    }
  };
  server.registerTool("diffdoc_search", searchTool, async ({ query, top = 5, includeCode = false }) => {
    const config = buildConfig(runtimeOptions, { embeddings: true });
    const queryText = String(query);
    const limit = parseTopK(top);
    const matches = await searchIndex(queryText, limit, config);
    const results = matches.map((match) => {
      const { filePath, score, hash, summaryText } = match;
      // An undefined snapshot is dropped by JSON.stringify in jsonText.
      const rawCodeSnapshot = includeCode ? match.rawCodeSnapshot : undefined;
      return { filePath, score, hash, summaryText, rawCodeSnapshot };
    });
    return jsonText({ results });
  });

  const answerTool = {
    title: "Answer From DiffDoc Index",
    description: "Answer a question using retrieved DiffDoc index context and the configured chat model.",
    inputSchema: {
      question: z.string().min(1).describe("Question to answer using indexed DiffDoc context."),
      top: z.number().int().positive().optional().describe("Number of matches to retrieve before answering."),
      includeResults: z.boolean().optional().describe("Include full retrieved results in addition to answer and sources.")
    }
  };
  server.registerTool("diffdoc_answer", answerTool, async ({ question, top = 5, includeResults = false }) => {
    const config = buildConfig(runtimeOptions, { chat: true, embeddings: true });
    const questionText = String(question);
    const limit = parseTopK(top);
    const outcome = await answerFromIndex(questionText, limit, config);
    return jsonText({
      answer: outcome.answer,
      sources: outcome.sources,
      results: includeResults ? outcome.results : undefined
    });
  });

  const statsTool = {
    title: "DiffDoc Index Stats",
    description: "Return the local DiffDoc Vectra index path, existence status, and indexed item count.",
    inputSchema: {}
  };
  server.registerTool("diffdoc_index_stats", statsTool, async () => {
    const config = buildConfig(runtimeOptions, { embeddings: false, chat: false });
    const stats = await getIndexStats(config);
    return jsonText(stats);
  });

  const transport = new stdio_js_1.StdioServerTransport();
  await server.connect(transport);
}
130
// Start the server; on any startup failure print the message to stderr and
// exit with status 1.
main().catch((error) => {
  const message = error instanceof Error ? error.message : error;
  console.error(message);
  process.exit(1);
});
@@ -0,0 +1,86 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.parseTopK = parseTopK;
4
+ exports.trimForDisplay = trimForDisplay;
5
+ exports.searchIndex = searchIndex;
6
+ exports.answerFromIndex = answerFromIndex;
7
+ exports.getIndexStats = getIndexStats;
8
+ const vectra_1 = require("vectra");
9
+ const embed_1 = require("../commands/embed");
10
+ const llm_1 = require("../utils/llm");
11
+ const CODE_QUERY_PREFIX = "Represent this query for searching relevant code: ";
12
/**
 * Validates the "top" option and returns it as a positive integer.
 *
 * Numbers are used as-is. Strings must consist solely of decimal digits
 * (optional leading "+", surrounding whitespace allowed); inputs such as
 * "5abc" or "3.9", which `Number.parseInt` would silently truncate, are
 * rejected.
 *
 * @param {string|number} value - Raw value from the CLI or an MCP tool call.
 * @returns {number} The parsed positive integer.
 * @throws {Error} When the value is not a positive integer.
 */
function parseTopK(value) {
  let topK = Number.NaN;
  if (typeof value === "number") {
    topK = value;
  } else if (typeof value === "string") {
    const trimmed = value.trim();
    if (/^\+?\d+$/.test(trimmed)) {
      topK = Number.parseInt(trimmed, 10);
    }
  }
  if (!Number.isInteger(topK) || topK < 1) {
    throw new Error("Invalid top value. Expected a positive integer.");
  }
  return topK;
}
19
/**
 * Shortens text for console output, appending "..." when it was cut.
 *
 * A missing value (null/undefined, e.g. an index entry without a code
 * snapshot) is treated as an empty string instead of throwing. The cut never
 * leaves a dangling high surrogate, so an emoji or other astral character at
 * the boundary is dropped whole rather than rendered as a broken glyph.
 *
 * @param {string|null|undefined} text - Text to display.
 * @param {number} maxLength - Maximum number of UTF-16 code units to keep.
 * @returns {string} The original text, or a truncated copy ending in "...".
 */
function trimForDisplay(text, maxLength) {
  const value = text == null ? "" : String(text);
  if (value.length <= maxLength) {
    return value;
  }
  let end = maxLength;
  const lastKept = value.charCodeAt(end - 1);
  // 0xD800-0xDBFF is the high-surrogate range: the first half of a pair.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }
  return `${value.slice(0, end).trimEnd()}...`;
}
25
/**
 * Flattens one raw index query result into the plain record used by the CLI
 * and the MCP tools: the result's score plus selected metadata fields.
 *
 * @param {{ score: number, item: { metadata: object } }} result - Raw query result.
 * @returns {{ filePath: *, score: number, hash: *, summaryText: *, rawCodeSnapshot: * }}
 */
function mapSearchResult(result) {
  const { score, item } = result;
  const { filePath, hash, summaryText, rawCodeSnapshot } = item.metadata;
  return { filePath, score, hash, summaryText, rawCodeSnapshot };
}
35
/**
 * Assembles the chat prompt: fixed answering instructions, the user's
 * question, and one numbered section per retrieved result separated by
 * "---" rules.
 *
 * @param {string} question - The user's question.
 * @param {Array<{ filePath: *, score: *, summaryText: *, rawCodeSnapshot: * }>} results
 *   Mapped search results, in the order they should be numbered.
 * @returns {string} The complete prompt text.
 */
function buildAnswerPrompt(question, results) {
  const instructions = "Answer the user's question using only the retrieved DiffDoc results below. If the results do not contain enough information, say what is missing. Prefer a direct answer first, then cite the relevant file paths. Keep the explanation appropriate to the question: summarize when asked for a summary, explain implementation details when asked how something works, and avoid unsupported claims.";
  const sections = [];
  for (const [position, entry] of results.entries()) {
    sections.push(
      `Result ${position + 1}\nFile: ${entry.filePath}\nScore: ${entry.score}\nSummary:\n${entry.summaryText}\nCode Snapshot:\n${entry.rawCodeSnapshot}`
    );
  }
  const context = sections.join("\n\n---\n\n");
  return [
    instructions,
    `User question:\n${question}`,
    `Retrieved results:\n${context}`
  ].join("\n\n");
}
47
/**
 * Opens the local Vectra index for the given configuration and fails if it
 * has not been created yet.
 *
 * @param {object} config - Runtime configuration used to resolve the index path.
 * @returns {Promise<object>} The `LocalIndex` instance.
 * @throws {Error} When no index exists at the resolved path.
 */
async function getExistingIndex(config) {
  const { getVectraIndexPath } = embed_1;
  const { LocalIndex } = vectra_1;
  const indexPath = getVectraIndexPath(config);
  const index = new LocalIndex(indexPath);
  const created = await index.isIndexCreated();
  if (created) {
    return index;
  }
  throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
}
55
/**
 * Embeds the query (with the code-search prefix prepended) and returns the
 * top matches from the local index as flat result records.
 *
 * @param {string} query - Natural-language search text.
 * @param {number} topK - Number of results requested from the index.
 * @param {object} config - Runtime configuration; `config.embeddings` is passed
 *   to the embedding call.
 * @returns {Promise<Array<object>>} Mapped search results.
 * @throws {Error} When the index does not exist.
 */
async function searchIndex(query, topK, config) {
  const { generateEmbeddings } = llm_1;
  const index = await getExistingIndex(config);
  const prefixedQuery = `${CODE_QUERY_PREFIX}${query}`;
  const vectors = await generateEmbeddings([prefixedQuery], config.embeddings);
  const [queryVector] = vectors;
  // The unprefixed query text is passed alongside the vector.
  const rawMatches = await index.queryItems(queryVector, query, topK);
  return rawMatches.map((match) => mapSearchResult(match));
}
61
/**
 * Retrieves matching index entries for a question and asks the configured
 * chat model to answer from them.
 *
 * When nothing matches, the chat model is not called and a fixed message is
 * returned with empty `sources` and `results`.
 *
 * @param {string} question - The user's question.
 * @param {number} topK - Number of matches to retrieve.
 * @param {object} config - Runtime configuration; `config.chat` is passed to the
 *   chat call.
 * @returns {Promise<{ answer: *, sources: Array<{ filePath: *, score: * }>, results: Array<object> }>}
 */
async function answerFromIndex(question, topK, config) {
  const { promptLlm } = llm_1;
  const results = await searchIndex(question, topK, config);
  if (results.length > 0) {
    const prompt = buildAnswerPrompt(question, results);
    const answer = await promptLlm(prompt, config.chat);
    const sources = results.map(({ filePath, score }) => ({ filePath, score }));
    return { answer, sources, results };
  }
  return {
    answer: "No matching embedded summaries found.",
    sources: [],
    results: []
  };
}
77
/**
 * Reports where the local Vectra index lives, whether it has been created,
 * and how many items it holds (0 when it does not exist).
 *
 * @param {object} config - Runtime configuration used to resolve the index path.
 * @returns {Promise<{ indexPath: *, exists: boolean, items: * }>}
 */
async function getIndexStats(config) {
  const { getVectraIndexPath } = embed_1;
  const { LocalIndex } = vectra_1;
  const indexPath = getVectraIndexPath(config);
  const index = new LocalIndex(indexPath);
  if (!(await index.isIndexCreated())) {
    return { indexPath, exists: false, items: 0 };
  }
  const { items } = await index.getIndexStats();
  return { indexPath, exists: true, items };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "diffdoc",
3
- "version": "0.1.1",
3
+ "version": "0.2.0",
4
4
  "description": "Translate repository code shifts into plain-English business context",
5
5
  "license": "MIT",
6
6
  "author": "Christopher Sullivan",
@@ -15,7 +15,8 @@
15
15
  "type": "commonjs",
16
16
  "main": "dist/index.js",
17
17
  "bin": {
18
- "diffdoc": "./dist/index.js"
18
+ "diffdoc": "./dist/index.js",
19
+ "diffdoc-mcp": "./dist/mcp.js"
19
20
  },
20
21
  "files": [
21
22
  "dist",
@@ -33,10 +34,12 @@
33
34
  "prepare": "npm run build"
34
35
  },
35
36
  "dependencies": {
37
+ "@modelcontextprotocol/sdk": "^1.29.0",
36
38
  "commander": "^12.0.0",
37
39
  "openai": "^4.28.0",
38
40
  "simple-git": "^3.24.0",
39
- "vectra": "^0.14.0"
41
+ "vectra": "^0.14.0",
42
+ "zod": "^3.25.76"
40
43
  },
41
44
  "devDependencies": {
42
45
  "@types/node": "^20.19.41",