diffdoc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ {
2
+ "baseDir": "./.diffdoc",
3
+ "aiProvider": "local",
4
+ "localLlmEndpoint": "http://localhost:11434/v1",
5
+ "localEmbedEndpoint": "http://localhost:11434/v1/embeddings",
6
+ "localChatModel": "qwen2.5-coder:7b",
7
+ "localEmbedModel": "nomic-embed-code",
8
+ "cloudLlmEndpoint": "https://api.openai.com/v1",
9
+ "cloudChatModel": "gpt-4o-mini",
10
+ "cloudEmbedModel": "text-embedding-3-small",
11
+ "openaiApiKey": ""
12
+ }
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Christopher Sullivan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,153 @@
1
+ # DiffDoc
2
+
3
+ ## Project Description
4
+
5
+ DiffDoc turns source code into searchable, plain-English project context. It scans repository files, asks an OpenAI-compatible chat model to summarize the business behavior in each file, stores those summaries in a portable JSON manifest, embeds the manifest into a local Vectra index, and answers questions using the indexed results as retrieval context.
6
+
7
+ The project is designed for teams that need fast codebase comprehension without requiring every stakeholder to read implementation details. It can run against local model servers such as Ollama, LM Studio, or vLLM, or against cloud OpenAI-compatible APIs.
8
+
9
+ ## Installation
10
+
11
+ Run from this repository:
12
+
13
+ ```bash
14
+ npm install
15
+ npm run build
16
+ node dist/index.js --help
17
+ ```
18
+
19
+ Run after publishing:
20
+
21
+ ```bash
22
+ npx diffdoc --help
23
+ ```
24
+
25
+ Use as a project dev dependency:
26
+
27
+ ```bash
28
+ npm install --save-dev diffdoc
29
+ npx diffdoc --help
30
+ ```
31
+
32
+ Package scripts can call the installed binary:
33
+
34
+ ```json
35
+ {
36
+ "scripts": {
37
+ "diffdoc:summarize": "diffdoc summarize",
38
+ "diffdoc:embed": "diffdoc embed",
39
+ "diffdoc:query": "diffdoc query"
40
+ }
41
+ }
42
+ ```
43
+
44
+ ## Configuration
45
+
46
+ DiffDoc accepts runtime flags on each command. It also loads a JSON `.diffdocrc` file from the current working directory when present, or from a custom path with `--config <path>`.
47
+
48
+ Precedence:
49
+
50
+ 1. CLI flags
51
+ 2. `.diffdocrc`
52
+ 3. Environment variable fallbacks
53
+
54
+ Create a local config from the example:
55
+
56
+ ```bash
57
+ cp .diffdocrc.example .diffdocrc
58
+ ```
59
+
60
+ Example config with all supported keys:
61
+
62
+ ```json
63
+ {
64
+ "baseDir": "./.diffdoc",
65
+ "aiProvider": "local",
66
+ "localLlmEndpoint": "http://localhost:11434/v1",
67
+ "localEmbedEndpoint": "http://localhost:11434/v1/embeddings",
68
+ "localChatModel": "qwen2.5-coder:7b",
69
+ "localEmbedModel": "nomic-embed-code",
70
+ "cloudLlmEndpoint": "https://api.openai.com/v1",
71
+ "cloudChatModel": "gpt-4o-mini",
72
+ "cloudEmbedModel": "text-embedding-3-small",
73
+ "openaiApiKey": ""
74
+ }
75
+ ```
76
+
77
+ Every setting also has an environment variable fallback: `AI_PROVIDER`, `DIFFDOC_BASE_DIR`, `LOCAL_LLM_ENDPOINT`, `LOCAL_EMBED_ENDPOINT`, `LOCAL_CHAT_MODEL`, `LOCAL_EMBED_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, `CLOUD_EMBED_MODEL`, and `OPENAI_API_KEY`.
78
+
79
+ ## Commands
80
+
81
+ Summarize a repository into `./.diffdoc/manifest.json`:
82
+
83
+ ```bash
84
+ diffdoc summarize --path . --mode all
85
+ ```
86
+
87
+ Summarize only changed Git files using the existing manifest state:
88
+
89
+ ```bash
90
+ diffdoc summarize --path . --mode delta
91
+ ```
92
+
93
+ Embed the manifest into a local Vectra index at `./.diffdoc/vectra`:
94
+
95
+ ```bash
96
+ diffdoc embed
97
+ ```
98
+
99
+ Ask a question using retrieved embedded context:
100
+
101
+ ```bash
102
+ diffdoc query "How does this project process changed files?"
103
+ ```
104
+
105
+ Include retrieved code snapshots after the answer:
106
+
107
+ ```bash
108
+ diffdoc query "How does embedding work?" --top 3 --code
109
+ ```
110
+
111
+ Prompt the configured chat model directly:
112
+
113
+ ```bash
114
+ diffdoc prompt "Confirm the configured model is reachable."
115
+ ```
116
+
117
+ Use a custom config file:
118
+
119
+ ```bash
120
+ diffdoc query "How does embedding work?" --config ./config/diffdoc.local.json
121
+ ```
122
+
123
+ Override a config value at runtime:
124
+
125
+ ```bash
126
+ diffdoc embed --config ./.diffdocrc --base-dir ./tmp-diffdoc
127
+ ```
128
+
129
+ ## Workflow
130
+
131
+ Typical usage is:
132
+
133
+ ```bash
134
+ diffdoc summarize --path . --mode all
135
+ diffdoc embed
136
+ diffdoc query "What business behavior does this repository implement?"
137
+ ```
138
+
139
+ After the initial run, use delta mode to refresh changed files:
140
+
141
+ ```bash
142
+ diffdoc summarize --path . --mode delta
143
+ diffdoc embed
144
+ ```
145
+
146
+ ## Notes
147
+
148
+ - Node.js `>=22` is required because Vectra requires it.
149
+ - `.diffdoc/` and `.diffdocrc` are ignored by git by default.
150
+ - `summarize` requires a configured chat model.
151
+ - `embed` requires a configured embedding model.
152
+ - `query` requires both a configured chat model and embedding model.
153
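+ - DiffDoc prefixes every query with `Represent this query for searching relevant code: ` before embedding it. This is the query format used by code-oriented embedding models such as `nomic-embed-code`; the prefix is applied regardless of which embedding model is configured.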
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getVectraIndexPath = getVectraIndexPath;
7
+ exports.runEmbed = runEmbed;
8
+ const promises_1 = __importDefault(require("node:fs/promises"));
9
+ const node_path_1 = __importDefault(require("node:path"));
10
+ const vectra_1 = require("vectra");
11
+ const llm_1 = require("../utils/llm");
12
+ const paths_1 = require("../utils/paths");
13
+ const VECTRA_INDEX_DIR = "vectra";
14
+ function getVectraIndexPath(config) {
15
+ return node_path_1.default.resolve((0, paths_1.getDiffdocBaseDir)(config.baseDir), VECTRA_INDEX_DIR);
16
+ }
17
+ function buildDocument(filePath, summaryText, rawCodeSnapshot) {
18
+ return `File: ${filePath}\n` +
19
+ `Summary: ${summaryText}\n\n` +
20
+ `Code Snapshot:\n\`\`\`\n${rawCodeSnapshot}\n\`\`\``;
21
+ }
22
+ async function runEmbed(options, config) {
23
+ const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.manifest, config.baseDir);
24
+ const manifest = JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
25
+ const entries = Object.entries(manifest.files);
26
+ const indexPath = getVectraIndexPath(config);
27
+ const index = new vectra_1.LocalIndex(indexPath);
28
+ await index.createIndex({
29
+ version: 1,
30
+ deleteIfExists: true,
31
+ metadata_config: {
32
+ indexed: ["filePath", "hash"]
33
+ }
34
+ });
35
+ if (entries.length === 0) {
36
+ console.log(`Created empty Vectra index at ${indexPath}.`);
37
+ return;
38
+ }
39
+ const documents = entries.map(([filePath, file]) => buildDocument(filePath, file.summaryText, file.rawCodeSnapshot));
40
+ const embeddings = await (0, llm_1.generateEmbeddings)(documents, config.embeddings);
41
+ await index.beginUpdate();
42
+ try {
43
+ for (let i = 0; i < entries.length; i += 1) {
44
+ const [filePath, file] = entries[i];
45
+ await index.upsertItem({
46
+ id: filePath,
47
+ vector: embeddings[i],
48
+ metadata: {
49
+ filePath,
50
+ hash: file.hash,
51
+ summaryText: file.summaryText,
52
+ rawCodeSnapshot: file.rawCodeSnapshot
53
+ }
54
+ });
55
+ }
56
+ await index.endUpdate();
57
+ }
58
+ catch (error) {
59
+ index.cancelUpdate();
60
+ throw error;
61
+ }
62
+ console.log(`Embedded ${entries.length} summaries into Vectra index at ${indexPath}.`);
63
+ }
@@ -0,0 +1,69 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.runQuery = runQuery;
4
+ const vectra_1 = require("vectra");
5
+ const llm_1 = require("../utils/llm");
6
+ const embed_1 = require("./embed");
7
+ const CODE_QUERY_PREFIX = "Represent this query for searching relevant code: ";
8
+ function parseTopK(value) {
9
+ const topK = Number.parseInt(value, 10);
10
+ if (!Number.isInteger(topK) || topK < 1) {
11
+ throw new Error("Invalid --top value. Expected a positive integer.");
12
+ }
13
+ return topK;
14
+ }
15
+ function trimForDisplay(text, maxLength) {
16
+ if (text.length <= maxLength) {
17
+ return text;
18
+ }
19
+ return `${text.slice(0, maxLength).trimEnd()}...`;
20
+ }
21
+ function buildAnswerPrompt(question, results) {
22
+ const context = results.map((result, indexPosition) => {
23
+ const metadata = result.item.metadata;
24
+ return [
25
+ `Result ${indexPosition + 1}`,
26
+ `File: ${metadata.filePath}`,
27
+ `Score: ${result.score}`,
28
+ `Summary:\n${metadata.summaryText}`,
29
+ `Code Snapshot:\n${metadata.rawCodeSnapshot}`
30
+ ].join("\n");
31
+ }).join("\n\n---\n\n");
32
+ return `Answer the user's question using only the retrieved DiffDoc results below. If the results do not contain enough information, say what is missing. Prefer a direct answer first, then cite the relevant file paths. Keep the explanation appropriate to the question: summarize when asked for a summary, explain implementation details when asked how something works, and avoid unsupported claims.\n\nUser question:\n${question}\n\nRetrieved results:\n${context}`;
33
+ }
34
+ async function runQuery(message, options, config) {
35
+ const topK = parseTopK(options.top);
36
+ const indexPath = (0, embed_1.getVectraIndexPath)(config);
37
+ const index = new vectra_1.LocalIndex(indexPath);
38
+ if (!await index.isIndexCreated()) {
39
+ throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
40
+ }
41
+ const [queryVector] = await (0, llm_1.generateEmbeddings)([`${CODE_QUERY_PREFIX}${message}`], config.embeddings);
42
+ const results = await index.queryItems(queryVector, message, topK);
43
+ if (results.length === 0) {
44
+ console.log("No matching embedded summaries found.");
45
+ return;
46
+ }
47
+ const answer = await (0, llm_1.promptLlm)(buildAnswerPrompt(message, results), config.chat);
48
+ console.log(answer);
49
+ console.log("\nSources:");
50
+ for (const [indexPosition, result] of results.entries()) {
51
+ const metadata = result.item.metadata;
52
+ console.log(`${indexPosition + 1}. ${metadata.filePath} (${result.score.toFixed(4)})`);
53
+ }
54
+ if (!options.code) {
55
+ return;
56
+ }
57
+ for (const [indexPosition, result] of results.entries()) {
58
+ const metadata = result.item.metadata;
59
+ console.log(`\n#${indexPosition + 1} ${metadata.filePath}`);
60
+ console.log(`Score: ${result.score.toFixed(4)}`);
61
+ console.log(`Hash: ${metadata.hash}`);
62
+ console.log("Summary:");
63
+ console.log(trimForDisplay(metadata.summaryText, 1200));
64
+ if (options.code) {
65
+ console.log("Code Snapshot:");
66
+ console.log(trimForDisplay(metadata.rawCodeSnapshot, 2000));
67
+ }
68
+ }
69
+ }
@@ -0,0 +1,113 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.runSummarize = runSummarize;
7
+ const promises_1 = __importDefault(require("node:fs/promises"));
8
+ const node_path_1 = __importDefault(require("node:path"));
9
+ const git_1 = require("../utils/git");
10
+ const hashing_1 = require("../utils/hashing");
11
+ const llm_1 = require("../utils/llm");
12
+ const paths_1 = require("../utils/paths");
13
+ const TARGET_EXTENSIONS = new Set([".ts", ".js", ".cs", ".py"]);
14
+ const IGNORED_DIRECTORIES = new Set([".git", "node_modules", "dist"]);
15
+ const IGNORED_FILES = new Set(["package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb"]);
16
+ function normalizeRelativePath(filePath) {
17
+ return filePath.split(node_path_1.default.sep).join("/");
18
+ }
19
+ function isTargetCodeFile(filePath) {
20
+ return TARGET_EXTENSIONS.has(node_path_1.default.extname(filePath)) && !IGNORED_FILES.has(node_path_1.default.basename(filePath));
21
+ }
22
+ async function readManifest(manifestPath) {
23
+ try {
24
+ return JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
25
+ }
26
+ catch (error) {
27
+ const nodeError = error;
28
+ if (nodeError.code === "ENOENT") {
29
+ return { lastSyncedCommit: "", files: {} };
30
+ }
31
+ throw error;
32
+ }
33
+ }
34
+ async function writeManifest(manifestPath, manifest) {
35
+ await promises_1.default.mkdir(node_path_1.default.dirname(manifestPath), { recursive: true });
36
+ await promises_1.default.writeFile(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
37
+ }
38
+ async function walkCodeFiles(rootPath, currentPath = rootPath) {
39
+ const entries = await promises_1.default.readdir(currentPath, { withFileTypes: true });
40
+ const files = [];
41
+ for (const entry of entries) {
42
+ const entryPath = node_path_1.default.join(currentPath, entry.name);
43
+ if (entry.isDirectory()) {
44
+ if (!IGNORED_DIRECTORIES.has(entry.name)) {
45
+ files.push(...await walkCodeFiles(rootPath, entryPath));
46
+ }
47
+ continue;
48
+ }
49
+ if (entry.isFile() && isTargetCodeFile(entry.name)) {
50
+ files.push(normalizeRelativePath(node_path_1.default.relative(rootPath, entryPath)));
51
+ }
52
+ }
53
+ return files.sort();
54
+ }
55
+ async function summarizeFile(rootPath, relativePath, config) {
56
+ const absolutePath = node_path_1.default.join(rootPath, relativePath);
57
+ const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
58
+ return {
59
+ hash: (0, hashing_1.hashFileContent)(rawCodeSnapshot),
60
+ summaryText: await (0, llm_1.generateFunctionalSummary)(relativePath, rawCodeSnapshot, config.chat),
61
+ rawCodeSnapshot
62
+ };
63
+ }
64
+ async function runSummarize(options, config) {
65
+ if (options.mode !== "all" && options.mode !== "delta") {
66
+ throw new Error('Invalid summarize mode. Expected "all" or "delta".');
67
+ }
68
+ const commandCwd = process.cwd();
69
+ const repoPath = node_path_1.default.resolve(commandCwd, options.path);
70
+ const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.out, config.baseDir);
71
+ const manifest = options.mode === "delta" ? await readManifest(manifestPath) : { lastSyncedCommit: "", files: {} };
72
+ if (options.mode === "all") {
73
+ const files = await walkCodeFiles(repoPath);
74
+ manifest.files = {};
75
+ for (const filePath of files) {
76
+ manifest.files[filePath] = await summarizeFile(repoPath, filePath, config);
77
+ console.log(`Summarized ${filePath}`);
78
+ }
79
+ }
80
+ else {
81
+ const deltas = await (0, git_1.getGitDeltas)(repoPath, manifest.lastSyncedCommit);
82
+ for (const deletedPath of deltas.deleted) {
83
+ delete manifest.files[deletedPath];
84
+ console.log(`Pruned ${deletedPath}`);
85
+ }
86
+ for (const filePath of deltas.modifiedOrAdded) {
87
+ const absolutePath = node_path_1.default.join(repoPath, filePath);
88
+ try {
89
+ const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
90
+ const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
91
+ if (manifest.files[filePath]?.hash === hash)
92
+ continue;
93
+ manifest.files[filePath] = {
94
+ hash,
95
+ summaryText: await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat),
96
+ rawCodeSnapshot
97
+ };
98
+ console.log(`Updated ${filePath}`);
99
+ }
100
+ catch (error) {
101
+ const nodeError = error;
102
+ if (nodeError.code === "ENOENT") {
103
+ delete manifest.files[filePath];
104
+ continue;
105
+ }
106
+ throw error;
107
+ }
108
+ }
109
+ }
110
+ manifest.lastSyncedCommit = await (0, git_1.getCurrentCommit)(repoPath);
111
+ await writeManifest(manifestPath, manifest);
112
+ console.log(`Wrote manifest to ${manifestPath}`);
113
+ }
package/dist/config.js ADDED
@@ -0,0 +1,85 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.buildRuntimeConfig = buildRuntimeConfig;
7
+ const node_fs_1 = __importDefault(require("node:fs"));
8
+ const node_path_1 = __importDefault(require("node:path"));
9
+ function readOption(value, envName, fallback = "") {
10
+ return value || process.env[envName] || fallback;
11
+ }
12
+ function loadRcFile(configPath) {
13
+ const resolvedPath = node_path_1.default.resolve(process.cwd(), configPath || ".diffdocrc");
14
+ if (!node_fs_1.default.existsSync(resolvedPath)) {
15
+ if (configPath) {
16
+ throw new Error(`Config file not found: ${resolvedPath}`);
17
+ }
18
+ return {};
19
+ }
20
+ const parsed = JSON.parse(node_fs_1.default.readFileSync(resolvedPath, "utf8"));
21
+ if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
22
+ throw new Error(`Config file must contain a JSON object: ${resolvedPath}`);
23
+ }
24
+ return parsed;
25
+ }
26
+ function mergeConfigOptions(options) {
27
+ const rcOptions = loadRcFile(options.config);
28
+ return {
29
+ ...rcOptions,
30
+ ...options
31
+ };
32
+ }
33
+ function readProvider(value) {
34
+ const provider = readOption(value, "AI_PROVIDER", "local");
35
+ if (provider !== "local" && provider !== "cloud") {
36
+ throw new Error('Invalid AI provider. Expected "local" or "cloud".');
37
+ }
38
+ return provider;
39
+ }
40
+ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
41
+ const mergedOptions = mergeConfigOptions(options);
42
+ const provider = readProvider(mergedOptions.aiProvider);
43
+ const apiKey = readOption(mergedOptions.openaiApiKey, "OPENAI_API_KEY", provider === "local" ? "local-key" : "");
44
+ const chatBaseURL = provider === "cloud"
45
+ ? readOption(mergedOptions.cloudLlmEndpoint, "CLOUD_LLM_ENDPOINT", "https://api.openai.com/v1")
46
+ : readOption(mergedOptions.localLlmEndpoint, "LOCAL_LLM_ENDPOINT");
47
+ const chatModel = provider === "cloud"
48
+ ? readOption(mergedOptions.cloudChatModel, "CLOUD_CHAT_MODEL", "gpt-4o-mini")
49
+ : readOption(mergedOptions.localChatModel, "LOCAL_CHAT_MODEL");
50
+ const embedBaseURL = provider === "cloud"
51
+ ? readOption(mergedOptions.cloudLlmEndpoint, "CLOUD_LLM_ENDPOINT", "https://api.openai.com/v1")
52
+ : readOption(mergedOptions.localEmbedEndpoint, "LOCAL_EMBED_ENDPOINT");
53
+ const embedModel = provider === "cloud"
54
+ ? readOption(mergedOptions.cloudEmbedModel, "CLOUD_EMBED_MODEL", "text-embedding-3-small")
55
+ : readOption(mergedOptions.localEmbedModel, "LOCAL_EMBED_MODEL");
56
+ if (needs.chat && !chatBaseURL) {
57
+ throw new Error(`Missing ${provider === "cloud" ? "cloud" : "local"} chat endpoint. Pass the runtime option or set ${provider === "cloud" ? "CLOUD_LLM_ENDPOINT" : "LOCAL_LLM_ENDPOINT"}.`);
58
+ }
59
+ if (needs.chat && !chatModel) {
60
+ throw new Error(`Missing ${provider === "cloud" ? "cloud" : "local"} chat model. Pass the runtime option or set ${provider === "cloud" ? "CLOUD_CHAT_MODEL" : "LOCAL_CHAT_MODEL"}.`);
61
+ }
62
+ if (needs.embeddings && !embedBaseURL) {
63
+ throw new Error(`Missing ${provider === "cloud" ? "cloud" : "local"} embedding endpoint. Pass the runtime option or set ${provider === "cloud" ? "CLOUD_LLM_ENDPOINT" : "LOCAL_EMBED_ENDPOINT"}.`);
64
+ }
65
+ if (needs.embeddings && !embedModel) {
66
+ throw new Error(`Missing ${provider === "cloud" ? "cloud" : "local"} embedding model. Pass the runtime option or set ${provider === "cloud" ? "CLOUD_EMBED_MODEL" : "LOCAL_EMBED_MODEL"}.`);
67
+ }
68
+ if (provider === "cloud" && (needs.chat || needs.embeddings) && !apiKey) {
69
+ throw new Error("Missing OpenAI API key. Pass --openai-api-key or set OPENAI_API_KEY.");
70
+ }
71
+ return {
72
+ baseDir: readOption(mergedOptions.baseDir, "DIFFDOC_BASE_DIR", "./.diffdoc"),
73
+ provider,
74
+ chat: {
75
+ apiKey,
76
+ baseURL: chatBaseURL,
77
+ model: chatModel
78
+ },
79
+ embeddings: {
80
+ apiKey,
81
+ baseURL: embedBaseURL,
82
+ model: embedModel
83
+ }
84
+ };
85
+ }
package/dist/index.js ADDED
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("./config");
6
+ const embed_1 = require("./commands/embed");
7
+ const query_1 = require("./commands/query");
8
+ const summarize_1 = require("./commands/summarize");
9
+ const llm_1 = require("./utils/llm");
10
+ const program = new commander_1.Command();
11
+ function addBaseOptions(command) {
12
+ return command
13
+ .option("--config <path>", "path to .diffdocrc JSON config file")
14
+ .option("--base-dir <path>", "DiffDoc artifact directory")
15
+ .option("--ai-provider <provider>", "AI provider: local or cloud");
16
+ }
17
+ function addChatOptions(command) {
18
+ return command
19
+ .option("--local-llm-endpoint <url>", "local OpenAI-compatible chat endpoint")
20
+ .option("--local-chat-model <model>", "local chat model name")
21
+ .option("--cloud-llm-endpoint <url>", "cloud OpenAI-compatible chat endpoint")
22
+ .option("--cloud-chat-model <model>", "cloud chat model name")
23
+ .option("--openai-api-key <key>", "OpenAI-compatible API key; falls back to OPENAI_API_KEY");
24
+ }
25
+ function addEmbeddingOptions(command) {
26
+ return command
27
+ .option("--local-embed-endpoint <url>", "local OpenAI-compatible embeddings endpoint")
28
+ .option("--local-embed-model <model>", "local embedding model name")
29
+ .option("--cloud-embed-model <model>", "cloud embedding model name");
30
+ }
31
+ function addCloudEndpointAndKeyOptions(command) {
32
+ return command
33
+ .option("--cloud-llm-endpoint <url>", "cloud OpenAI-compatible endpoint")
34
+ .option("--openai-api-key <key>", "OpenAI-compatible API key; falls back to OPENAI_API_KEY");
35
+ }
36
+ program
37
+ .name("diffdoc")
38
+ .description("Translate repository code shifts into plain-English business context")
39
+ .version("0.1.0");
40
+ addChatOptions(addBaseOptions(program
41
+ .command("summarize")))
42
+ .description("Summarize repository code into a portable JSON manifest")
43
+ .option("--path <path>", "repository or code path to scan", ".")
44
+ .option("--out <path>", "manifest output path under --base-dir", "manifest.json")
45
+ .option("--mode <mode>", "summarization mode: all or delta", "all")
46
+ .action(async (options) => {
47
+ try {
48
+ const config = (0, config_1.buildRuntimeConfig)(options, { chat: true });
49
+ await (0, summarize_1.runSummarize)({ path: options.path, out: options.out, mode: options.mode }, config);
50
+ }
51
+ catch (error) {
52
+ console.error(error instanceof Error ? error.message : error);
53
+ process.exit(1);
54
+ }
55
+ });
56
+ addChatOptions(addBaseOptions(program
57
+ .command("prompt")))
58
+ .description("Send a plain prompt to the configured LLM")
59
+ .argument("<message...>", "prompt text to send")
60
+ .action(async (messageParts, options) => {
61
+ try {
62
+ const config = (0, config_1.buildRuntimeConfig)(options, { chat: true });
63
+ const response = await (0, llm_1.promptLlm)(messageParts.join(" "), config.chat);
64
+ console.log(response);
65
+ }
66
+ catch (error) {
67
+ console.error(error instanceof Error ? error.message : error);
68
+ process.exit(1);
69
+ }
70
+ });
71
+ addEmbeddingOptions(addChatOptions(addBaseOptions(program
72
+ .command("query"))))
73
+ .description("Search the local Vectra index with a natural-language question")
74
+ .argument("<message...>", "question or search text")
75
+ .option("--top <count>", "number of matches to return", "5")
76
+ .option("--code", "include code snapshots in results", false)
77
+ .action(async (messageParts, options) => {
78
+ try {
79
+ const config = (0, config_1.buildRuntimeConfig)(options, { chat: true, embeddings: true });
80
+ await (0, query_1.runQuery)(messageParts.join(" "), options, config);
81
+ }
82
+ catch (error) {
83
+ console.error(error instanceof Error ? error.message : error);
84
+ process.exit(1);
85
+ }
86
+ });
87
+ addCloudEndpointAndKeyOptions(addEmbeddingOptions(addBaseOptions(program
88
+ .command("embed"))))
89
+ .description("Embed manifest summaries into a local Vectra index")
90
+ .option("--manifest <path>", "manifest input path under --base-dir", "manifest.json")
91
+ .action(async (options) => {
92
+ try {
93
+ const config = (0, config_1.buildRuntimeConfig)(options, { embeddings: true });
94
+ await (0, embed_1.runEmbed)({ manifest: options.manifest }, config);
95
+ }
96
+ catch (error) {
97
+ console.error(error instanceof Error ? error.message : error);
98
+ process.exit(1);
99
+ }
100
+ });
101
+ program.parseAsync(process.argv).catch((error) => {
102
+ console.error(error instanceof Error ? error.message : error);
103
+ process.exit(1);
104
+ });
@@ -0,0 +1,62 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getGitDeltas = getGitDeltas;
7
+ exports.getCurrentCommit = getCurrentCommit;
8
+ const node_path_1 = __importDefault(require("node:path"));
9
+ const simple_git_1 = __importDefault(require("simple-git"));
10
+ const TARGET_EXTENSIONS = new Set([".ts", ".js", ".cs", ".py"]);
11
+ function normalizePath(filePath) {
12
+ return filePath.split(node_path_1.default.sep).join("/");
13
+ }
14
+ function isTargetCodeFile(filePath) {
15
+ return TARGET_EXTENSIONS.has(node_path_1.default.extname(filePath));
16
+ }
17
+ function addUnique(target, filePath) {
18
+ const normalized = normalizePath(filePath.trim());
19
+ if (normalized && isTargetCodeFile(normalized)) {
20
+ target.add(normalized);
21
+ }
22
+ }
23
+ async function getGitDeltas(repoPath, sinceRef) {
24
+ const git = (0, simple_git_1.default)(repoPath);
25
+ const modifiedOrAdded = new Set();
26
+ const deleted = new Set();
27
+ if (sinceRef) {
28
+ const output = await git.raw(["diff", "--name-status", `${sinceRef}..HEAD`]);
29
+ for (const line of output.split(/\r?\n/)) {
30
+ if (!line.trim())
31
+ continue;
32
+ const [status, ...rest] = line.split(/\s+/);
33
+ const filePath = rest[rest.length - 1];
34
+ if (status.startsWith("D")) {
35
+ addUnique(deleted, filePath);
36
+ }
37
+ else {
38
+ addUnique(modifiedOrAdded, filePath);
39
+ }
40
+ }
41
+ }
42
+ const status = await git.status();
43
+ for (const filePath of [...status.created, ...status.modified, ...status.renamed.map((item) => item.to)]) {
44
+ addUnique(modifiedOrAdded, filePath);
45
+ }
46
+ for (const filePath of status.deleted) {
47
+ addUnique(deleted, filePath);
48
+ }
49
+ return {
50
+ modifiedOrAdded: [...modifiedOrAdded].sort(),
51
+ deleted: [...deleted].sort()
52
+ };
53
+ }
54
+ async function getCurrentCommit(repoPath) {
55
+ const git = (0, simple_git_1.default)(repoPath);
56
+ try {
57
+ return (await git.revparse(["HEAD"])).trim();
58
+ }
59
+ catch {
60
+ return "uncommitted";
61
+ }
62
+ }
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.hashFileContent = hashFileContent;
4
+ const node_crypto_1 = require("node:crypto");
5
+ function hashFileContent(fileContent) {
6
+ return (0, node_crypto_1.createHash)("md5").update(fileContent, "utf8").digest("hex");
7
+ }
@@ -0,0 +1,56 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.generateFunctionalSummary = generateFunctionalSummary;
7
+ exports.promptLlm = promptLlm;
8
+ exports.generateEmbeddings = generateEmbeddings;
9
+ const openai_1 = __importDefault(require("openai"));
10
+ function createClient(config) {
11
+ return {
12
+ client: new openai_1.default({ apiKey: config.apiKey, baseURL: config.baseURL }),
13
+ model: config.model
14
+ };
15
+ }
16
+ async function generateFunctionalSummary(fileName, codeContent, config) {
17
+ const { client, model } = createClient(config);
18
+ const response = await client.chat.completions.create({
19
+ model,
20
+ temperature: 0.2,
21
+ messages: [
22
+ {
23
+ role: "system",
24
+ content: "Explain what this code does for non-technical stakeholders. Focus on business behavior, user impact, rules, data movement, and visible outcomes. Use plain English, avoid jargon, and provide zero conversational preamble."
25
+ },
26
+ {
27
+ role: "user",
28
+ content: `File: ${fileName}\n\nCode:\n${codeContent}`
29
+ }
30
+ ]
31
+ });
32
+ return response.choices[0]?.message?.content?.trim() || "No business behavior summary was returned.";
33
+ }
34
+ async function promptLlm(prompt, config) {
35
+ const { client, model } = createClient(config);
36
+ const response = await client.chat.completions.create({
37
+ model,
38
+ temperature: 0.2,
39
+ messages: [
40
+ {
41
+ role: "user",
42
+ content: prompt
43
+ }
44
+ ]
45
+ });
46
+ return response.choices[0]?.message?.content?.trim() || "No response was returned.";
47
+ }
48
+ async function generateEmbeddings(texts, config) {
49
+ const baseURL = config.baseURL.replace(/\/embeddings\/?$/, "");
50
+ const client = new openai_1.default({ apiKey: config.apiKey, baseURL });
51
+ const response = await client.embeddings.create({
52
+ model: config.model,
53
+ input: texts
54
+ });
55
+ return response.data.map((item) => item.embedding);
56
+ }
@@ -0,0 +1,17 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getDiffdocBaseDir = getDiffdocBaseDir;
7
+ exports.resolveDiffdocArtifactPath = resolveDiffdocArtifactPath;
8
+ const node_path_1 = __importDefault(require("node:path"));
9
+ function getDiffdocBaseDir(baseDir) {
10
+ return node_path_1.default.resolve(process.cwd(), baseDir);
11
+ }
12
+ function resolveDiffdocArtifactPath(filePath, baseDir) {
13
+ if (node_path_1.default.isAbsolute(filePath)) {
14
+ return filePath;
15
+ }
16
+ return node_path_1.default.resolve(getDiffdocBaseDir(baseDir), filePath);
17
+ }
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "diffdoc",
3
+ "version": "0.1.0",
4
+ "description": "Translate repository code shifts into plain-English business context",
5
+ "license": "MIT",
6
+ "author": "Christopher Sullivan",
7
+ "homepage": "https://github.com/sullyTheDev/diffdoc#readme",
8
+ "bugs": {
9
+ "url": "https://github.com/sullyTheDev/diffdoc/issues"
10
+ },
11
+ "repository": {
12
+ "type": "git",
13
+ "url": "git+https://github.com/sullyTheDev/diffdoc.git"
14
+ },
15
+ "type": "commonjs",
16
+ "main": "dist/index.js",
17
+ "bin": {
18
+ "diffdoc": "./dist/index.js"
19
+ },
20
+ "files": [
21
+ "dist",
22
+ "README.md",
23
+ "LICENSE",
24
+ ".diffdocrc.example"
25
+ ],
26
+ "engines": {
27
+ "node": ">=22"
28
+ },
29
+ "scripts": {
30
+ "build": "tsc",
31
+ "clean": "node -e \"require('fs').rmSync('dist', { recursive: true, force: true })\"",
32
+ "start": "tsc && node ./dist/index.js",
33
+ "prepare": "npm run build"
34
+ },
35
+ "dependencies": {
36
+ "commander": "^12.0.0",
37
+ "openai": "^4.28.0",
38
+ "simple-git": "^3.24.0",
39
+ "vectra": "^0.14.0"
40
+ },
41
+ "devDependencies": {
42
+ "@types/node": "^20.19.41",
43
+ "typescript": "^5.3.3"
44
+ }
45
+ }