diffdoc 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.diffdocrc.example +4 -1
- package/README.md +90 -15
- package/dist/commands/embed.js +110 -24
- package/dist/commands/init.js +221 -0
- package/dist/commands/query.js +2 -2
- package/dist/commands/status.js +166 -0
- package/dist/commands/summarize.js +366 -42
- package/dist/config.js +24 -0
- package/dist/index.js +53 -16
- package/dist/services/retrieval.js +2 -2
- package/dist/types/artifacts.js +5 -0
- package/dist/utils/git.js +1 -5
- package/dist/utils/llm.js +2 -2
- package/package.json +3 -3
package/.diffdocrc.example
CHANGED
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
## Project Description
|
|
4
4
|
|
|
5
|
-
DiffDoc turns source code into searchable, plain-English project context. It scans repository files, asks an OpenAI-compatible chat model to summarize the business behavior in each file, stores
|
|
5
|
+
DiffDoc turns source code into searchable, plain-English project context. It scans repository files, asks an OpenAI-compatible chat model to summarize the business behavior in each file, stores the summaries as portable per-hash JSON assets, embeds those assets into a local Vectra index, and answers questions using the indexed results as retrieval context.
|
|
6
6
|
|
|
7
7
|
The project is designed for teams that need fast codebase comprehension without requiring every stakeholder to read implementation details. It can run against local model servers such as Ollama, LM Studio, or vLLM, or against cloud OpenAI-compatible APIs.
|
|
8
8
|
|
|
@@ -34,7 +34,9 @@ Package scripts can call the installed binary:
|
|
|
34
34
|
```json
|
|
35
35
|
{
|
|
36
36
|
"scripts": {
|
|
37
|
+
"diffdoc:init": "diffdoc init",
|
|
37
38
|
"diffdoc:summarize": "diffdoc summarize",
|
|
39
|
+
"diffdoc:status": "diffdoc status",
|
|
38
40
|
"diffdoc:embed": "diffdoc embed",
|
|
39
41
|
"diffdoc:search": "diffdoc search",
|
|
40
42
|
"diffdoc:query": "diffdoc query",
|
|
@@ -72,37 +74,68 @@ Example config with all supported keys:
|
|
|
72
74
|
"cloudLlmEndpoint": "https://api.openai.com/v1",
|
|
73
75
|
"cloudChatModel": "gpt-4o-mini",
|
|
74
76
|
"cloudEmbedModel": "text-embedding-3-small",
|
|
75
|
-
"openaiApiKey": ""
|
|
77
|
+
"openaiApiKey": "",
|
|
78
|
+
"includeGlobs": [],
|
|
79
|
+
"excludeGlobs": [],
|
|
80
|
+
"ignoreFile": ".diffdocignore"
|
|
76
81
|
}
|
|
77
82
|
```
|
|
78
83
|
|
|
79
|
-
Supported environment fallbacks use the uppercase names for the same settings, including `AI_PROVIDER`, `DIFFDOC_BASE_DIR`, `LOCAL_LLM_ENDPOINT`, `LOCAL_EMBED_ENDPOINT`, `LOCAL_CHAT_MODEL`, `LOCAL_EMBED_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, `CLOUD_EMBED_MODEL`, and `
|
|
84
|
+
Supported environment fallbacks use the uppercase names for the same settings, including `AI_PROVIDER`, `DIFFDOC_BASE_DIR`, `LOCAL_LLM_ENDPOINT`, `LOCAL_EMBED_ENDPOINT`, `LOCAL_CHAT_MODEL`, `LOCAL_EMBED_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, `CLOUD_EMBED_MODEL`, `OPENAI_API_KEY`, `DIFFDOC_INCLUDE_GLOBS`, `DIFFDOC_EXCLUDE_GLOBS`, and `DIFFDOC_IGNORE_FILE`.
|
|
80
85
|
|
|
81
86
|
## Manifest-First Design
|
|
82
87
|
|
|
83
|
-
DiffDoc separates summarization from embedding. The `summarize` command writes
|
|
88
|
+
DiffDoc separates summarization from embedding. The `summarize` command writes file-to-hash mappings to `manifest.json` and stores each summary in an independent hash-addressed JSON file under `./.diffdoc/summaries/`.
|
|
84
89
|
|
|
85
90
|
The manifest is plain JSON and contains one entry per tracked file:
|
|
86
91
|
|
|
87
92
|
```json
|
|
88
93
|
{
|
|
94
|
+
"schemaVersion": 2,
|
|
89
95
|
"lastSyncedCommit": "string-hash",
|
|
90
96
|
"files": {
|
|
91
|
-
"src/example.ts":
|
|
92
|
-
"hash": "md5-string",
|
|
93
|
-
"summaryText": "Plain-English explanation text here.",
|
|
94
|
-
"rawCodeSnapshot": "Full code text here..."
|
|
95
|
-
}
|
|
97
|
+
"src/example.ts": "md5-string"
|
|
96
98
|
}
|
|
97
99
|
}
|
|
98
100
|
```
|
|
99
101
|
|
|
102
|
+
Example summary asset at `./.diffdoc/summaries/<hash>.json`:
|
|
103
|
+
|
|
104
|
+
```json
|
|
105
|
+
{
|
|
106
|
+
"schemaVersion": 1,
|
|
107
|
+
"content_hash": "md5-string",
|
|
108
|
+
"summary": "Plain-English explanation text here.",
|
|
109
|
+
"raw_code_snapshot": "Optional code text when --include-code-snapshot is enabled"
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
100
113
|
Because the summaries are stored independently of the vector index, users do not have to embed immediately. They can review, archive, transform, or embed the manifest and its summary assets later using their preferred vectorization model and storage solution.
|
|
101
114
|
|
|
102
115
|
DiffDoc includes `diffdoc embed` as a built-in convenience path for creating a local Vectra index, but the manifest and summary assets can also be consumed by other tools such as custom OpenAI-compatible embedding pipelines, hosted vector databases, local search systems, or internal documentation workflows.
|
|
103
116
|
|
|
104
117
|
## Commands
|
|
105
118
|
|
|
119
|
+
Initialize DiffDoc configuration for a repository:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
diffdoc init
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Use defaults without prompts:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
diffdoc init --yes
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Choose a provider and overwrite an existing config file:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
diffdoc init --provider cloud --force
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
`init` creates or updates repo setup files, appends missing `.gitignore` entries, and prints next commands. It does not run `summarize` or `embed`.
|
|
138
|
+
|
|
106
139
|
Summarize a repository into `./.diffdoc/manifest.json` and per-hash summary assets under `./.diffdoc/summaries/`:
|
|
107
140
|
|
|
108
141
|
```bash
|
|
@@ -115,12 +148,54 @@ Summarize only changed Git files using the existing manifest state:
|
|
|
115
148
|
diffdoc summarize --path . --mode delta
|
|
116
149
|
```
|
|
117
150
|
|
|
151
|
+
Store raw code snapshots in summary assets:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
diffdoc summarize --path . --mode all --include-code-snapshot
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Add include/exclude filters at runtime:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
diffdoc summarize --path . --mode all --include-glob "src/**/*.ts" --exclude-glob "**/*.test.ts"
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Emit a CI-friendly JSON summarize report:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
diffdoc summarize --path . --mode delta --json
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Check whether the summary assets and the index are fresh relative to the manifest:
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
diffdoc status
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Use a custom manifest path under `--base-dir`:
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
diffdoc status --manifest manifest.json
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
Emit CI-friendly JSON output:
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
diffdoc status --json
|
|
185
|
+
```
|
|
186
|
+
|
|
118
187
|
Embed the summaries referenced by the manifest into a local Vectra index at `./.diffdoc/vectra`:
|
|
119
188
|
|
|
120
189
|
```bash
|
|
121
190
|
diffdoc embed
|
|
122
191
|
```
|
|
123
192
|
|
|
193
|
+
Force full index rebuild:
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
diffdoc embed --rebuild
|
|
197
|
+
```
|
|
198
|
+
|
|
124
199
|
Search the local Vectra index and print raw matches:
|
|
125
200
|
|
|
126
201
|
```bash
|
|
@@ -145,12 +220,6 @@ Include retrieved code snapshots after the generated answer:
|
|
|
145
220
|
diffdoc query "How does embedding work?" --top 3 --code
|
|
146
221
|
```
|
|
147
222
|
|
|
148
|
-
Prompt the configured chat model directly:
|
|
149
|
-
|
|
150
|
-
```bash
|
|
151
|
-
diffdoc prompt "Confirm the configured model is reachable."
|
|
152
|
-
```
|
|
153
|
-
|
|
154
223
|
Use a custom config file:
|
|
155
224
|
|
|
156
225
|
```bash
|
|
@@ -226,8 +295,14 @@ Run `diffdoc summarize` and `diffdoc embed` before using the MCP server, otherwi
|
|
|
226
295
|
|
|
227
296
|
- Node.js `>=22` is required because Vectra requires it.
|
|
228
297
|
- This repository ignores `.diffdoc/vectra` and `.diffdocrc`; add similar entries to your project's `.gitignore` if you do not want generated indexes or local config committed. The manifest at `.diffdoc/manifest.json` is not ignored by this repository.
|
|
298
|
+
- Summary assets are written to `.diffdoc/summaries/*.json`.
|
|
299
|
+
- Manifest schema is currently `schemaVersion: 2`; older manifest shapes are not auto-migrated, and `embed` rejects them with an "Unsupported manifest schema" error.
|
|
229
300
|
- Commit `.diffdoc/manifest.json` when using delta workflows. Delta summarization reads the previous manifest state to decide which changed files need fresh summaries.
|
|
230
301
|
- `summarize` requires a configured chat model.
|
|
302
|
+
- `summarize` prints run progress and final totals (`scanned`, `skipped`, `updated`, `failed`, `pruned`).
|
|
303
|
+
- `summarize --json` prints a single machine-readable run report to stdout for CI parsing.
|
|
304
|
+
- `status` does not require a configured chat or embedding model.
|
|
305
|
+
- `status --json` prints a machine-readable report with summary and index freshness details.
|
|
231
306
|
- `embed` requires a configured embedding model.
|
|
232
307
|
- `search` requires a configured embedding model and returns raw retrieval results without calling the chat model.
|
|
233
308
|
- `query` requires both a configured chat model and embedding model.
|
package/dist/commands/embed.js
CHANGED
|
@@ -8,56 +8,142 @@ exports.runEmbed = runEmbed;
|
|
|
8
8
|
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
9
9
|
const node_path_1 = __importDefault(require("node:path"));
|
|
10
10
|
const vectra_1 = require("vectra");
|
|
11
|
+
const artifacts_1 = require("../types/artifacts");
|
|
11
12
|
const llm_1 = require("../utils/llm");
|
|
12
13
|
const paths_1 = require("../utils/paths");
|
|
13
14
|
const VECTRA_INDEX_DIR = "vectra";
|
|
14
15
|
function getVectraIndexPath(config) {
|
|
15
16
|
return node_path_1.default.resolve((0, paths_1.getDiffdocBaseDir)(config.baseDir), VECTRA_INDEX_DIR);
|
|
16
17
|
}
|
|
18
|
+
function getSummaryDir(manifestPath) {
|
|
19
|
+
return node_path_1.default.resolve(node_path_1.default.dirname(manifestPath), "summaries");
|
|
20
|
+
}
|
|
21
|
+
function getSummaryPath(summaryDir, hash) {
|
|
22
|
+
return node_path_1.default.resolve(summaryDir, `${hash}.json`);
|
|
23
|
+
}
|
|
24
|
+
async function readManifest(manifestPath) {
|
|
25
|
+
const parsed = JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
|
|
26
|
+
if (parsed.schemaVersion !== artifacts_1.MANIFEST_SCHEMA_VERSION) {
|
|
27
|
+
throw new Error(`Unsupported manifest schema in ${manifestPath}. Expected schemaVersion ${artifacts_1.MANIFEST_SCHEMA_VERSION}.`);
|
|
28
|
+
}
|
|
29
|
+
return {
|
|
30
|
+
schemaVersion: artifacts_1.MANIFEST_SCHEMA_VERSION,
|
|
31
|
+
lastSyncedCommit: typeof parsed.lastSyncedCommit === "string" ? parsed.lastSyncedCommit : "",
|
|
32
|
+
files: parsed.files && typeof parsed.files === "object" ? parsed.files : {}
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
async function readSummaryAsset(summaryPath) {
|
|
36
|
+
const parsed = JSON.parse(await promises_1.default.readFile(summaryPath, "utf8"));
|
|
37
|
+
if (parsed.schemaVersion !== artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION) {
|
|
38
|
+
throw new Error(`Unsupported summary schema in ${summaryPath}. Expected schemaVersion ${artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION}.`);
|
|
39
|
+
}
|
|
40
|
+
if (typeof parsed.content_hash !== "string") {
|
|
41
|
+
throw new Error(`Invalid summary hash in ${summaryPath}.`);
|
|
42
|
+
}
|
|
43
|
+
if (typeof parsed.summary !== "string") {
|
|
44
|
+
throw new Error(`Invalid summary text in ${summaryPath}.`);
|
|
45
|
+
}
|
|
46
|
+
return {
|
|
47
|
+
schemaVersion: artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION,
|
|
48
|
+
content_hash: parsed.content_hash,
|
|
49
|
+
summary: parsed.summary,
|
|
50
|
+
raw_code_snapshot: typeof parsed.raw_code_snapshot === "string" ? parsed.raw_code_snapshot : undefined
|
|
51
|
+
};
|
|
52
|
+
}
|
|
17
53
|
function buildDocument(filePath, summaryText, rawCodeSnapshot) {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
54
|
+
let output = `File: ${filePath}\nSummary: ${summaryText}`;
|
|
55
|
+
if (rawCodeSnapshot) {
|
|
56
|
+
output += `\n\nCode Snapshot:\n\`\`\`\n${rawCodeSnapshot}\n\`\`\``;
|
|
57
|
+
}
|
|
58
|
+
return output;
|
|
21
59
|
}
|
|
22
60
|
async function runEmbed(options, config) {
|
|
23
61
|
const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.manifest, config.baseDir);
|
|
24
|
-
const manifest =
|
|
62
|
+
const manifest = await readManifest(manifestPath);
|
|
25
63
|
const entries = Object.entries(manifest.files);
|
|
64
|
+
const summaryDir = getSummaryDir(manifestPath);
|
|
26
65
|
const indexPath = getVectraIndexPath(config);
|
|
27
66
|
const index = new vectra_1.LocalIndex(indexPath);
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
67
|
+
if (options.rebuild) {
|
|
68
|
+
await index.createIndex({
|
|
69
|
+
version: 1,
|
|
70
|
+
deleteIfExists: true,
|
|
71
|
+
metadata_config: {
|
|
72
|
+
indexed: ["filePath", "hash"]
|
|
73
|
+
}
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
else if (!await index.isIndexCreated()) {
|
|
77
|
+
await index.createIndex({
|
|
78
|
+
version: 1,
|
|
79
|
+
deleteIfExists: false,
|
|
80
|
+
metadata_config: {
|
|
81
|
+
indexed: ["filePath", "hash"]
|
|
82
|
+
}
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
const existingItems = await index.listItems();
|
|
86
|
+
const existingByPath = new Map(existingItems.map((item) => [item.id, item]));
|
|
87
|
+
const toUpsert = [];
|
|
88
|
+
for (const [filePath, hash] of entries) {
|
|
89
|
+
const existing = existingByPath.get(filePath);
|
|
90
|
+
if (existing?.metadata.hash === hash) {
|
|
91
|
+
continue;
|
|
92
|
+
}
|
|
93
|
+
const summaryPath = getSummaryPath(summaryDir, hash);
|
|
94
|
+
const summaryAsset = await readSummaryAsset(summaryPath);
|
|
95
|
+
if (summaryAsset.content_hash !== hash) {
|
|
96
|
+
throw new Error(`Hash mismatch in summary asset ${summaryPath}.`);
|
|
33
97
|
}
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
98
|
+
toUpsert.push({
|
|
99
|
+
filePath,
|
|
100
|
+
hash,
|
|
101
|
+
summaryText: summaryAsset.summary,
|
|
102
|
+
rawCodeSnapshot: summaryAsset.raw_code_snapshot,
|
|
103
|
+
document: buildDocument(filePath, summaryAsset.summary, summaryAsset.raw_code_snapshot)
|
|
104
|
+
});
|
|
105
|
+
}
|
|
106
|
+
const activePathSet = new Set(entries.map(([filePath]) => filePath));
|
|
107
|
+
const toDelete = existingItems
|
|
108
|
+
.map((item) => item.id)
|
|
109
|
+
.filter((id) => Boolean(id) && !activePathSet.has(id));
|
|
110
|
+
if (toUpsert.length === 0 && toDelete.length === 0) {
|
|
111
|
+
console.log(`Index is already up to date at ${indexPath}.`);
|
|
37
112
|
return;
|
|
38
113
|
}
|
|
39
|
-
const
|
|
40
|
-
|
|
114
|
+
const embeddings = toUpsert.length > 0
|
|
115
|
+
? await (0, llm_1.generateEmbeddings)(toUpsert.map((item) => item.document), config.embeddings)
|
|
116
|
+
: [];
|
|
41
117
|
await index.beginUpdate();
|
|
42
118
|
try {
|
|
43
|
-
for (let i = 0; i <
|
|
44
|
-
const
|
|
119
|
+
for (let i = 0; i < toUpsert.length; i += 1) {
|
|
120
|
+
const item = toUpsert[i];
|
|
121
|
+
const metadata = item.rawCodeSnapshot
|
|
122
|
+
? {
|
|
123
|
+
filePath: item.filePath,
|
|
124
|
+
hash: item.hash,
|
|
125
|
+
summaryText: item.summaryText,
|
|
126
|
+
rawCodeSnapshot: item.rawCodeSnapshot
|
|
127
|
+
}
|
|
128
|
+
: {
|
|
129
|
+
filePath: item.filePath,
|
|
130
|
+
hash: item.hash,
|
|
131
|
+
summaryText: item.summaryText
|
|
132
|
+
};
|
|
45
133
|
await index.upsertItem({
|
|
46
|
-
id: filePath,
|
|
134
|
+
id: item.filePath,
|
|
47
135
|
vector: embeddings[i],
|
|
48
|
-
metadata
|
|
49
|
-
filePath,
|
|
50
|
-
hash: file.hash,
|
|
51
|
-
summaryText: file.summaryText,
|
|
52
|
-
rawCodeSnapshot: file.rawCodeSnapshot
|
|
53
|
-
}
|
|
136
|
+
metadata
|
|
54
137
|
});
|
|
55
138
|
}
|
|
139
|
+
for (const itemId of toDelete) {
|
|
140
|
+
await index.deleteItem(itemId);
|
|
141
|
+
}
|
|
56
142
|
await index.endUpdate();
|
|
57
143
|
}
|
|
58
144
|
catch (error) {
|
|
59
145
|
index.cancelUpdate();
|
|
60
146
|
throw error;
|
|
61
147
|
}
|
|
62
|
-
console.log(`Embedded ${
|
|
148
|
+
console.log(`Embedded ${toUpsert.length} summaries and pruned ${toDelete.length} items in ${indexPath}.`);
|
|
63
149
|
}
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.runInit = runInit;
|
|
7
|
+
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
8
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
9
|
+
const promises_2 = require("node:readline/promises");
|
|
10
|
+
const node_process_1 = require("node:process");
|
|
11
|
+
const DEFAULT_CONFIG = {
|
|
12
|
+
baseDir: "./.diffdoc",
|
|
13
|
+
aiProvider: "local",
|
|
14
|
+
localLlmEndpoint: "http://localhost:11434/v1",
|
|
15
|
+
localEmbedEndpoint: "http://localhost:11434/v1/embeddings",
|
|
16
|
+
localChatModel: "qwen2.5-coder:7b",
|
|
17
|
+
localEmbedModel: "nomic-embed-code",
|
|
18
|
+
cloudLlmEndpoint: "https://api.openai.com/v1",
|
|
19
|
+
cloudChatModel: "gpt-4o-mini",
|
|
20
|
+
cloudEmbedModel: "text-embedding-3-small",
|
|
21
|
+
openaiApiKey: "",
|
|
22
|
+
includeGlobs: [],
|
|
23
|
+
excludeGlobs: [],
|
|
24
|
+
ignoreFile: ".diffdocignore"
|
|
25
|
+
};
|
|
26
|
+
function parseProvider(value, fallback) {
|
|
27
|
+
const provider = value || fallback;
|
|
28
|
+
if (provider !== "local" && provider !== "cloud") {
|
|
29
|
+
throw new Error('Invalid init provider. Expected "local" or "cloud".');
|
|
30
|
+
}
|
|
31
|
+
return provider;
|
|
32
|
+
}
|
|
33
|
+
function parseCsv(value) {
|
|
34
|
+
return value.split(",").map((item) => item.trim()).filter(Boolean);
|
|
35
|
+
}
|
|
36
|
+
function toDisplayPath(filePath) {
|
|
37
|
+
return filePath.split(node_path_1.default.sep).join("/");
|
|
38
|
+
}
|
|
39
|
+
function resolveRepoPath(filePath) {
|
|
40
|
+
return node_path_1.default.resolve(process.cwd(), filePath);
|
|
41
|
+
}
|
|
42
|
+
function relativeToRepo(absolutePath) {
|
|
43
|
+
const relative = node_path_1.default.relative(process.cwd(), absolutePath) || ".";
|
|
44
|
+
return toDisplayPath(relative);
|
|
45
|
+
}
|
|
46
|
+
function normalizeRepoPattern(value) {
|
|
47
|
+
return toDisplayPath(value.trim())
|
|
48
|
+
.replace(/^\.\//, "")
|
|
49
|
+
.replace(/\/+/g, "/")
|
|
50
|
+
.replace(/\/$/, "");
|
|
51
|
+
}
|
|
52
|
+
async function fileExists(filePath) {
|
|
53
|
+
try {
|
|
54
|
+
await promises_1.default.access(filePath);
|
|
55
|
+
return true;
|
|
56
|
+
}
|
|
57
|
+
catch {
|
|
58
|
+
return false;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
async function readExistingConfig(configPath) {
|
|
62
|
+
try {
|
|
63
|
+
const parsed = JSON.parse(await promises_1.default.readFile(configPath, "utf8"));
|
|
64
|
+
return parsed && typeof parsed === "object" ? parsed : {};
|
|
65
|
+
}
|
|
66
|
+
catch (error) {
|
|
67
|
+
const nodeError = error;
|
|
68
|
+
if (nodeError.code === "ENOENT") {
|
|
69
|
+
return {};
|
|
70
|
+
}
|
|
71
|
+
throw error;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
async function promptText(rl, question, fallback) {
|
|
75
|
+
const suffix = fallback ? ` (${fallback})` : "";
|
|
76
|
+
const answer = (await rl.question(`${question}${suffix}: `)).trim();
|
|
77
|
+
return answer || fallback;
|
|
78
|
+
}
|
|
79
|
+
async function promptBoolean(rl, question, fallback) {
|
|
80
|
+
const suffix = fallback ? "Y/n" : "y/N";
|
|
81
|
+
const answer = (await rl.question(`${question} (${suffix}): `)).trim().toLowerCase();
|
|
82
|
+
if (!answer)
|
|
83
|
+
return fallback;
|
|
84
|
+
return answer === "y" || answer === "yes";
|
|
85
|
+
}
|
|
86
|
+
async function buildInteractiveConfig(options, existing) {
|
|
87
|
+
const rl = (0, promises_2.createInterface)({ input: node_process_1.stdin, output: node_process_1.stdout });
|
|
88
|
+
try {
|
|
89
|
+
const fallbackProvider = parseProvider(existing.aiProvider, "local");
|
|
90
|
+
const provider = parseProvider(options.provider || await promptText(rl, "AI provider: local or cloud", fallbackProvider), fallbackProvider);
|
|
91
|
+
const baseDir = options.baseDir || await promptText(rl, "DiffDoc artifact directory", existing.baseDir || DEFAULT_CONFIG.baseDir);
|
|
92
|
+
const ignoreFile = await promptText(rl, "Ignore file path", existing.ignoreFile || DEFAULT_CONFIG.ignoreFile);
|
|
93
|
+
const includeGlobs = parseCsv(await promptText(rl, "Include globs, comma-separated", (existing.includeGlobs || []).join(",")));
|
|
94
|
+
const excludeGlobs = parseCsv(await promptText(rl, "Exclude globs, comma-separated", (existing.excludeGlobs || []).join(",")));
|
|
95
|
+
const config = {
|
|
96
|
+
...DEFAULT_CONFIG,
|
|
97
|
+
...existing,
|
|
98
|
+
baseDir,
|
|
99
|
+
aiProvider: provider,
|
|
100
|
+
ignoreFile,
|
|
101
|
+
includeGlobs,
|
|
102
|
+
excludeGlobs
|
|
103
|
+
};
|
|
104
|
+
if (provider === "local") {
|
|
105
|
+
config.localLlmEndpoint = await promptText(rl, "Local chat endpoint", config.localLlmEndpoint || DEFAULT_CONFIG.localLlmEndpoint);
|
|
106
|
+
config.localChatModel = await promptText(rl, "Local chat model", config.localChatModel || DEFAULT_CONFIG.localChatModel);
|
|
107
|
+
config.localEmbedEndpoint = await promptText(rl, "Local embedding endpoint", config.localEmbedEndpoint || DEFAULT_CONFIG.localEmbedEndpoint);
|
|
108
|
+
config.localEmbedModel = await promptText(rl, "Local embedding model", config.localEmbedModel || DEFAULT_CONFIG.localEmbedModel);
|
|
109
|
+
}
|
|
110
|
+
else {
|
|
111
|
+
config.cloudLlmEndpoint = await promptText(rl, "Cloud OpenAI-compatible endpoint", config.cloudLlmEndpoint || DEFAULT_CONFIG.cloudLlmEndpoint);
|
|
112
|
+
config.cloudChatModel = await promptText(rl, "Cloud chat model", config.cloudChatModel || DEFAULT_CONFIG.cloudChatModel);
|
|
113
|
+
config.cloudEmbedModel = await promptText(rl, "Cloud embedding model", config.cloudEmbedModel || DEFAULT_CONFIG.cloudEmbedModel);
|
|
114
|
+
if (await promptBoolean(rl, "Store OPENAI_API_KEY in config file", false)) {
|
|
115
|
+
config.openaiApiKey = await promptText(rl, "OpenAI-compatible API key", config.openaiApiKey || "");
|
|
116
|
+
}
|
|
117
|
+
else {
|
|
118
|
+
config.openaiApiKey = "";
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
return config;
|
|
122
|
+
}
|
|
123
|
+
finally {
|
|
124
|
+
rl.close();
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
function buildYesConfig(options, existing) {
|
|
128
|
+
return {
|
|
129
|
+
...DEFAULT_CONFIG,
|
|
130
|
+
...existing,
|
|
131
|
+
baseDir: options.baseDir || existing.baseDir || DEFAULT_CONFIG.baseDir,
|
|
132
|
+
aiProvider: parseProvider(options.provider || existing.aiProvider, "local"),
|
|
133
|
+
openaiApiKey: ""
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
async function writeJsonFile(filePath, value, summary, force) {
|
|
137
|
+
const exists = await fileExists(filePath);
|
|
138
|
+
if (exists && !force) {
|
|
139
|
+
summary.skipped.push(relativeToRepo(filePath));
|
|
140
|
+
summary.warnings.push(`${relativeToRepo(filePath)} already exists; pass --force to overwrite.`);
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
await promises_1.default.mkdir(node_path_1.default.dirname(filePath), { recursive: true });
|
|
144
|
+
await promises_1.default.writeFile(filePath, `${JSON.stringify(value, null, 2)}\n`, "utf8");
|
|
145
|
+
(exists ? summary.updated : summary.created).push(relativeToRepo(filePath));
|
|
146
|
+
}
|
|
147
|
+
function buildStarterIgnore(baseDir) {
|
|
148
|
+
const normalizedBaseDir = normalizeRepoPattern(baseDir);
|
|
149
|
+
return [
|
|
150
|
+
"# DiffDoc ignore patterns",
|
|
151
|
+
"node_modules/**",
|
|
152
|
+
".git/**",
|
|
153
|
+
`${normalizedBaseDir}/**`,
|
|
154
|
+
"dist/**",
|
|
155
|
+
""
|
|
156
|
+
].join("\n");
|
|
157
|
+
}
|
|
158
|
+
async function createIgnoreFile(ignorePath, config, summary) {
|
|
159
|
+
if (await fileExists(ignorePath)) {
|
|
160
|
+
summary.skipped.push(relativeToRepo(ignorePath));
|
|
161
|
+
return;
|
|
162
|
+
}
|
|
163
|
+
await promises_1.default.writeFile(ignorePath, buildStarterIgnore(config.baseDir), "utf8");
|
|
164
|
+
summary.created.push(relativeToRepo(ignorePath));
|
|
165
|
+
}
|
|
166
|
+
function buildGitignoreEntries(configPath, config) {
|
|
167
|
+
const configRelative = normalizeRepoPattern(relativeToRepo(configPath));
|
|
168
|
+
const baseDir = normalizeRepoPattern(config.baseDir);
|
|
169
|
+
return [`${baseDir}/vectra/`, configRelative];
|
|
170
|
+
}
|
|
171
|
+
async function updateGitignore(configPath, config, summary) {
|
|
172
|
+
const gitignorePath = resolveRepoPath(".gitignore");
|
|
173
|
+
const entries = buildGitignoreEntries(configPath, config);
|
|
174
|
+
let existing = "";
|
|
175
|
+
let exists = false;
|
|
176
|
+
try {
|
|
177
|
+
existing = await promises_1.default.readFile(gitignorePath, "utf8");
|
|
178
|
+
exists = true;
|
|
179
|
+
}
|
|
180
|
+
catch (error) {
|
|
181
|
+
const nodeError = error;
|
|
182
|
+
if (nodeError.code !== "ENOENT")
|
|
183
|
+
throw error;
|
|
184
|
+
}
|
|
185
|
+
const existingLines = new Set(existing.split(/\r?\n/).map(normalizeRepoPattern).filter(Boolean));
|
|
186
|
+
const missing = entries.filter((entry) => !existingLines.has(normalizeRepoPattern(entry)));
|
|
187
|
+
if (missing.length === 0) {
|
|
188
|
+
summary.skipped.push(".gitignore");
|
|
189
|
+
return;
|
|
190
|
+
}
|
|
191
|
+
const prefix = existing && !existing.endsWith("\n") ? "\n" : "";
|
|
192
|
+
await promises_1.default.writeFile(gitignorePath, `${existing}${prefix}${missing.join("\n")}\n`, "utf8");
|
|
193
|
+
(exists ? summary.updated : summary.created).push(".gitignore");
|
|
194
|
+
}
|
|
195
|
+
function printList(label, values) {
|
|
196
|
+
console.log(`${label}: ${values.length > 0 ? values.join(", ") : "none"}`);
|
|
197
|
+
}
|
|
198
|
+
async function runInit(options) {
|
|
199
|
+
const summary = { created: [], updated: [], skipped: [], warnings: [] };
|
|
200
|
+
const configPath = resolveRepoPath(options.config || ".diffdocrc");
|
|
201
|
+
const existingConfig = await readExistingConfig(configPath);
|
|
202
|
+
const config = options.yes
|
|
203
|
+
? buildYesConfig(options, existingConfig)
|
|
204
|
+
: await buildInteractiveConfig(options, existingConfig);
|
|
205
|
+
const ignorePath = resolveRepoPath(config.ignoreFile || ".diffdocignore");
|
|
206
|
+
await writeJsonFile(configPath, config, summary, options.force);
|
|
207
|
+
await createIgnoreFile(ignorePath, config, summary);
|
|
208
|
+
await updateGitignore(configPath, config, summary);
|
|
209
|
+
console.log("DiffDoc init complete.");
|
|
210
|
+
console.log("");
|
|
211
|
+
console.log("Init changes:");
|
|
212
|
+
printList("Created", summary.created);
|
|
213
|
+
printList("Updated", summary.updated);
|
|
214
|
+
printList("Skipped", summary.skipped);
|
|
215
|
+
printList("Warnings", summary.warnings);
|
|
216
|
+
console.log("");
|
|
217
|
+
console.log("Next commands:");
|
|
218
|
+
console.log("1. diffdoc summarize --path . --mode all");
|
|
219
|
+
console.log("2. diffdoc embed");
|
|
220
|
+
console.log("3. diffdoc status");
|
|
221
|
+
}
|
package/dist/commands/query.js
CHANGED
|
@@ -25,7 +25,7 @@ async function runQuery(message, options, config) {
|
|
|
25
25
|
console.log((0, retrieval_1.trimForDisplay)(result.summaryText, 1200));
|
|
26
26
|
if (options.code) {
|
|
27
27
|
console.log("Code Snapshot:");
|
|
28
|
-
console.log((0, retrieval_1.trimForDisplay)(result.rawCodeSnapshot, 2000));
|
|
28
|
+
console.log((0, retrieval_1.trimForDisplay)(result.rawCodeSnapshot || "(not stored)", 2000));
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
31
|
}
|
|
@@ -44,7 +44,7 @@ async function runSearch(message, options, config) {
|
|
|
44
44
|
console.log((0, retrieval_1.trimForDisplay)(result.summaryText, 1200));
|
|
45
45
|
if (options.code) {
|
|
46
46
|
console.log("Code Snapshot:");
|
|
47
|
-
console.log((0, retrieval_1.trimForDisplay)(result.rawCodeSnapshot, 2000));
|
|
47
|
+
console.log((0, retrieval_1.trimForDisplay)(result.rawCodeSnapshot || "(not stored)", 2000));
|
|
48
48
|
}
|
|
49
49
|
}
|
|
50
50
|
}
|