diffdoc 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.diffdocrc.example +1 -0
- package/README.md +7 -1
- package/dist/commands/embed.js +23 -22
- package/dist/config.js +14 -1
- package/dist/index.js +3 -2
- package/package.json +1 -1
package/.diffdocrc.example
CHANGED
package/README.md
CHANGED
|
@@ -190,6 +190,12 @@ Embed the manifest into a local Vectra index at `./.diffdoc/vectra`:
|
|
|
190
190
|
diffdoc embed
|
|
191
191
|
```
|
|
192
192
|
|
|
193
|
+
Limit how many summary documents are sent per embeddings request:
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
diffdoc embed --embed-batch-size 20
|
|
197
|
+
```
|
|
198
|
+
|
|
193
199
|
Force full index rebuild:
|
|
194
200
|
|
|
195
201
|
```bash
|
|
@@ -303,7 +309,7 @@ Run `diffdoc summarize` and `diffdoc embed` before using the MCP server, otherwi
|
|
|
303
309
|
- `summarize --json` prints a single machine-readable run report to stdout for CI parsing.
|
|
304
310
|
- `status` does not require a configured chat or embedding model.
|
|
305
311
|
- `status --json` prints a machine-readable report with summary and index freshness details.
|
|
306
|
-
- `embed` requires a configured embedding model.
|
|
312
|
+
- `embed` requires a configured embedding model. Use `embedBatchSize` in `.diffdocrc`, `DIFFDOC_EMBED_BATCH_SIZE`, or `--embed-batch-size` to tune how many summary documents are sent per embeddings request.
|
|
307
313
|
- `search` requires a configured embedding model and returns raw retrieval results without calling the chat model.
|
|
308
314
|
- `query` requires both a configured chat model and embedding model.
|
|
309
315
|
- For code-oriented embedding models such as `nomic-embed-code`, DiffDoc prefixes query embeddings with `Represent this query for searching relevant code:`.
|
package/dist/commands/embed.js
CHANGED
|
@@ -111,30 +111,31 @@ async function runEmbed(options, config) {
|
|
|
111
111
|
console.log(`Index is already up to date at ${indexPath}.`);
|
|
112
112
|
return;
|
|
113
113
|
}
|
|
114
|
-
const embeddings = toUpsert.length > 0
|
|
115
|
-
? await (0, llm_1.generateEmbeddings)(toUpsert.map((item) => item.document), config.embeddings)
|
|
116
|
-
: [];
|
|
117
114
|
await index.beginUpdate();
|
|
118
115
|
try {
|
|
119
|
-
for (let
|
|
120
|
-
const
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
116
|
+
for (let start = 0; start < toUpsert.length; start += config.embeddings.batchSize) {
|
|
117
|
+
const batch = toUpsert.slice(start, start + config.embeddings.batchSize);
|
|
118
|
+
const embeddings = await (0, llm_1.generateEmbeddings)(batch.map((item) => item.document), config.embeddings);
|
|
119
|
+
for (let i = 0; i < batch.length; i += 1) {
|
|
120
|
+
const item = batch[i];
|
|
121
|
+
const metadata = item.rawCodeSnapshot
|
|
122
|
+
? {
|
|
123
|
+
filePath: item.filePath,
|
|
124
|
+
hash: item.hash,
|
|
125
|
+
summaryText: item.summaryText,
|
|
126
|
+
rawCodeSnapshot: item.rawCodeSnapshot
|
|
127
|
+
}
|
|
128
|
+
: {
|
|
129
|
+
filePath: item.filePath,
|
|
130
|
+
hash: item.hash,
|
|
131
|
+
summaryText: item.summaryText
|
|
132
|
+
};
|
|
133
|
+
await index.upsertItem({
|
|
134
|
+
id: item.filePath,
|
|
135
|
+
vector: embeddings[i],
|
|
136
|
+
metadata
|
|
137
|
+
});
|
|
138
|
+
}
|
|
138
139
|
}
|
|
139
140
|
for (const itemId of toDelete) {
|
|
140
141
|
await index.deleteItem(itemId);
|
package/dist/config.js
CHANGED
|
@@ -25,6 +25,17 @@ function readListOption(value, envName, fallback = []) {
|
|
|
25
25
|
}
|
|
26
26
|
return fallback;
|
|
27
27
|
}
|
|
28
|
+
function readPositiveIntegerOption(value, envName, fallback) {
|
|
29
|
+
const rawValue = value ?? process.env[envName];
|
|
30
|
+
if (rawValue === undefined || rawValue === "") {
|
|
31
|
+
return fallback;
|
|
32
|
+
}
|
|
33
|
+
const parsed = typeof rawValue === "number" ? rawValue : Number.parseInt(rawValue, 10);
|
|
34
|
+
if (!Number.isInteger(parsed) || parsed < 1) {
|
|
35
|
+
throw new Error(`Invalid ${envName}. Expected a positive integer.`);
|
|
36
|
+
}
|
|
37
|
+
return parsed;
|
|
38
|
+
}
|
|
28
39
|
function loadRcFile(configPath) {
|
|
29
40
|
const resolvedPath = node_path_1.default.resolve(process.cwd(), configPath || ".diffdocrc");
|
|
30
41
|
if (!node_fs_1.default.existsSync(resolvedPath)) {
|
|
@@ -57,6 +68,7 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
|
|
|
57
68
|
const mergedOptions = mergeConfigOptions(options);
|
|
58
69
|
const provider = readProvider(mergedOptions.aiProvider);
|
|
59
70
|
const apiKey = readOption(mergedOptions.openaiApiKey, "OPENAI_API_KEY", provider === "local" ? "local-key" : "");
|
|
71
|
+
const embedBatchSize = readPositiveIntegerOption(mergedOptions.embedBatchSize, "DIFFDOC_EMBED_BATCH_SIZE", 25);
|
|
60
72
|
const includeGlobs = readListOption(mergedOptions.includeGlobs, "DIFFDOC_INCLUDE_GLOBS");
|
|
61
73
|
const excludeGlobs = readListOption(mergedOptions.excludeGlobs, "DIFFDOC_EXCLUDE_GLOBS");
|
|
62
74
|
const ignoreFile = readOption(mergedOptions.ignoreFile, "DIFFDOC_IGNORE_FILE", ".diffdocignore");
|
|
@@ -98,7 +110,8 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
|
|
|
98
110
|
embeddings: {
|
|
99
111
|
apiKey,
|
|
100
112
|
baseURL: embedBaseURL,
|
|
101
|
-
model: embedModel
|
|
113
|
+
model: embedModel,
|
|
114
|
+
batchSize: embedBatchSize
|
|
102
115
|
},
|
|
103
116
|
summarize: {
|
|
104
117
|
includeGlobs,
|
package/dist/index.js
CHANGED
|
@@ -31,7 +31,8 @@ function addEmbeddingOptions(command) {
|
|
|
31
31
|
return command
|
|
32
32
|
.option("--local-embed-endpoint <url>", "local OpenAI-compatible embeddings endpoint")
|
|
33
33
|
.option("--local-embed-model <model>", "local embedding model name")
|
|
34
|
-
.option("--cloud-embed-model <model>", "cloud embedding model name")
|
|
34
|
+
.option("--cloud-embed-model <model>", "cloud embedding model name")
|
|
35
|
+
.option("--embed-batch-size <count>", "number of summary documents to send per embeddings request");
|
|
35
36
|
}
|
|
36
37
|
function addCloudEndpointAndKeyOptions(command) {
|
|
37
38
|
return command
|
|
@@ -41,7 +42,7 @@ function addCloudEndpointAndKeyOptions(command) {
|
|
|
41
42
|
program
|
|
42
43
|
.name("diffdoc")
|
|
43
44
|
.description("Translate repository code shifts into plain-English business context")
|
|
44
|
-
.version("0.1
|
|
45
|
+
.version("0.4.1");
|
|
45
46
|
program
|
|
46
47
|
.command("init")
|
|
47
48
|
.description("Initialize DiffDoc configuration for this repository")
|