diffdoc 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@
8
8
  "cloudLlmEndpoint": "https://api.openai.com/v1",
9
9
  "cloudChatModel": "gpt-4o-mini",
10
10
  "cloudEmbedModel": "text-embedding-3-small",
11
+ "embedBatchSize": 25,
11
12
  "openaiApiKey": "",
12
13
  "includeGlobs": [],
13
14
  "excludeGlobs": [],
package/README.md CHANGED
@@ -190,6 +190,12 @@ Embed the manifest into a local Vectra index at `./.diffdoc/vectra`:
190
190
  diffdoc embed
191
191
  ```
192
192
 
193
+ Limit how many summary documents are sent per embeddings request:
194
+
195
+ ```bash
196
+ diffdoc embed --embed-batch-size 20
197
+ ```
198
+
193
199
  Force full index rebuild:
194
200
 
195
201
  ```bash
@@ -303,7 +309,7 @@ Run `diffdoc summarize` and `diffdoc embed` before using the MCP server, otherwi
303
309
  - `summarize --json` prints a single machine-readable run report to stdout for CI parsing.
304
310
  - `status` does not require a configured chat or embedding model.
305
311
  - `status --json` prints a machine-readable report with summary and index freshness details.
306
- - `embed` requires a configured embedding model.
312
+ - `embed` requires a configured embedding model. Use `embedBatchSize` in `.diffdocrc`, `DIFFDOC_EMBED_BATCH_SIZE`, or `--embed-batch-size` to tune how many summary documents are sent per embeddings request.
307
313
  - `search` requires a configured embedding model and returns raw retrieval results without calling the chat model.
308
314
  - `query` requires both a configured chat model and embedding model.
309
315
  - For code-oriented embedding models such as `nomic-embed-code`, DiffDoc prefixes query embeddings with `Represent this query for searching relevant code:`.
@@ -111,30 +111,31 @@ async function runEmbed(options, config) {
111
111
  console.log(`Index is already up to date at ${indexPath}.`);
112
112
  return;
113
113
  }
114
- const embeddings = toUpsert.length > 0
115
- ? await (0, llm_1.generateEmbeddings)(toUpsert.map((item) => item.document), config.embeddings)
116
- : [];
117
114
  await index.beginUpdate();
118
115
  try {
119
- for (let i = 0; i < toUpsert.length; i += 1) {
120
- const item = toUpsert[i];
121
- const metadata = item.rawCodeSnapshot
122
- ? {
123
- filePath: item.filePath,
124
- hash: item.hash,
125
- summaryText: item.summaryText,
126
- rawCodeSnapshot: item.rawCodeSnapshot
127
- }
128
- : {
129
- filePath: item.filePath,
130
- hash: item.hash,
131
- summaryText: item.summaryText
132
- };
133
- await index.upsertItem({
134
- id: item.filePath,
135
- vector: embeddings[i],
136
- metadata
137
- });
116
+ for (let start = 0; start < toUpsert.length; start += config.embeddings.batchSize) {
117
+ const batch = toUpsert.slice(start, start + config.embeddings.batchSize);
118
+ const embeddings = await (0, llm_1.generateEmbeddings)(batch.map((item) => item.document), config.embeddings);
119
+ for (let i = 0; i < batch.length; i += 1) {
120
+ const item = batch[i];
121
+ const metadata = item.rawCodeSnapshot
122
+ ? {
123
+ filePath: item.filePath,
124
+ hash: item.hash,
125
+ summaryText: item.summaryText,
126
+ rawCodeSnapshot: item.rawCodeSnapshot
127
+ }
128
+ : {
129
+ filePath: item.filePath,
130
+ hash: item.hash,
131
+ summaryText: item.summaryText
132
+ };
133
+ await index.upsertItem({
134
+ id: item.filePath,
135
+ vector: embeddings[i],
136
+ metadata
137
+ });
138
+ }
138
139
  }
139
140
  for (const itemId of toDelete) {
140
141
  await index.deleteItem(itemId);
package/dist/config.js CHANGED
@@ -25,6 +25,17 @@ function readListOption(value, envName, fallback = []) {
25
25
  }
26
26
  return fallback;
27
27
  }
28
/**
 * Resolve a positive-integer option from an explicit value or, when that is
 * null/undefined, from an environment variable.
 *
 * @param {number|string|null|undefined} value - Explicit option value; wins over the environment when not nullish.
 * @param {string} envName - Environment variable consulted when `value` is nullish; also named in the error message.
 * @param {number} fallback - Returned unchanged when the resolved value is undefined or an empty string.
 * @returns {number} The resolved integer (>= 1), or `fallback`.
 * @throws {Error} When the resolved value is not a whole positive integer.
 */
function readPositiveIntegerOption(value, envName, fallback) {
    const rawValue = value ?? process.env[envName];
    if (rawValue === undefined || rawValue === "") {
        return fallback;
    }
    let parsed = Number.NaN;
    if (typeof rawValue === "number") {
        parsed = rawValue;
    }
    else if (typeof rawValue === "string") {
        // Number.parseInt stops at the first non-digit, so "12abc" would become 12
        // and "1.5" would become 1. Require the whole (trimmed) string to be digits
        // so malformed input is rejected instead of silently truncated.
        const trimmed = rawValue.trim();
        if (/^\+?\d+$/.test(trimmed)) {
            parsed = Number(trimmed);
        }
    }
    if (!Number.isInteger(parsed) || parsed < 1) {
        throw new Error(`Invalid ${envName}. Expected a positive integer.`);
    }
    return parsed;
}
28
39
  function loadRcFile(configPath) {
29
40
  const resolvedPath = node_path_1.default.resolve(process.cwd(), configPath || ".diffdocrc");
30
41
  if (!node_fs_1.default.existsSync(resolvedPath)) {
@@ -57,6 +68,7 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
57
68
  const mergedOptions = mergeConfigOptions(options);
58
69
  const provider = readProvider(mergedOptions.aiProvider);
59
70
  const apiKey = readOption(mergedOptions.openaiApiKey, "OPENAI_API_KEY", provider === "local" ? "local-key" : "");
71
+ const embedBatchSize = readPositiveIntegerOption(mergedOptions.embedBatchSize, "DIFFDOC_EMBED_BATCH_SIZE", 25);
60
72
  const includeGlobs = readListOption(mergedOptions.includeGlobs, "DIFFDOC_INCLUDE_GLOBS");
61
73
  const excludeGlobs = readListOption(mergedOptions.excludeGlobs, "DIFFDOC_EXCLUDE_GLOBS");
62
74
  const ignoreFile = readOption(mergedOptions.ignoreFile, "DIFFDOC_IGNORE_FILE", ".diffdocignore");
@@ -98,7 +110,8 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
98
110
  embeddings: {
99
111
  apiKey,
100
112
  baseURL: embedBaseURL,
101
- model: embedModel
113
+ model: embedModel,
114
+ batchSize: embedBatchSize
102
115
  },
103
116
  summarize: {
104
117
  includeGlobs,
package/dist/index.js CHANGED
@@ -31,7 +31,8 @@ function addEmbeddingOptions(command) {
31
31
  return command
32
32
  .option("--local-embed-endpoint <url>", "local OpenAI-compatible embeddings endpoint")
33
33
  .option("--local-embed-model <model>", "local embedding model name")
34
- .option("--cloud-embed-model <model>", "cloud embedding model name");
34
+ .option("--cloud-embed-model <model>", "cloud embedding model name")
35
+ .option("--embed-batch-size <count>", "number of summary documents to send per embeddings request");
35
36
  }
36
37
  function addCloudEndpointAndKeyOptions(command) {
37
38
  return command
@@ -41,7 +42,7 @@ function addCloudEndpointAndKeyOptions(command) {
41
42
  program
42
43
  .name("diffdoc")
43
44
  .description("Translate repository code shifts into plain-English business context")
44
- .version("0.1.0");
45
+ .version("0.4.1");
45
46
  program
46
47
  .command("init")
47
48
  .description("Initialize DiffDoc configuration for this repository")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "diffdoc",
3
- "version": "0.4.0",
3
+ "version": "0.4.1",
4
4
  "description": "Translate repository code shifts into plain-English business context",
5
5
  "license": "MIT",
6
6
  "author": "Christopher Sullivan",