diffdoc 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@
9
9
  "cloudChatModel": "gpt-4o-mini",
10
10
  "cloudEmbedModel": "text-embedding-3-small",
11
11
  "embedBatchSize": 25,
12
+ "summarizeConcurrency": 2,
12
13
  "openaiApiKey": "",
13
14
  "includeGlobs": [],
14
15
  "excludeGlobs": [],
package/README.md CHANGED
@@ -125,6 +125,7 @@ Example `.diffdocrc` for local models:
125
125
  "localChatModel": "qwen2.5-coder:7b",
126
126
  "localEmbedModel": "nomic-embed-code",
127
127
  "embedBatchSize": 25,
128
+ "summarizeConcurrency": 2,
128
129
  "includeGlobs": [],
129
130
  "excludeGlobs": [],
130
131
  "ignoreFile": ".diffdocignore"
@@ -141,6 +142,7 @@ Example `.diffdocrc` for a cloud OpenAI-compatible endpoint:
141
142
  "cloudChatModel": "gpt-4o-mini",
142
143
  "cloudEmbedModel": "text-embedding-3-small",
143
144
  "embedBatchSize": 25,
145
+ "summarizeConcurrency": 2,
144
146
  "includeGlobs": [],
145
147
  "excludeGlobs": [],
146
148
  "ignoreFile": ".diffdocignore"
@@ -159,6 +161,7 @@ Supported environment variables:
159
161
  AI_PROVIDER
160
162
  DIFFDOC_BASE_DIR
161
163
  DIFFDOC_EMBED_BATCH_SIZE
164
+ DIFFDOC_SUMMARIZE_CONCURRENCY
162
165
  DIFFDOC_INCLUDE_GLOBS
163
166
  DIFFDOC_EXCLUDE_GLOBS
164
167
  DIFFDOC_IGNORE_FILE
@@ -227,8 +230,11 @@ Summarize files into `.diffdoc/manifest.json` and `.diffdoc/summaries/*.json`:
227
230
  npx diffdoc summarize --path . --mode all
228
231
  npx diffdoc summarize --path . --mode delta
229
232
  npx diffdoc summarize --path . --mode delta --json
233
+ npx diffdoc summarize --path . --mode all --summarize-concurrency 4
230
234
  ```
231
235
 
236
+ Summarization runs with bounded concurrency. The default is `2`; use `1` for strict rate limits, `2-4` for most providers, and higher values only when your local model server or API quota can handle the request volume.
237
+
232
238
  Store raw code snapshots in summary assets when you want retrieved results to include source text:
233
239
 
234
240
  ```bash
@@ -19,6 +19,7 @@ const DEFAULT_CONFIG = {
19
19
  cloudChatModel: "gpt-4o-mini",
20
20
  cloudEmbedModel: "text-embedding-3-small",
21
21
  openaiApiKey: "",
22
+ summarizeConcurrency: 2,
22
23
  includeGlobs: [],
23
24
  excludeGlobs: [],
24
25
  ignoreFile: ".diffdocignore"
@@ -223,6 +223,25 @@ async function ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot
223
223
  };
224
224
  await writeSummaryAsset(summaryPath, summary);
225
225
  }
226
/**
 * Run `worker` once for each entry of `items`, keeping at most `concurrency`
 * calls in flight at any time.
 *
 * Items are handed out in array order from a shared cursor; each runner takes
 * the next unclaimed item as soon as its previous call settles.
 *
 * @param {Array} items - Values passed one at a time to `worker`.
 * @param {number} concurrency - Maximum number of simultaneous `worker` calls.
 *   Values that are not a number >= 1 (undefined, NaN, 0, negatives) fall back
 *   to 1 so that items are still processed instead of being silently skipped.
 * @param {Function} worker - Called as `worker(item)`; may return a promise.
 * @returns {Promise<void>} Resolves once every item has been processed.
 * @throws Rethrows the first error raised by `worker`. After a failure no new
 *   items are started, and the error is only rethrown once the calls that were
 *   already in flight have settled, so no work continues in the background.
 */
async function runWithConcurrency(items, concurrency, worker) {
    const requested = Math.floor(Number(concurrency));
    // NaN fails the comparison as well, so every unusable value lands on 1.
    const limit = requested >= 1 ? requested : 1;
    const workerCount = Math.min(limit, items.length);
    let nextIndex = 0;
    let failed = false;
    let firstError;
    await Promise.all(Array.from({ length: workerCount }, async () => {
        // Claiming an item is synchronous (no await between read and
        // increment), so two runners can never take the same index.
        while (!failed && nextIndex < items.length) {
            const item = items[nextIndex];
            nextIndex += 1;
            try {
                await worker(item);
            }
            catch (error) {
                // Keep only the first failure; the flag stops every runner
                // from claiming further items.
                if (!failed) {
                    failed = true;
                    firstError = error;
                }
            }
        }
    }));
    if (failed) {
        throw firstError;
    }
}
237
/**
 * Create a promise-chain mutex.
 *
 * The returned `withManifestLock(task)` function runs tasks strictly one after
 * another in the order they were submitted: each task starts only after the
 * previously submitted task has settled.
 *
 * @returns {Function} `withManifestLock(task)`, which returns a promise that
 *   settles with the result (or rejection) of `task`.
 */
function createManifestLock() {
    // Tail of the chain. It is always replaced by a promise that fulfils with
    // undefined, so a failing task can never block or poison later tasks.
    let queue = Promise.resolve();
    const ignore = () => undefined;
    return function withManifestLock(task) {
        // Call the task with no arguments; the previous settlement value is
        // not forwarded to it.
        const run = queue.then(() => task());
        queue = run.then(ignore, ignore);
        return run;
    };
}
226
245
  async function pruneOrphanedSummaries(summaryDir, manifest) {
227
246
  const activeHashes = new Set(Object.values(manifest.files));
228
247
  let entries = [];
@@ -269,6 +288,26 @@ async function runSummarize(options, config) {
269
288
  const totals = { scanned: 0, skipped: 0, updated: 0, failed: 0, pruned: 0 };
270
289
  const failures = [];
271
290
  const isJson = options.json;
291
+ const concurrency = config.summarize.concurrency;
292
+ const withManifestLock = createManifestLock();
293
+ const summaryAssetTasks = new Map();
294
+ async function ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot) {
295
+ const summaryPath = getSummaryPath(summaryDir, hash);
296
+ if (await fileExists(summaryPath)) {
297
+ return;
298
+ }
299
+ let task = summaryAssetTasks.get(hash);
300
+ if (!task) {
301
+ task = (async () => {
302
+ const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
303
+ await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
304
+ })().finally(() => {
305
+ summaryAssetTasks.delete(hash);
306
+ });
307
+ summaryAssetTasks.set(hash, task);
308
+ }
309
+ await task;
310
+ }
272
311
  if (!isJson) {
273
312
  console.log(`Starting summarize run`);
274
313
  console.log(`Mode: ${options.mode}`);
@@ -283,44 +322,51 @@ async function runSummarize(options, config) {
283
322
  await writeManifest(manifestPath, manifest);
284
323
  const files = await walkCodeFiles(repoPath, includePatterns, excludePatterns, ignoreMatcher);
285
324
  const totalFiles = files.length;
325
+ let completedFiles = 0;
286
326
  if (!isJson) {
287
327
  console.log(`Candidates: ${totalFiles}`);
328
+ console.log(`Concurrency: ${concurrency}`);
288
329
  }
289
- for (let i = 0; i < files.length; i += 1) {
290
- const filePath = files[i];
291
- totals.scanned += 1;
330
+ await runWithConcurrency(files, concurrency, async (filePath) => {
331
+ await withManifestLock(async () => {
332
+ totals.scanned += 1;
333
+ });
292
334
  try {
293
335
  const absolutePath = node_path_1.default.join(repoPath, filePath);
294
336
  const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
295
337
  const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
296
- const summaryPath = getSummaryPath(summaryDir, hash);
297
- if (!await fileExists(summaryPath)) {
298
- const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
299
- await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
300
- }
301
- manifest.files[filePath] = hash;
302
- refs.set(hash, (refs.get(hash) || 0) + 1);
303
- await writeManifest(manifestPath, manifest);
304
- totals.updated += 1;
305
- if (!isJson) {
306
- console.log(`[${i + 1}/${totalFiles}] summarized ${filePath}`);
307
- }
338
+ await ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot);
339
+ await withManifestLock(async () => {
340
+ manifest.files[filePath] = hash;
341
+ refs.set(hash, (refs.get(hash) || 0) + 1);
342
+ await writeManifest(manifestPath, manifest);
343
+ totals.updated += 1;
344
+ completedFiles += 1;
345
+ if (!isJson) {
346
+ console.log(`[${completedFiles}/${totalFiles}] summarized ${filePath}`);
347
+ }
348
+ });
308
349
  }
309
350
  catch (error) {
310
351
  const message = error instanceof Error ? error.message : String(error);
311
- failures.push({ filePath, message });
312
- totals.failed += 1;
313
- if (!isJson) {
314
- console.error(`[${i + 1}/${totalFiles}] failed ${filePath}: ${message}`);
315
- }
352
+ await withManifestLock(async () => {
353
+ failures.push({ filePath, message });
354
+ totals.failed += 1;
355
+ completedFiles += 1;
356
+ if (!isJson) {
357
+ console.error(`[${completedFiles}/${totalFiles}] failed ${filePath}: ${message}`);
358
+ }
359
+ });
316
360
  }
317
- }
361
+ });
318
362
  }
319
363
  else {
320
364
  const deltas = await (0, git_1.getGitDeltas)(repoPath, manifest.lastSyncedCommit);
321
365
  const totalCandidates = deltas.modifiedOrAdded.length + deltas.deleted.length;
366
+ let completedModified = 0;
322
367
  if (!isJson) {
323
368
  console.log(`Candidates: ${totalCandidates} (${deltas.modifiedOrAdded.length} modified/added, ${deltas.deleted.length} deleted)`);
369
+ console.log(`Concurrency: ${concurrency}`);
324
370
  }
325
371
  for (const deletedPath of deltas.deleted) {
326
372
  const removed = await removeManifestPath(deletedPath, manifest, manifestPath, summaryDir, refs);
@@ -331,73 +377,85 @@ async function runSummarize(options, config) {
331
377
  console.log(`pruned ${deletedPath}`);
332
378
  }
333
379
  }
334
- for (let i = 0; i < deltas.modifiedOrAdded.length; i += 1) {
335
- const filePath = deltas.modifiedOrAdded[i];
336
- totals.scanned += 1;
380
+ await runWithConcurrency(deltas.modifiedOrAdded, concurrency, async (filePath) => {
381
+ await withManifestLock(async () => {
382
+ totals.scanned += 1;
383
+ });
337
384
  try {
338
385
  if (!shouldIncludeFile(filePath, includePatterns, excludePatterns, ignoreMatcher)) {
339
- const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
340
- if (removed) {
341
- totals.pruned += 1;
342
- }
343
- else {
344
- totals.skipped += 1;
345
- }
346
- if (!isJson) {
347
- console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] excluded ${filePath}`);
348
- }
349
- continue;
386
+ await withManifestLock(async () => {
387
+ const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
388
+ if (removed) {
389
+ totals.pruned += 1;
390
+ }
391
+ else {
392
+ totals.skipped += 1;
393
+ }
394
+ completedModified += 1;
395
+ if (!isJson) {
396
+ console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] excluded ${filePath}`);
397
+ }
398
+ });
399
+ return;
350
400
  }
351
401
  const previousHash = manifest.files[filePath];
352
402
  const absolutePath = node_path_1.default.join(repoPath, filePath);
353
403
  const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
354
404
  const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
355
405
  if (previousHash === hash) {
356
- totals.skipped += 1;
357
- if (!isJson) {
358
- console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] unchanged ${filePath}`);
359
- }
360
- continue;
361
- }
362
- const summaryPath = getSummaryPath(summaryDir, hash);
363
- if (!await fileExists(summaryPath)) {
364
- const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
365
- await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
366
- }
367
- const changed = await setManifestPathHash(filePath, hash, manifest, manifestPath, summaryDir, refs);
368
- if (changed) {
369
- totals.updated += 1;
370
- }
371
- else {
372
- totals.skipped += 1;
373
- }
374
- if (!isJson) {
375
- console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] updated ${filePath}`);
406
+ await withManifestLock(async () => {
407
+ totals.skipped += 1;
408
+ completedModified += 1;
409
+ if (!isJson) {
410
+ console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] unchanged ${filePath}`);
411
+ }
412
+ });
413
+ return;
376
414
  }
377
- }
378
- catch (error) {
379
- const nodeError = error;
380
- if (nodeError.code === "ENOENT") {
381
- const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
382
- if (removed) {
383
- totals.pruned += 1;
415
+ await ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot);
416
+ await withManifestLock(async () => {
417
+ const changed = await setManifestPathHash(filePath, hash, manifest, manifestPath, summaryDir, refs);
418
+ if (changed) {
419
+ totals.updated += 1;
384
420
  }
385
421
  else {
386
422
  totals.skipped += 1;
387
423
  }
424
+ completedModified += 1;
388
425
  if (!isJson) {
389
- console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] missing ${filePath}`);
426
+ console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] updated ${filePath}`);
390
427
  }
391
- continue;
428
+ });
429
+ }
430
+ catch (error) {
431
+ const nodeError = error;
432
+ if (nodeError.code === "ENOENT") {
433
+ await withManifestLock(async () => {
434
+ const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
435
+ if (removed) {
436
+ totals.pruned += 1;
437
+ }
438
+ else {
439
+ totals.skipped += 1;
440
+ }
441
+ completedModified += 1;
442
+ if (!isJson) {
443
+ console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] missing ${filePath}`);
444
+ }
445
+ });
446
+ return;
392
447
  }
393
448
  const message = error instanceof Error ? error.message : String(error);
394
- failures.push({ filePath, message });
395
- totals.failed += 1;
396
- if (!isJson) {
397
- console.error(`[${i + 1}/${deltas.modifiedOrAdded.length}] failed ${filePath}: ${message}`);
398
- }
449
+ await withManifestLock(async () => {
450
+ failures.push({ filePath, message });
451
+ totals.failed += 1;
452
+ completedModified += 1;
453
+ if (!isJson) {
454
+ console.error(`[${completedModified}/${deltas.modifiedOrAdded.length}] failed ${filePath}: ${message}`);
455
+ }
456
+ });
399
457
  }
400
- }
458
+ });
401
459
  }
402
460
  manifest.lastSyncedCommit = await (0, git_1.getCurrentCommit)(repoPath);
403
461
  await writeManifest(manifestPath, manifest);
@@ -413,7 +471,7 @@ async function runSummarize(options, config) {
413
471
  finishedAt: finishedAt.toISOString(),
414
472
  durationMs,
415
473
  totals,
416
- failures
474
+ failures: failures.sort((a, b) => a.filePath.localeCompare(b.filePath))
417
475
  };
418
476
  if (isJson) {
419
477
  console.log(JSON.stringify(report, null, 2));
package/dist/config.js CHANGED
@@ -72,6 +72,7 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
72
72
  const includeGlobs = readListOption(mergedOptions.includeGlobs, "DIFFDOC_INCLUDE_GLOBS");
73
73
  const excludeGlobs = readListOption(mergedOptions.excludeGlobs, "DIFFDOC_EXCLUDE_GLOBS");
74
74
  const ignoreFile = readOption(mergedOptions.ignoreFile, "DIFFDOC_IGNORE_FILE", ".diffdocignore");
75
+ const summarizeConcurrency = readPositiveIntegerOption(mergedOptions.summarizeConcurrency, "DIFFDOC_SUMMARIZE_CONCURRENCY", 2);
75
76
  const chatBaseURL = provider === "cloud"
76
77
  ? readOption(mergedOptions.cloudLlmEndpoint, "CLOUD_LLM_ENDPOINT", "https://api.openai.com/v1")
77
78
  : readOption(mergedOptions.localLlmEndpoint, "LOCAL_LLM_ENDPOINT");
@@ -116,7 +117,8 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
116
117
  summarize: {
117
118
  includeGlobs,
118
119
  excludeGlobs,
119
- ignoreFile
120
+ ignoreFile,
121
+ concurrency: summarizeConcurrency
120
122
  }
121
123
  };
122
124
  }
package/dist/index.js CHANGED
@@ -42,7 +42,7 @@ function addCloudEndpointAndKeyOptions(command) {
42
42
  program
43
43
  .name("diffdoc")
44
44
  .description("Translate repository code shifts into plain-English business context")
45
- .version("0.4.3");
45
+ .version("0.5.0");
46
46
  program
47
47
  .command("init")
48
48
  .description("Initialize DiffDoc configuration for this repository")
@@ -71,6 +71,7 @@ addChatOptions(addBaseOptions(program
71
71
  .option("--include-glob <pattern>", "include glob pattern (repeatable)", collectOption, [])
72
72
  .option("--exclude-glob <pattern>", "exclude glob pattern (repeatable)", collectOption, [])
73
73
  .option("--ignore-file <path>", "path to ignore pattern file relative to --path")
74
+ .option("--summarize-concurrency <count>", "number of files to summarize concurrently")
74
75
  .action(async (options) => {
75
76
  try {
76
77
  const config = (0, config_1.buildRuntimeConfig)(options, { chat: true });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "diffdoc",
3
- "version": "0.4.3",
3
+ "version": "0.5.0",
4
4
  "description": "Translate repository code shifts into plain-English business context",
5
5
  "license": "MIT",
6
6
  "author": "Christopher Sullivan",