diffdoc 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.diffdocrc.example +1 -0
- package/README.md +6 -0
- package/dist/commands/init.js +1 -0
- package/dist/commands/summarize.js +129 -71
- package/dist/config.js +3 -1
- package/dist/index.js +2 -1
- package/package.json +1 -1
package/.diffdocrc.example
CHANGED
package/README.md
CHANGED
|
@@ -125,6 +125,7 @@ Example `.diffdocrc` for local models:
|
|
|
125
125
|
"localChatModel": "qwen2.5-coder:7b",
|
|
126
126
|
"localEmbedModel": "nomic-embed-code",
|
|
127
127
|
"embedBatchSize": 25,
|
|
128
|
+
"summarizeConcurrency": 2,
|
|
128
129
|
"includeGlobs": [],
|
|
129
130
|
"excludeGlobs": [],
|
|
130
131
|
"ignoreFile": ".diffdocignore"
|
|
@@ -141,6 +142,7 @@ Example `.diffdocrc` for a cloud OpenAI-compatible endpoint:
|
|
|
141
142
|
"cloudChatModel": "gpt-4o-mini",
|
|
142
143
|
"cloudEmbedModel": "text-embedding-3-small",
|
|
143
144
|
"embedBatchSize": 25,
|
|
145
|
+
"summarizeConcurrency": 2,
|
|
144
146
|
"includeGlobs": [],
|
|
145
147
|
"excludeGlobs": [],
|
|
146
148
|
"ignoreFile": ".diffdocignore"
|
|
@@ -159,6 +161,7 @@ Supported environment variables:
|
|
|
159
161
|
AI_PROVIDER
|
|
160
162
|
DIFFDOC_BASE_DIR
|
|
161
163
|
DIFFDOC_EMBED_BATCH_SIZE
|
|
164
|
+
DIFFDOC_SUMMARIZE_CONCURRENCY
|
|
162
165
|
DIFFDOC_INCLUDE_GLOBS
|
|
163
166
|
DIFFDOC_EXCLUDE_GLOBS
|
|
164
167
|
DIFFDOC_IGNORE_FILE
|
|
@@ -227,8 +230,11 @@ Summarize files into `.diffdoc/manifest.json` and `.diffdoc/summaries/*.json`:
|
|
|
227
230
|
npx diffdoc summarize --path . --mode all
|
|
228
231
|
npx diffdoc summarize --path . --mode delta
|
|
229
232
|
npx diffdoc summarize --path . --mode delta --json
|
|
233
|
+
npx diffdoc summarize --path . --mode all --summarize-concurrency 4
|
|
230
234
|
```
|
|
231
235
|
|
|
236
|
+
Summarization runs with bounded concurrency. The default is `2`; use `1` for strict rate limits, `2-4` for most providers, and higher values only when your local model server or API quota can handle the request volume.
|
|
237
|
+
|
|
232
238
|
Store raw code snapshots in summary assets when you want retrieved results to include source text:
|
|
233
239
|
|
|
234
240
|
```bash
|
package/dist/commands/init.js
CHANGED
package/dist/commands/summarize.js
CHANGED
|
@@ -223,6 +223,25 @@ async function ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot
|
|
|
223
223
|
};
|
|
224
224
|
await writeSummaryAsset(summaryPath, summary);
|
|
225
225
|
}
|
|
226
|
+
async function runWithConcurrency(items, concurrency, worker) {
|
|
227
|
+
let nextIndex = 0;
|
|
228
|
+
const workerCount = Math.min(concurrency, items.length);
|
|
229
|
+
await Promise.all(Array.from({ length: workerCount }, async () => {
|
|
230
|
+
while (nextIndex < items.length) {
|
|
231
|
+
const item = items[nextIndex];
|
|
232
|
+
nextIndex += 1;
|
|
233
|
+
await worker(item);
|
|
234
|
+
}
|
|
235
|
+
}));
|
|
236
|
+
}
|
|
237
|
+
function createManifestLock() {
|
|
238
|
+
let queue = Promise.resolve();
|
|
239
|
+
return async function withManifestLock(task) {
|
|
240
|
+
const run = queue.then(task, task);
|
|
241
|
+
queue = run.then(() => undefined, () => undefined);
|
|
242
|
+
return run;
|
|
243
|
+
};
|
|
244
|
+
}
|
|
226
245
|
async function pruneOrphanedSummaries(summaryDir, manifest) {
|
|
227
246
|
const activeHashes = new Set(Object.values(manifest.files));
|
|
228
247
|
let entries = [];
|
|
@@ -269,6 +288,26 @@ async function runSummarize(options, config) {
|
|
|
269
288
|
const totals = { scanned: 0, skipped: 0, updated: 0, failed: 0, pruned: 0 };
|
|
270
289
|
const failures = [];
|
|
271
290
|
const isJson = options.json;
|
|
291
|
+
const concurrency = config.summarize.concurrency;
|
|
292
|
+
const withManifestLock = createManifestLock();
|
|
293
|
+
const summaryAssetTasks = new Map();
|
|
294
|
+
async function ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot) {
|
|
295
|
+
const summaryPath = getSummaryPath(summaryDir, hash);
|
|
296
|
+
if (await fileExists(summaryPath)) {
|
|
297
|
+
return;
|
|
298
|
+
}
|
|
299
|
+
let task = summaryAssetTasks.get(hash);
|
|
300
|
+
if (!task) {
|
|
301
|
+
task = (async () => {
|
|
302
|
+
const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
|
|
303
|
+
await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
|
|
304
|
+
})().finally(() => {
|
|
305
|
+
summaryAssetTasks.delete(hash);
|
|
306
|
+
});
|
|
307
|
+
summaryAssetTasks.set(hash, task);
|
|
308
|
+
}
|
|
309
|
+
await task;
|
|
310
|
+
}
|
|
272
311
|
if (!isJson) {
|
|
273
312
|
console.log(`Starting summarize run`);
|
|
274
313
|
console.log(`Mode: ${options.mode}`);
|
|
@@ -283,44 +322,51 @@ async function runSummarize(options, config) {
|
|
|
283
322
|
await writeManifest(manifestPath, manifest);
|
|
284
323
|
const files = await walkCodeFiles(repoPath, includePatterns, excludePatterns, ignoreMatcher);
|
|
285
324
|
const totalFiles = files.length;
|
|
325
|
+
let completedFiles = 0;
|
|
286
326
|
if (!isJson) {
|
|
287
327
|
console.log(`Candidates: ${totalFiles}`);
|
|
328
|
+
console.log(`Concurrency: ${concurrency}`);
|
|
288
329
|
}
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
330
|
+
await runWithConcurrency(files, concurrency, async (filePath) => {
|
|
331
|
+
await withManifestLock(async () => {
|
|
332
|
+
totals.scanned += 1;
|
|
333
|
+
});
|
|
292
334
|
try {
|
|
293
335
|
const absolutePath = node_path_1.default.join(repoPath, filePath);
|
|
294
336
|
const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
|
|
295
337
|
const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
}
|
|
338
|
+
await ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot);
|
|
339
|
+
await withManifestLock(async () => {
|
|
340
|
+
manifest.files[filePath] = hash;
|
|
341
|
+
refs.set(hash, (refs.get(hash) || 0) + 1);
|
|
342
|
+
await writeManifest(manifestPath, manifest);
|
|
343
|
+
totals.updated += 1;
|
|
344
|
+
completedFiles += 1;
|
|
345
|
+
if (!isJson) {
|
|
346
|
+
console.log(`[${completedFiles}/${totalFiles}] summarized ${filePath}`);
|
|
347
|
+
}
|
|
348
|
+
});
|
|
308
349
|
}
|
|
309
350
|
catch (error) {
|
|
310
351
|
const message = error instanceof Error ? error.message : String(error);
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
352
|
+
await withManifestLock(async () => {
|
|
353
|
+
failures.push({ filePath, message });
|
|
354
|
+
totals.failed += 1;
|
|
355
|
+
completedFiles += 1;
|
|
356
|
+
if (!isJson) {
|
|
357
|
+
console.error(`[${completedFiles}/${totalFiles}] failed ${filePath}: ${message}`);
|
|
358
|
+
}
|
|
359
|
+
});
|
|
316
360
|
}
|
|
317
|
-
}
|
|
361
|
+
});
|
|
318
362
|
}
|
|
319
363
|
else {
|
|
320
364
|
const deltas = await (0, git_1.getGitDeltas)(repoPath, manifest.lastSyncedCommit);
|
|
321
365
|
const totalCandidates = deltas.modifiedOrAdded.length + deltas.deleted.length;
|
|
366
|
+
let completedModified = 0;
|
|
322
367
|
if (!isJson) {
|
|
323
368
|
console.log(`Candidates: ${totalCandidates} (${deltas.modifiedOrAdded.length} modified/added, ${deltas.deleted.length} deleted)`);
|
|
369
|
+
console.log(`Concurrency: ${concurrency}`);
|
|
324
370
|
}
|
|
325
371
|
for (const deletedPath of deltas.deleted) {
|
|
326
372
|
const removed = await removeManifestPath(deletedPath, manifest, manifestPath, summaryDir, refs);
|
|
@@ -331,73 +377,85 @@ async function runSummarize(options, config) {
|
|
|
331
377
|
console.log(`pruned ${deletedPath}`);
|
|
332
378
|
}
|
|
333
379
|
}
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
380
|
+
await runWithConcurrency(deltas.modifiedOrAdded, concurrency, async (filePath) => {
|
|
381
|
+
await withManifestLock(async () => {
|
|
382
|
+
totals.scanned += 1;
|
|
383
|
+
});
|
|
337
384
|
try {
|
|
338
385
|
if (!shouldIncludeFile(filePath, includePatterns, excludePatterns, ignoreMatcher)) {
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
386
|
+
await withManifestLock(async () => {
|
|
387
|
+
const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
|
|
388
|
+
if (removed) {
|
|
389
|
+
totals.pruned += 1;
|
|
390
|
+
}
|
|
391
|
+
else {
|
|
392
|
+
totals.skipped += 1;
|
|
393
|
+
}
|
|
394
|
+
completedModified += 1;
|
|
395
|
+
if (!isJson) {
|
|
396
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] excluded ${filePath}`);
|
|
397
|
+
}
|
|
398
|
+
});
|
|
399
|
+
return;
|
|
350
400
|
}
|
|
351
401
|
const previousHash = manifest.files[filePath];
|
|
352
402
|
const absolutePath = node_path_1.default.join(repoPath, filePath);
|
|
353
403
|
const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
|
|
354
404
|
const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
|
|
355
405
|
if (previousHash === hash) {
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
|
|
365
|
-
await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
|
|
366
|
-
}
|
|
367
|
-
const changed = await setManifestPathHash(filePath, hash, manifest, manifestPath, summaryDir, refs);
|
|
368
|
-
if (changed) {
|
|
369
|
-
totals.updated += 1;
|
|
370
|
-
}
|
|
371
|
-
else {
|
|
372
|
-
totals.skipped += 1;
|
|
373
|
-
}
|
|
374
|
-
if (!isJson) {
|
|
375
|
-
console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] updated ${filePath}`);
|
|
406
|
+
await withManifestLock(async () => {
|
|
407
|
+
totals.skipped += 1;
|
|
408
|
+
completedModified += 1;
|
|
409
|
+
if (!isJson) {
|
|
410
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] unchanged ${filePath}`);
|
|
411
|
+
}
|
|
412
|
+
});
|
|
413
|
+
return;
|
|
376
414
|
}
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
if (removed) {
|
|
383
|
-
totals.pruned += 1;
|
|
415
|
+
await ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot);
|
|
416
|
+
await withManifestLock(async () => {
|
|
417
|
+
const changed = await setManifestPathHash(filePath, hash, manifest, manifestPath, summaryDir, refs);
|
|
418
|
+
if (changed) {
|
|
419
|
+
totals.updated += 1;
|
|
384
420
|
}
|
|
385
421
|
else {
|
|
386
422
|
totals.skipped += 1;
|
|
387
423
|
}
|
|
424
|
+
completedModified += 1;
|
|
388
425
|
if (!isJson) {
|
|
389
|
-
console.log(`[${
|
|
426
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] updated ${filePath}`);
|
|
390
427
|
}
|
|
391
|
-
|
|
428
|
+
});
|
|
429
|
+
}
|
|
430
|
+
catch (error) {
|
|
431
|
+
const nodeError = error;
|
|
432
|
+
if (nodeError.code === "ENOENT") {
|
|
433
|
+
await withManifestLock(async () => {
|
|
434
|
+
const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
|
|
435
|
+
if (removed) {
|
|
436
|
+
totals.pruned += 1;
|
|
437
|
+
}
|
|
438
|
+
else {
|
|
439
|
+
totals.skipped += 1;
|
|
440
|
+
}
|
|
441
|
+
completedModified += 1;
|
|
442
|
+
if (!isJson) {
|
|
443
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] missing ${filePath}`);
|
|
444
|
+
}
|
|
445
|
+
});
|
|
446
|
+
return;
|
|
392
447
|
}
|
|
393
448
|
const message = error instanceof Error ? error.message : String(error);
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
449
|
+
await withManifestLock(async () => {
|
|
450
|
+
failures.push({ filePath, message });
|
|
451
|
+
totals.failed += 1;
|
|
452
|
+
completedModified += 1;
|
|
453
|
+
if (!isJson) {
|
|
454
|
+
console.error(`[${completedModified}/${deltas.modifiedOrAdded.length}] failed ${filePath}: ${message}`);
|
|
455
|
+
}
|
|
456
|
+
});
|
|
399
457
|
}
|
|
400
|
-
}
|
|
458
|
+
});
|
|
401
459
|
}
|
|
402
460
|
manifest.lastSyncedCommit = await (0, git_1.getCurrentCommit)(repoPath);
|
|
403
461
|
await writeManifest(manifestPath, manifest);
|
|
@@ -413,7 +471,7 @@ async function runSummarize(options, config) {
|
|
|
413
471
|
finishedAt: finishedAt.toISOString(),
|
|
414
472
|
durationMs,
|
|
415
473
|
totals,
|
|
416
|
-
failures
|
|
474
|
+
failures: failures.sort((a, b) => a.filePath.localeCompare(b.filePath))
|
|
417
475
|
};
|
|
418
476
|
if (isJson) {
|
|
419
477
|
console.log(JSON.stringify(report, null, 2));
|
package/dist/config.js
CHANGED
|
@@ -72,6 +72,7 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
|
|
|
72
72
|
const includeGlobs = readListOption(mergedOptions.includeGlobs, "DIFFDOC_INCLUDE_GLOBS");
|
|
73
73
|
const excludeGlobs = readListOption(mergedOptions.excludeGlobs, "DIFFDOC_EXCLUDE_GLOBS");
|
|
74
74
|
const ignoreFile = readOption(mergedOptions.ignoreFile, "DIFFDOC_IGNORE_FILE", ".diffdocignore");
|
|
75
|
+
const summarizeConcurrency = readPositiveIntegerOption(mergedOptions.summarizeConcurrency, "DIFFDOC_SUMMARIZE_CONCURRENCY", 2);
|
|
75
76
|
const chatBaseURL = provider === "cloud"
|
|
76
77
|
? readOption(mergedOptions.cloudLlmEndpoint, "CLOUD_LLM_ENDPOINT", "https://api.openai.com/v1")
|
|
77
78
|
: readOption(mergedOptions.localLlmEndpoint, "LOCAL_LLM_ENDPOINT");
|
|
@@ -116,7 +117,8 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
|
|
|
116
117
|
summarize: {
|
|
117
118
|
includeGlobs,
|
|
118
119
|
excludeGlobs,
|
|
119
|
-
ignoreFile
|
|
120
|
+
ignoreFile,
|
|
121
|
+
concurrency: summarizeConcurrency
|
|
120
122
|
}
|
|
121
123
|
};
|
|
122
124
|
}
|
package/dist/index.js
CHANGED
|
@@ -42,7 +42,7 @@ function addCloudEndpointAndKeyOptions(command) {
|
|
|
42
42
|
program
|
|
43
43
|
.name("diffdoc")
|
|
44
44
|
.description("Translate repository code shifts into plain-English business context")
|
|
45
|
-
.version("0.4.3");
|
|
45
|
+
.version("0.5.0");
|
|
46
46
|
program
|
|
47
47
|
.command("init")
|
|
48
48
|
.description("Initialize DiffDoc configuration for this repository")
|
|
@@ -71,6 +71,7 @@ addChatOptions(addBaseOptions(program
|
|
|
71
71
|
.option("--include-glob <pattern>", "include glob pattern (repeatable)", collectOption, [])
|
|
72
72
|
.option("--exclude-glob <pattern>", "exclude glob pattern (repeatable)", collectOption, [])
|
|
73
73
|
.option("--ignore-file <path>", "path to ignore pattern file relative to --path")
|
|
74
|
+
.option("--summarize-concurrency <count>", "number of files to summarize concurrently")
|
|
74
75
|
.action(async (options) => {
|
|
75
76
|
try {
|
|
76
77
|
const config = (0, config_1.buildRuntimeConfig)(options, { chat: true });
|