diffdoc 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -3
- package/dist/commands/embed.js +7 -6
- package/dist/commands/status.js +76 -8
- package/dist/commands/summarize.js +105 -21
- package/dist/config.js +20 -1
- package/dist/index.js +6 -2
- package/dist/types/artifacts.js +1 -1
- package/dist/utils/hashing.js +4 -0
- package/dist/utils/llm.js +94 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -165,6 +165,8 @@ DIFFDOC_SUMMARIZE_CONCURRENCY
|
|
|
165
165
|
DIFFDOC_INCLUDE_GLOBS
|
|
166
166
|
DIFFDOC_EXCLUDE_GLOBS
|
|
167
167
|
DIFFDOC_IGNORE_FILE
|
|
168
|
+
DIFFDOC_SUMMARY_PROMPT
|
|
169
|
+
DIFFDOC_SUMMARY_PROMPT_FILE
|
|
168
170
|
LOCAL_LLM_ENDPOINT
|
|
169
171
|
LOCAL_CHAT_MODEL
|
|
170
172
|
LOCAL_EMBED_ENDPOINT
|
|
@@ -231,16 +233,26 @@ npx diffdoc summarize --path . --mode all
|
|
|
231
233
|
npx diffdoc summarize --path . --mode delta
|
|
232
234
|
npx diffdoc summarize --path . --mode delta --json
|
|
233
235
|
npx diffdoc summarize --path . --mode all --summarize-concurrency 4
|
|
236
|
+
npx diffdoc summarize --path . --mode all --refresh
|
|
234
237
|
```
|
|
235
238
|
|
|
236
239
|
Summarization runs with bounded concurrency. The default is `2`; use `1` for strict rate limits, `2-4` for most providers, and higher values only when your local model server or API quota can handle the request volume.
|
|
237
240
|
|
|
238
|
-
|
|
241
|
+
Use `--summary-prompt` or `--summary-prompt-file` to add domain-specific guidance without replacing DiffDoc's default structured prompt:
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
npx diffdoc summarize --summary-prompt "Emphasize billing behavior, permissions, data retention, and operational risk."
|
|
245
|
+
npx diffdoc summarize --summary-prompt-file ./diffdoc-summary-prompt.md
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
Raw code snapshots are optional. DiffDoc normally stores file path and content hash metadata so tools can look up source files from the repository when needed. Store raw code snapshots only when you need exported, offline, or point-in-time audit artifacts to include source text:
|
|
239
249
|
|
|
240
250
|
```bash
|
|
241
251
|
npx diffdoc summarize --path . --mode all --include-code-snapshot
|
|
242
252
|
```
|
|
243
253
|
|
|
254
|
+
Snapshots increase artifact size and duplicate source code, which can include sensitive or proprietary content.
|
|
255
|
+
|
|
244
256
|
Check manifest and index freshness:
|
|
245
257
|
|
|
246
258
|
```bash
|
|
@@ -248,6 +260,8 @@ npx diffdoc status
|
|
|
248
260
|
npx diffdoc status --json
|
|
249
261
|
```
|
|
250
262
|
|
|
263
|
+
`status` also recommends the next command to run. It prioritizes refreshing missing or stale summaries before rebuilding the vector index.
|
|
264
|
+
|
|
251
265
|
Embed summaries into the local Vectra index:
|
|
252
266
|
|
|
253
267
|
```bash
|
|
@@ -305,13 +319,47 @@ Each summary asset is portable JSON:
|
|
|
305
319
|
|
|
306
320
|
```json
|
|
307
321
|
{
|
|
308
|
-
"schemaVersion": 1,
|
|
322
|
+
"schemaVersion": 2,
|
|
309
323
|
"content_hash": "md5-string",
|
|
310
|
-
"
|
|
324
|
+
"metadata": {
|
|
325
|
+
"file_path": "src/example.ts",
|
|
326
|
+
"file_name": "example.ts",
|
|
327
|
+
"extension": ".ts",
|
|
328
|
+
"line_count": 42,
|
|
329
|
+
"byte_size": 1200,
|
|
330
|
+
"content_hash": "md5-string",
|
|
331
|
+
"generated_at": "2026-05-27T00:00:00.000Z",
|
|
332
|
+
"generator": {
|
|
333
|
+
"provider": "local",
|
|
334
|
+
"model": "qwen2.5-coder:7b",
|
|
335
|
+
"base_url": "http://localhost:11434/v1"
|
|
336
|
+
},
|
|
337
|
+
"prompt_version": 1,
|
|
338
|
+
"summary_format": "structured-functional-v1"
|
|
339
|
+
},
|
|
340
|
+
"summary": "## Metadata\n- File path: src/example.ts\n...",
|
|
311
341
|
"raw_code_snapshot": "Optional code text when --include-code-snapshot is enabled"
|
|
312
342
|
}
|
|
313
343
|
```
|
|
314
344
|
|
|
345
|
+
The JSON `metadata` contains deterministic source and generation facts. The markdown `summary` begins with `## Metadata`, which is embedded with the rest of the summary so file paths, hashes, inferred language/type, symbols, functions, classes, and dependencies are searchable. Language/type and symbol/dependency details are inferred by the model from the file path, extension, and code content rather than maintained through a static parser.
|
|
346
|
+
|
|
347
|
+
Structured summaries use these sections in order:
|
|
348
|
+
|
|
349
|
+
```md
|
|
350
|
+
## Metadata
|
|
351
|
+
## Purpose
|
|
352
|
+
## User-Visible Behavior
|
|
353
|
+
## Business Rules
|
|
354
|
+
## Data Inputs And Outputs
|
|
355
|
+
## Side Effects
|
|
356
|
+
## Error And Edge Cases
|
|
357
|
+
## Dependencies
|
|
358
|
+
## Operational Notes
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
Summary assets are regenerated when the source hash changes, summary schema changes, prompt version changes, summary format changes, custom prompt hash changes, provider/model changes, or `--refresh` is passed. Regenerate existing schema `1` artifacts with `npx diffdoc summarize --mode all --refresh`. The `embed` command remains tolerant of older summary assets as long as they contain a content hash and summary text; use `status` or `summarize` to identify and refresh stale metadata.
|
|
362
|
+
|
|
315
363
|
Commit `.diffdoc/manifest.json` and `.diffdoc/summaries/*.json` if you want summaries shared across machines or CI runs. Keep `.diffdoc/vectra/` local unless you have a specific reason to commit the generated vector index.
|
|
316
364
|
|
|
317
365
|
The manifest and summary assets are the stable handoff point for consumers. The local Vectra index produced by `diffdoc embed` is optional and can be replaced by any embedding model and storage backend that fits your environment.
|
package/dist/commands/embed.js
CHANGED
|
@@ -34,8 +34,8 @@ async function readManifest(manifestPath) {
|
|
|
34
34
|
}
|
|
35
35
|
async function readSummaryAsset(summaryPath) {
|
|
36
36
|
const parsed = JSON.parse(await promises_1.default.readFile(summaryPath, "utf8"));
|
|
37
|
-
if (parsed.schemaVersion !== artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION) {
|
|
38
|
-
throw new Error(`Unsupported summary schema in ${summaryPath}. Expected schemaVersion
|
|
37
|
+
if (typeof parsed.schemaVersion !== "number" || parsed.schemaVersion < 1 || parsed.schemaVersion > artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION) {
|
|
38
|
+
throw new Error(`Unsupported summary schema in ${summaryPath}. Expected schemaVersion 1-${artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION}.`);
|
|
39
39
|
}
|
|
40
40
|
if (typeof parsed.content_hash !== "string") {
|
|
41
41
|
throw new Error(`Invalid summary hash in ${summaryPath}.`);
|
|
@@ -44,14 +44,15 @@ async function readSummaryAsset(summaryPath) {
|
|
|
44
44
|
throw new Error(`Invalid summary text in ${summaryPath}.`);
|
|
45
45
|
}
|
|
46
46
|
return {
|
|
47
|
-
schemaVersion:
|
|
47
|
+
schemaVersion: parsed.schemaVersion,
|
|
48
48
|
content_hash: parsed.content_hash,
|
|
49
|
+
metadata: parsed.metadata && typeof parsed.metadata === "object" ? parsed.metadata : undefined,
|
|
49
50
|
summary: parsed.summary,
|
|
50
51
|
raw_code_snapshot: typeof parsed.raw_code_snapshot === "string" ? parsed.raw_code_snapshot : undefined
|
|
51
52
|
};
|
|
52
53
|
}
|
|
53
|
-
function buildDocument(
|
|
54
|
-
return
|
|
54
|
+
function buildDocument(summaryAsset) {
|
|
55
|
+
return summaryAsset.summary;
|
|
55
56
|
}
|
|
56
57
|
async function runEmbed(options, config) {
|
|
57
58
|
const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.manifest, config.baseDir);
|
|
@@ -96,7 +97,7 @@ async function runEmbed(options, config) {
|
|
|
96
97
|
hash,
|
|
97
98
|
summaryText: summaryAsset.summary,
|
|
98
99
|
rawCodeSnapshot: summaryAsset.raw_code_snapshot,
|
|
99
|
-
document: buildDocument(
|
|
100
|
+
document: buildDocument(summaryAsset)
|
|
100
101
|
});
|
|
101
102
|
}
|
|
102
103
|
const activePathSet = new Set(entries.map(([filePath]) => filePath));
|
package/dist/commands/status.js
CHANGED
|
@@ -10,6 +10,7 @@ const vectra_1 = require("vectra");
|
|
|
10
10
|
const embed_1 = require("./embed");
|
|
11
11
|
const artifacts_1 = require("../types/artifacts");
|
|
12
12
|
const paths_1 = require("../utils/paths");
|
|
13
|
+
const llm_1 = require("../utils/llm");
|
|
13
14
|
function getSummaryDir(manifestPath) {
|
|
14
15
|
return node_path_1.default.resolve(node_path_1.default.dirname(manifestPath), "summaries");
|
|
15
16
|
}
|
|
@@ -64,10 +65,37 @@ async function getSummaryStats(manifestPath, manifest) {
|
|
|
64
65
|
missingFromManifestCount += 1;
|
|
65
66
|
}
|
|
66
67
|
}
|
|
68
|
+
let staleCount = 0;
|
|
69
|
+
for (const hash of manifestHashes) {
|
|
70
|
+
if (!summaryHashes.has(hash)) {
|
|
71
|
+
continue;
|
|
72
|
+
}
|
|
73
|
+
try {
|
|
74
|
+
const parsed = JSON.parse(await promises_1.default.readFile(node_path_1.default.resolve(summaryDir, `${hash}.json`), "utf8"));
|
|
75
|
+
const metadata = parsed.metadata && typeof parsed.metadata === "object" && !Array.isArray(parsed.metadata)
|
|
76
|
+
? parsed.metadata
|
|
77
|
+
: undefined;
|
|
78
|
+
if (parsed.schemaVersion !== artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION ||
|
|
79
|
+
parsed.content_hash !== hash ||
|
|
80
|
+
!metadata ||
|
|
81
|
+
typeof metadata.file_path !== "string" ||
|
|
82
|
+
typeof metadata.file_name !== "string" ||
|
|
83
|
+
typeof metadata.extension !== "string" ||
|
|
84
|
+
metadata.content_hash !== hash ||
|
|
85
|
+
metadata.prompt_version !== llm_1.SUMMARY_PROMPT_VERSION ||
|
|
86
|
+
metadata.summary_format !== llm_1.SUMMARY_FORMAT) {
|
|
87
|
+
staleCount += 1;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
staleCount += 1;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
67
94
|
return {
|
|
68
95
|
summaryFileCount: summaryHashes.size,
|
|
69
96
|
orphanCount,
|
|
70
|
-
missingFromManifestCount
|
|
97
|
+
missingFromManifestCount,
|
|
98
|
+
staleCount
|
|
71
99
|
};
|
|
72
100
|
}
|
|
73
101
|
async function getIndexFreshness(manifest, config) {
|
|
@@ -120,22 +148,58 @@ async function getIndexFreshness(manifest, config) {
|
|
|
120
148
|
};
|
|
121
149
|
}
|
|
122
150
|
function formatSummaryFreshness(stats) {
|
|
123
|
-
if (stats.missingFromManifestCount === 0) {
|
|
151
|
+
if (stats.missingFromManifestCount === 0 && stats.staleCount === 0) {
|
|
124
152
|
return "fresh";
|
|
125
153
|
}
|
|
126
|
-
return `stale (missing: ${stats.missingFromManifestCount})`;
|
|
154
|
+
return `stale (missing: ${stats.missingFromManifestCount}, stale: ${stats.staleCount})`;
|
|
155
|
+
}
|
|
156
|
+
function buildSummarizeCommand(manifestOption) {
|
|
157
|
+
const command = "diffdoc summarize --mode all --refresh";
|
|
158
|
+
return manifestOption === "manifest.json" ? command : `${command} --out ${manifestOption}`;
|
|
159
|
+
}
|
|
160
|
+
function buildEmbedCommand(manifestOption) {
|
|
161
|
+
const command = "diffdoc embed";
|
|
162
|
+
return manifestOption === "manifest.json" ? command : `${command} --manifest ${manifestOption}`;
|
|
163
|
+
}
|
|
164
|
+
function getNextCommand(manifestOption, summaryStats, indexFreshness) {
|
|
165
|
+
if (summaryStats.missingFromManifestCount > 0 || summaryStats.staleCount > 0) {
|
|
166
|
+
return {
|
|
167
|
+
command: buildSummarizeCommand(manifestOption),
|
|
168
|
+
reason: "summary artifacts are missing or stale"
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
if (indexFreshness.status === "missing") {
|
|
172
|
+
return {
|
|
173
|
+
command: buildEmbedCommand(manifestOption),
|
|
174
|
+
reason: "vector index is missing"
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
|
+
if (indexFreshness.status === "stale") {
|
|
178
|
+
return {
|
|
179
|
+
command: buildEmbedCommand(manifestOption),
|
|
180
|
+
reason: "vector index is stale"
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
return {
|
|
184
|
+
command: null,
|
|
185
|
+
reason: "summaries and index are fresh"
|
|
186
|
+
};
|
|
127
187
|
}
|
|
128
|
-
function buildStatusReport(manifest, summaryStats, indexFreshness) {
|
|
188
|
+
function buildStatusReport(manifest, summaryStats, indexFreshness, manifestOption) {
|
|
189
|
+
const nextCommand = getNextCommand(manifestOption, summaryStats, indexFreshness);
|
|
129
190
|
return {
|
|
130
191
|
manifestSchema: manifest.schemaVersion,
|
|
131
192
|
trackedFileCount: Object.keys(manifest.files).length,
|
|
132
193
|
summaryFileCount: summaryStats.summaryFileCount,
|
|
133
194
|
orphanCount: summaryStats.orphanCount,
|
|
134
195
|
summaryFreshness: {
|
|
135
|
-
status: summaryStats.missingFromManifestCount === 0 ? "fresh" : "stale",
|
|
136
|
-
missing: summaryStats.missingFromManifestCount
|
|
196
|
+
status: summaryStats.missingFromManifestCount === 0 && summaryStats.staleCount === 0 ? "fresh" : "stale",
|
|
197
|
+
missing: summaryStats.missingFromManifestCount,
|
|
198
|
+
stale: summaryStats.staleCount
|
|
137
199
|
},
|
|
138
|
-
indexFreshness
|
|
200
|
+
indexFreshness,
|
|
201
|
+
nextCommand: nextCommand.command,
|
|
202
|
+
nextCommandReason: nextCommand.reason
|
|
139
203
|
};
|
|
140
204
|
}
|
|
141
205
|
function formatIndexFreshness(freshness) {
|
|
@@ -152,7 +216,7 @@ async function runStatus(options, config) {
|
|
|
152
216
|
const manifest = await readManifest(manifestPath);
|
|
153
217
|
const summaryStats = await getSummaryStats(manifestPath, manifest);
|
|
154
218
|
const indexFreshness = await getIndexFreshness(manifest, config);
|
|
155
|
-
const report = buildStatusReport(manifest, summaryStats, indexFreshness);
|
|
219
|
+
const report = buildStatusReport(manifest, summaryStats, indexFreshness, options.manifest);
|
|
156
220
|
if (options.json) {
|
|
157
221
|
console.log(JSON.stringify(report, null, 2));
|
|
158
222
|
return;
|
|
@@ -161,6 +225,10 @@ async function runStatus(options, config) {
|
|
|
161
225
|
console.log(`tracked files: ${report.trackedFileCount}`);
|
|
162
226
|
console.log(`summary files: ${report.summaryFileCount}`);
|
|
163
227
|
console.log(`orphans: ${report.orphanCount}`);
|
|
228
|
+
console.log(`stale summaries: ${report.summaryFreshness.stale}`);
|
|
164
229
|
console.log(`summary freshness: ${formatSummaryFreshness(summaryStats)}`);
|
|
165
230
|
console.log(`index freshness: ${formatIndexFreshness(indexFreshness)}`);
|
|
231
|
+
console.log("");
|
|
232
|
+
console.log(`next command: ${report.nextCommand || "none"}`);
|
|
233
|
+
console.log(`reason: ${report.nextCommandReason}`);
|
|
166
234
|
}
|
package/dist/commands/summarize.js
CHANGED
|
@@ -71,15 +71,6 @@ function shouldIncludeFile(filePath, includeGlobs, excludeGlobs, ignoreMatcher)
|
|
|
71
71
|
function isIgnoredDirectory(dirPath, ignoreMatcher) {
|
|
72
72
|
return ignoreMatcher.ignores(dirPath) || ignoreMatcher.ignores(`${dirPath}/`);
|
|
73
73
|
}
|
|
74
|
-
async function fileExists(filePath) {
|
|
75
|
-
try {
|
|
76
|
-
await promises_1.default.access(filePath);
|
|
77
|
-
return true;
|
|
78
|
-
}
|
|
79
|
-
catch {
|
|
80
|
-
return false;
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
74
|
async function atomicWriteUtf8(targetPath, content) {
|
|
84
75
|
await promises_1.default.mkdir(node_path_1.default.dirname(targetPath), { recursive: true });
|
|
85
76
|
const tempPath = `${targetPath}.${process.pid}.${Date.now()}.tmp`;
|
|
@@ -99,6 +90,72 @@ async function writeManifest(manifestPath, manifest) {
|
|
|
99
90
|
async function writeSummaryAsset(summaryPath, summary) {
|
|
100
91
|
await atomicWriteUtf8(summaryPath, `${JSON.stringify(summary, null, 2)}\n`);
|
|
101
92
|
}
|
|
93
|
+
function getPromptHash(config) {
|
|
94
|
+
return config.summarize.resolvedSummaryPrompt
|
|
95
|
+
? (0, hashing_1.hashTextContent)(config.summarize.resolvedSummaryPrompt)
|
|
96
|
+
: undefined;
|
|
97
|
+
}
|
|
98
|
+
function buildSummaryMetadata(params) {
|
|
99
|
+
return {
|
|
100
|
+
file_path: params.filePath,
|
|
101
|
+
file_name: node_path_1.default.basename(params.filePath),
|
|
102
|
+
extension: node_path_1.default.extname(params.filePath),
|
|
103
|
+
line_count: params.rawCodeSnapshot.length === 0 ? 0 : params.rawCodeSnapshot.split(/\r\n|\r|\n/).length,
|
|
104
|
+
byte_size: Buffer.byteLength(params.rawCodeSnapshot, "utf8"),
|
|
105
|
+
content_hash: params.hash,
|
|
106
|
+
generated_at: params.generatedAt,
|
|
107
|
+
generator: {
|
|
108
|
+
provider: params.config.provider,
|
|
109
|
+
model: params.config.chat.model
|
|
110
|
+
},
|
|
111
|
+
prompt_version: llm_1.SUMMARY_PROMPT_VERSION,
|
|
112
|
+
summary_format: llm_1.SUMMARY_FORMAT,
|
|
113
|
+
custom_prompt_hash: params.customPromptHash,
|
|
114
|
+
custom_prompt_source: params.customPromptSource
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
function isRecord(value) {
|
|
118
|
+
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
|
119
|
+
}
|
|
120
|
+
function hasExpectedCustomPromptHash(metadata, customPromptHash) {
|
|
121
|
+
const actual = typeof metadata.custom_prompt_hash === "string" ? metadata.custom_prompt_hash : undefined;
|
|
122
|
+
return actual === customPromptHash;
|
|
123
|
+
}
|
|
124
|
+
async function isSummaryAssetFresh(summaryPath, expected) {
|
|
125
|
+
let parsed;
|
|
126
|
+
try {
|
|
127
|
+
parsed = JSON.parse(await promises_1.default.readFile(summaryPath, "utf8"));
|
|
128
|
+
}
|
|
129
|
+
catch {
|
|
130
|
+
return false;
|
|
131
|
+
}
|
|
132
|
+
if (!isRecord(parsed)) {
|
|
133
|
+
return false;
|
|
134
|
+
}
|
|
135
|
+
if (parsed.schemaVersion !== artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION || parsed.content_hash !== expected.hash) {
|
|
136
|
+
return false;
|
|
137
|
+
}
|
|
138
|
+
if (expected.includeCodeSnapshot !== (typeof parsed.raw_code_snapshot === "string")) {
|
|
139
|
+
return false;
|
|
140
|
+
}
|
|
141
|
+
if (!isRecord(parsed.metadata)) {
|
|
142
|
+
return false;
|
|
143
|
+
}
|
|
144
|
+
const metadata = parsed.metadata;
|
|
145
|
+
if (metadata.content_hash !== expected.hash) {
|
|
146
|
+
return false;
|
|
147
|
+
}
|
|
148
|
+
if (metadata.prompt_version !== expected.promptVersion || metadata.summary_format !== expected.summaryFormat) {
|
|
149
|
+
return false;
|
|
150
|
+
}
|
|
151
|
+
if (!hasExpectedCustomPromptHash(metadata, expected.customPromptHash)) {
|
|
152
|
+
return false;
|
|
153
|
+
}
|
|
154
|
+
if (!isRecord(metadata.generator)) {
|
|
155
|
+
return false;
|
|
156
|
+
}
|
|
157
|
+
return metadata.generator.provider === expected.provider && metadata.generator.model === expected.model;
|
|
158
|
+
}
|
|
102
159
|
async function readManifest(manifestPath) {
|
|
103
160
|
try {
|
|
104
161
|
const parsed = JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
|
|
@@ -210,14 +267,12 @@ async function removeManifestPath(filePath, manifest, manifestPath, summaryDir,
|
|
|
210
267
|
await deleteSummaryIfUnreferenced(summaryDir, previousHash, refs);
|
|
211
268
|
return true;
|
|
212
269
|
}
|
|
213
|
-
async function ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, includeCodeSnapshot) {
|
|
270
|
+
async function ensureSummaryAsset(summaryDir, hash, metadata, summaryText, rawCodeSnapshot, includeCodeSnapshot) {
|
|
214
271
|
const summaryPath = getSummaryPath(summaryDir, hash);
|
|
215
|
-
if (await fileExists(summaryPath)) {
|
|
216
|
-
return;
|
|
217
|
-
}
|
|
218
272
|
const summary = {
|
|
219
273
|
schemaVersion: artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION,
|
|
220
274
|
content_hash: hash,
|
|
275
|
+
metadata,
|
|
221
276
|
summary: summaryText,
|
|
222
277
|
raw_code_snapshot: includeCodeSnapshot ? rawCodeSnapshot : undefined
|
|
223
278
|
};
|
|
@@ -285,7 +340,18 @@ async function runSummarize(options, config) {
|
|
|
285
340
|
: config.summarize.excludeGlobs.map(normalizeGlobPattern));
|
|
286
341
|
const ignoreFile = options.ignoreFile || config.summarize.ignoreFile;
|
|
287
342
|
const ignoreMatcher = await readIgnoreMatcher(repoPath, ignoreFile);
|
|
288
|
-
const
|
|
343
|
+
const customPromptHash = getPromptHash(config);
|
|
344
|
+
const customPromptSource = customPromptHash ? config.summarize.summaryPromptSource : undefined;
|
|
345
|
+
const summaryFreshnessExpected = (hash) => ({
|
|
346
|
+
hash,
|
|
347
|
+
promptVersion: llm_1.SUMMARY_PROMPT_VERSION,
|
|
348
|
+
summaryFormat: llm_1.SUMMARY_FORMAT,
|
|
349
|
+
customPromptHash,
|
|
350
|
+
provider: config.provider,
|
|
351
|
+
model: config.chat.model,
|
|
352
|
+
includeCodeSnapshot: options.includeCodeSnapshot
|
|
353
|
+
});
|
|
354
|
+
const totals = { scanned: 0, skipped: 0, updated: 0, refreshed: 0, failed: 0, pruned: 0 };
|
|
289
355
|
const failures = [];
|
|
290
356
|
const isJson = options.json;
|
|
291
357
|
const concurrency = config.summarize.concurrency;
|
|
@@ -293,20 +359,31 @@ async function runSummarize(options, config) {
|
|
|
293
359
|
const summaryAssetTasks = new Map();
|
|
294
360
|
async function ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot) {
|
|
295
361
|
const summaryPath = getSummaryPath(summaryDir, hash);
|
|
296
|
-
if (await
|
|
297
|
-
return;
|
|
362
|
+
if (!options.refresh && await isSummaryAssetFresh(summaryPath, summaryFreshnessExpected(hash))) {
|
|
363
|
+
return false;
|
|
298
364
|
}
|
|
299
365
|
let task = summaryAssetTasks.get(hash);
|
|
300
366
|
if (!task) {
|
|
301
367
|
task = (async () => {
|
|
302
|
-
const
|
|
303
|
-
|
|
368
|
+
const generatedAt = new Date().toISOString();
|
|
369
|
+
const metadata = buildSummaryMetadata({
|
|
370
|
+
filePath,
|
|
371
|
+
hash,
|
|
372
|
+
rawCodeSnapshot,
|
|
373
|
+
config,
|
|
374
|
+
generatedAt,
|
|
375
|
+
customPromptHash,
|
|
376
|
+
customPromptSource
|
|
377
|
+
});
|
|
378
|
+
const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, metadata, config.chat, config.summarize.resolvedSummaryPrompt);
|
|
379
|
+
await ensureSummaryAsset(summaryDir, hash, metadata, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
|
|
380
|
+
return true;
|
|
304
381
|
})().finally(() => {
|
|
305
382
|
summaryAssetTasks.delete(hash);
|
|
306
383
|
});
|
|
307
384
|
summaryAssetTasks.set(hash, task);
|
|
308
385
|
}
|
|
309
|
-
|
|
386
|
+
return task;
|
|
310
387
|
}
|
|
311
388
|
if (!isJson) {
|
|
312
389
|
console.log(`Starting summarize run`);
|
|
@@ -403,11 +480,17 @@ async function runSummarize(options, config) {
|
|
|
403
480
|
const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
|
|
404
481
|
const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
|
|
405
482
|
if (previousHash === hash) {
|
|
483
|
+
const regenerated = await ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot);
|
|
406
484
|
await withManifestLock(async () => {
|
|
407
|
-
|
|
485
|
+
if (regenerated) {
|
|
486
|
+
totals.refreshed += 1;
|
|
487
|
+
}
|
|
488
|
+
else {
|
|
489
|
+
totals.skipped += 1;
|
|
490
|
+
}
|
|
408
491
|
completedModified += 1;
|
|
409
492
|
if (!isJson) {
|
|
410
|
-
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] unchanged ${filePath}`);
|
|
493
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] ${regenerated ? "refreshed" : "unchanged"} ${filePath}`);
|
|
411
494
|
}
|
|
412
495
|
});
|
|
413
496
|
return;
|
|
@@ -481,6 +564,7 @@ async function runSummarize(options, config) {
|
|
|
481
564
|
console.log(`Summarize complete`);
|
|
482
565
|
console.log(`Scanned: ${totals.scanned}`);
|
|
483
566
|
console.log(`Updated: ${totals.updated}`);
|
|
567
|
+
console.log(`Refreshed: ${totals.refreshed}`);
|
|
484
568
|
console.log(`Skipped: ${totals.skipped}`);
|
|
485
569
|
console.log(`Pruned: ${totals.pruned}`);
|
|
486
570
|
console.log(`Failed: ${totals.failed}`);
|
package/dist/config.js
CHANGED
|
@@ -36,6 +36,14 @@ function readPositiveIntegerOption(value, envName, fallback) {
|
|
|
36
36
|
}
|
|
37
37
|
return parsed;
|
|
38
38
|
}
|
|
39
|
+
function readPromptOption(value, envName) {
|
|
40
|
+
const option = value ?? process.env[envName];
|
|
41
|
+
return option && option.trim() ? option : undefined;
|
|
42
|
+
}
|
|
43
|
+
function resolvePromptFile(promptFile) {
|
|
44
|
+
const resolvedPath = node_path_1.default.resolve(process.cwd(), promptFile);
|
|
45
|
+
return node_fs_1.default.readFileSync(resolvedPath, "utf8");
|
|
46
|
+
}
|
|
39
47
|
function loadRcFile(configPath) {
|
|
40
48
|
const resolvedPath = node_path_1.default.resolve(process.cwd(), configPath || ".diffdocrc");
|
|
41
49
|
if (!node_fs_1.default.existsSync(resolvedPath)) {
|
|
@@ -73,6 +81,13 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
|
|
|
73
81
|
const excludeGlobs = readListOption(mergedOptions.excludeGlobs, "DIFFDOC_EXCLUDE_GLOBS");
|
|
74
82
|
const ignoreFile = readOption(mergedOptions.ignoreFile, "DIFFDOC_IGNORE_FILE", ".diffdocignore");
|
|
75
83
|
const summarizeConcurrency = readPositiveIntegerOption(mergedOptions.summarizeConcurrency, "DIFFDOC_SUMMARIZE_CONCURRENCY", 2);
|
|
84
|
+
const summaryPrompt = readPromptOption(mergedOptions.summaryPrompt, "DIFFDOC_SUMMARY_PROMPT");
|
|
85
|
+
const summaryPromptFile = readPromptOption(mergedOptions.summaryPromptFile, "DIFFDOC_SUMMARY_PROMPT_FILE");
|
|
86
|
+
if (summaryPrompt && summaryPromptFile) {
|
|
87
|
+
throw new Error("Configure either summaryPrompt or summaryPromptFile, not both.");
|
|
88
|
+
}
|
|
89
|
+
const resolvedSummaryPrompt = summaryPromptFile ? resolvePromptFile(summaryPromptFile) : summaryPrompt;
|
|
90
|
+
const summaryPromptSource = summaryPromptFile ? summaryPromptFile : summaryPrompt ? "inline" : undefined;
|
|
76
91
|
const chatBaseURL = provider === "cloud"
|
|
77
92
|
? readOption(mergedOptions.cloudLlmEndpoint, "CLOUD_LLM_ENDPOINT", "https://api.openai.com/v1")
|
|
78
93
|
: readOption(mergedOptions.localLlmEndpoint, "LOCAL_LLM_ENDPOINT");
|
|
@@ -118,7 +133,11 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
|
|
|
118
133
|
includeGlobs,
|
|
119
134
|
excludeGlobs,
|
|
120
135
|
ignoreFile,
|
|
121
|
-
concurrency: summarizeConcurrency
|
|
136
|
+
concurrency: summarizeConcurrency,
|
|
137
|
+
summaryPrompt,
|
|
138
|
+
summaryPromptFile,
|
|
139
|
+
resolvedSummaryPrompt,
|
|
140
|
+
summaryPromptSource
|
|
122
141
|
}
|
|
123
142
|
};
|
|
124
143
|
}
|
package/dist/index.js
CHANGED
|
@@ -42,7 +42,7 @@ function addCloudEndpointAndKeyOptions(command) {
|
|
|
42
42
|
program
|
|
43
43
|
.name("diffdoc")
|
|
44
44
|
.description("Translate repository code shifts into plain-English business context")
|
|
45
|
-
.version("0.5.0");
|
|
45
|
+
.version("0.6.0");
|
|
46
46
|
program
|
|
47
47
|
.command("init")
|
|
48
48
|
.description("Initialize DiffDoc configuration for this repository")
|
|
@@ -72,6 +72,9 @@ addChatOptions(addBaseOptions(program
|
|
|
72
72
|
.option("--exclude-glob <pattern>", "exclude glob pattern (repeatable)", collectOption, [])
|
|
73
73
|
.option("--ignore-file <path>", "path to ignore pattern file relative to --path")
|
|
74
74
|
.option("--summarize-concurrency <count>", "number of files to summarize concurrently")
|
|
75
|
+
.option("--summary-prompt <text>", "additional instructions for summary generation")
|
|
76
|
+
.option("--summary-prompt-file <path>", "path to additional summary prompt instructions")
|
|
77
|
+
.option("--refresh", "regenerate summaries even when source and summary metadata are fresh", false)
|
|
75
78
|
.action(async (options) => {
|
|
76
79
|
try {
|
|
77
80
|
const config = (0, config_1.buildRuntimeConfig)(options, { chat: true });
|
|
@@ -83,7 +86,8 @@ addChatOptions(addBaseOptions(program
|
|
|
83
86
|
json: options.json,
|
|
84
87
|
includeGlobs: options.includeGlob,
|
|
85
88
|
excludeGlobs: options.excludeGlob,
|
|
86
|
-
ignoreFile: options.ignoreFile
|
|
89
|
+
ignoreFile: options.ignoreFile,
|
|
90
|
+
refresh: options.refresh
|
|
87
91
|
}, config);
|
|
88
92
|
}
|
|
89
93
|
catch (error) {
|
package/dist/types/artifacts.js
CHANGED
|
@@ -2,4 +2,4 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.SUMMARY_ASSET_SCHEMA_VERSION = exports.MANIFEST_SCHEMA_VERSION = void 0;
|
|
4
4
|
exports.MANIFEST_SCHEMA_VERSION = 2;
|
|
5
|
-
exports.SUMMARY_ASSET_SCHEMA_VERSION = 1;
|
|
5
|
+
exports.SUMMARY_ASSET_SCHEMA_VERSION = 2;
|
package/dist/utils/hashing.js
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.hashFileContent = hashFileContent;
|
|
4
|
+
exports.hashTextContent = hashTextContent;
|
|
4
5
|
const node_crypto_1 = require("node:crypto");
|
|
5
6
|
function hashFileContent(fileContent) {
|
|
6
7
|
return (0, node_crypto_1.createHash)("md5").update(fileContent, "utf8").digest("hex");
|
|
7
8
|
}
|
|
9
|
+
function hashTextContent(textContent) {
|
|
10
|
+
return (0, node_crypto_1.createHash)("sha256").update(textContent, "utf8").digest("hex");
|
|
11
|
+
}
|
package/dist/utils/llm.js
CHANGED
|
@@ -3,17 +3,108 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.SUMMARY_FORMAT = exports.SUMMARY_PROMPT_VERSION = void 0;
|
|
6
7
|
exports.generateFunctionalSummary = generateFunctionalSummary;
|
|
7
8
|
exports.generateAnswer = generateAnswer;
|
|
8
9
|
exports.generateEmbeddings = generateEmbeddings;
|
|
9
10
|
const openai_1 = __importDefault(require("openai"));
|
|
11
|
+
exports.SUMMARY_PROMPT_VERSION = 1;
|
|
12
|
+
exports.SUMMARY_FORMAT = "structured-functional-v1";
|
|
13
|
+
const SUMMARY_SYSTEM_PROMPT = `Generate a structured DiffDoc functional summary for the provided source file.
|
|
14
|
+
|
|
15
|
+
Required headings, exactly once and in this order:
|
|
16
|
+
## Metadata
|
|
17
|
+
## Purpose
|
|
18
|
+
## User-Visible Behavior
|
|
19
|
+
## Business Rules
|
|
20
|
+
## Data Inputs And Outputs
|
|
21
|
+
## Side Effects
|
|
22
|
+
## Error And Edge Cases
|
|
23
|
+
## Dependencies
|
|
24
|
+
## Operational Notes
|
|
25
|
+
|
|
26
|
+
Section guidance:
|
|
27
|
+
|
|
28
|
+
## Metadata
|
|
29
|
+
Include file-level context useful for search and retrieval. This section is mandatory and must contain every bullet below exactly once, in this order:
|
|
30
|
+
- File path: {copy the provided file path exactly}
|
|
31
|
+
- File name: {copy the provided file name exactly}
|
|
32
|
+
- Extension: {copy the provided extension exactly}
|
|
33
|
+
- Inferred language/type: {infer from file path, file name, extension, and code content}
|
|
34
|
+
- Content hash: {copy the provided content hash exactly}
|
|
35
|
+
- Line count: {copy the provided line count exactly}
|
|
36
|
+
- Byte size: {copy the provided byte size exactly}
|
|
37
|
+
- Summary format: {copy the provided summary format exactly}
|
|
38
|
+
- Notable symbols/classes/functions: {infer from code, or write "None identified."}
|
|
39
|
+
- External dependencies: {infer from imports, packages, runtime services, external APIs, or write "None identified."}
|
|
40
|
+
- Internal dependencies: {infer from project imports, local modules, local artifacts, or write "None identified."}
|
|
41
|
+
- Public API/exports: {infer exported functions, classes, types, routes, commands, tools, or write "None identified."}
|
|
42
|
+
|
|
43
|
+
## Purpose
|
|
44
|
+
Explain why this file exists and the main responsibility it serves.
|
|
45
|
+
Examples: handles login requests, builds a vector index, loads runtime configuration.
|
|
46
|
+
|
|
47
|
+
## User-Visible Behavior
|
|
48
|
+
Describe behavior users, operators, developers, or API consumers would observe.
|
|
49
|
+
Examples: CLI output, API responses, UI behavior, created/updated/deleted files, validation errors.
|
|
50
|
+
|
|
51
|
+
## Business Rules
|
|
52
|
+
Describe implemented rules, constraints, decisions, and policy-like behavior.
|
|
53
|
+
Examples: required fields, valid modes, filtering precedence, defaults, validation rules, skip conditions.
|
|
54
|
+
|
|
55
|
+
## Data Inputs And Outputs
|
|
56
|
+
Describe what data enters and leaves this file's behavior.
|
|
57
|
+
Examples: input files, config values, environment variables, function arguments, API payloads, generated artifacts, return values.
|
|
58
|
+
|
|
59
|
+
## Side Effects
|
|
60
|
+
Describe changes caused outside local computation.
|
|
61
|
+
Examples: writes files, deletes files, calls external services, updates indexes, logs output, mutates shared state, sends network requests.
|
|
62
|
+
|
|
63
|
+
## Error And Edge Cases
|
|
64
|
+
Describe failure handling and unusual conditions.
|
|
65
|
+
Examples: missing files, invalid config, unsupported schemas, empty results, network/model failures, deleted or unchanged files.
|
|
66
|
+
|
|
67
|
+
## Dependencies
|
|
68
|
+
Describe important internal and external dependencies.
|
|
69
|
+
Examples: imported packages, runtime services, local artifacts, external APIs, models/providers, framework components, project files.
|
|
70
|
+
|
|
71
|
+
## Operational Notes
|
|
72
|
+
Describe details useful for running, maintaining, scaling, or debugging.
|
|
73
|
+
Examples: concurrency, performance, idempotency, caching/reuse, schema implications, regeneration requirements, security/privacy considerations.
|
|
74
|
+
|
|
75
|
+
Rules:
|
|
76
|
+
- Use every heading exactly once.
|
|
77
|
+
- Use headings in the required order.
|
|
78
|
+
- Start with ## Metadata.
|
|
79
|
+
- Include provided deterministic metadata values exactly.
|
|
80
|
+
- Do not rename, omit, reorder, or merge Metadata bullets.
|
|
81
|
+
- Infer the language/type from the provided file path, file name, extension, and code content. Prefer code content when extension is ambiguous. If uncertain, provide the best likely language/type and briefly note uncertainty.
|
|
82
|
+
- Let the code identify symbols, classes, functions, and dependencies. Include important identifiers when useful for search.
|
|
83
|
+
- If a section has no applicable content, write "None identified."
|
|
84
|
+
- Do not invent behavior, requirements, dependencies, or intent not supported by the code.
|
|
85
|
+
- Summarize implemented behavior only.
|
|
86
|
+
- Prefer specific behavior over generic descriptions.
|
|
87
|
+
- Use plain English.
|
|
88
|
+
- Provide zero conversational preamble.
|
|
89
|
+
- Do not include Markdown sections outside the required headings.`;
|
|
10
90
|
function createClient(config) {
|
|
11
91
|
return {
|
|
12
92
|
client: new openai_1.default({ apiKey: config.apiKey, baseURL: config.baseURL }),
|
|
13
93
|
model: config.model
|
|
14
94
|
};
|
|
15
95
|
}
|
|
16
|
-
|
|
96
|
+
function formatMetadataForPrompt(metadata) {
|
|
97
|
+
return [
|
|
98
|
+
`- File path: ${metadata.file_path}`,
|
|
99
|
+
`- File name: ${metadata.file_name}`,
|
|
100
|
+
`- Extension: ${metadata.extension || "None"}`,
|
|
101
|
+
`- Content hash: ${metadata.content_hash}`,
|
|
102
|
+
`- Line count: ${metadata.line_count}`,
|
|
103
|
+
`- Byte size: ${metadata.byte_size}`,
|
|
104
|
+
`- Summary format: ${metadata.summary_format}`
|
|
105
|
+
].join("\n");
|
|
106
|
+
}
|
|
107
|
+
async function generateFunctionalSummary(fileName, codeContent, metadata, config, customPrompt) {
|
|
17
108
|
const { client, model } = createClient(config);
|
|
18
109
|
const response = await client.chat.completions.create({
|
|
19
110
|
model,
|
|
@@ -21,11 +112,11 @@ async function generateFunctionalSummary(fileName, codeContent, config) {
|
|
|
21
112
|
messages: [
|
|
22
113
|
{
|
|
23
114
|
role: "system",
|
|
24
|
-
content:
|
|
115
|
+
content: SUMMARY_SYSTEM_PROMPT
|
|
25
116
|
},
|
|
26
117
|
{
|
|
27
118
|
role: "user",
|
|
28
|
-
content: `File: ${fileName}\n\nCode:\n${codeContent}`
|
|
119
|
+
content: `File: ${fileName}\n\nProvided metadata:\n${formatMetadataForPrompt(metadata)}\n\nConsumer instructions:\n${customPrompt && customPrompt.trim() ? customPrompt.trim() : "None."}\n\nCode:\n${codeContent}`
|
|
29
120
|
}
|
|
30
121
|
]
|
|
31
122
|
});
|