diffdoc 0.4.3 → 0.6.0
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/.diffdocrc.example +1 -0
- package/README.md +57 -3
- package/dist/commands/embed.js +7 -6
- package/dist/commands/init.js +1 -0
- package/dist/commands/status.js +76 -8
- package/dist/commands/summarize.js +227 -85
- package/dist/config.js +22 -1
- package/dist/index.js +7 -2
- package/dist/types/artifacts.js +1 -1
- package/dist/utils/hashing.js +4 -0
- package/dist/utils/llm.js +94 -3
- package/package.json +1 -1
package/.diffdocrc.example
CHANGED
package/README.md
CHANGED
|
@@ -125,6 +125,7 @@ Example `.diffdocrc` for local models:
|
|
|
125
125
|
"localChatModel": "qwen2.5-coder:7b",
|
|
126
126
|
"localEmbedModel": "nomic-embed-code",
|
|
127
127
|
"embedBatchSize": 25,
|
|
128
|
+
"summarizeConcurrency": 2,
|
|
128
129
|
"includeGlobs": [],
|
|
129
130
|
"excludeGlobs": [],
|
|
130
131
|
"ignoreFile": ".diffdocignore"
|
|
@@ -141,6 +142,7 @@ Example `.diffdocrc` for a cloud OpenAI-compatible endpoint:
|
|
|
141
142
|
"cloudChatModel": "gpt-4o-mini",
|
|
142
143
|
"cloudEmbedModel": "text-embedding-3-small",
|
|
143
144
|
"embedBatchSize": 25,
|
|
145
|
+
"summarizeConcurrency": 2,
|
|
144
146
|
"includeGlobs": [],
|
|
145
147
|
"excludeGlobs": [],
|
|
146
148
|
"ignoreFile": ".diffdocignore"
|
|
@@ -159,9 +161,12 @@ Supported environment variables:
|
|
|
159
161
|
AI_PROVIDER
|
|
160
162
|
DIFFDOC_BASE_DIR
|
|
161
163
|
DIFFDOC_EMBED_BATCH_SIZE
|
|
164
|
+
DIFFDOC_SUMMARIZE_CONCURRENCY
|
|
162
165
|
DIFFDOC_INCLUDE_GLOBS
|
|
163
166
|
DIFFDOC_EXCLUDE_GLOBS
|
|
164
167
|
DIFFDOC_IGNORE_FILE
|
|
168
|
+
DIFFDOC_SUMMARY_PROMPT
|
|
169
|
+
DIFFDOC_SUMMARY_PROMPT_FILE
|
|
165
170
|
LOCAL_LLM_ENDPOINT
|
|
166
171
|
LOCAL_CHAT_MODEL
|
|
167
172
|
LOCAL_EMBED_ENDPOINT
|
|
@@ -227,14 +232,27 @@ Summarize files into `.diffdoc/manifest.json` and `.diffdoc/summaries/*.json`:
|
|
|
227
232
|
npx diffdoc summarize --path . --mode all
|
|
228
233
|
npx diffdoc summarize --path . --mode delta
|
|
229
234
|
npx diffdoc summarize --path . --mode delta --json
|
|
235
|
+
npx diffdoc summarize --path . --mode all --summarize-concurrency 4
|
|
236
|
+
npx diffdoc summarize --path . --mode all --refresh
|
|
230
237
|
```
|
|
231
238
|
|
|
232
|
-
|
|
239
|
+
Summarization runs with bounded concurrency. The default is `2`; use `1` for strict rate limits, `2-4` for most providers, and higher values only when your local model server or API quota can handle the request volume.
|
|
240
|
+
|
|
241
|
+
Use `--summary-prompt` or `--summary-prompt-file` to add domain-specific guidance without replacing DiffDoc's default structured prompt:
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
npx diffdoc summarize --summary-prompt "Emphasize billing behavior, permissions, data retention, and operational risk."
|
|
245
|
+
npx diffdoc summarize --summary-prompt-file ./diffdoc-summary-prompt.md
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
Raw code snapshots are optional. DiffDoc normally stores file path and content hash metadata so tools can look up source files from the repository when needed. Store raw code snapshots only when you need exported, offline, or point-in-time audit artifacts to include source text:
|
|
233
249
|
|
|
234
250
|
```bash
|
|
235
251
|
npx diffdoc summarize --path . --mode all --include-code-snapshot
|
|
236
252
|
```
|
|
237
253
|
|
|
254
|
+
Snapshots increase artifact size and duplicate source code, which can include sensitive or proprietary content.
|
|
255
|
+
|
|
238
256
|
Check manifest and index freshness:
|
|
239
257
|
|
|
240
258
|
```bash
|
|
@@ -242,6 +260,8 @@ npx diffdoc status
|
|
|
242
260
|
npx diffdoc status --json
|
|
243
261
|
```
|
|
244
262
|
|
|
263
|
+
`status` also recommends the next command to run. It prioritizes refreshing missing or stale summaries before rebuilding the vector index.
|
|
264
|
+
|
|
245
265
|
Embed summaries into the local Vectra index:
|
|
246
266
|
|
|
247
267
|
```bash
|
|
@@ -299,13 +319,47 @@ Each summary asset is portable JSON:
|
|
|
299
319
|
|
|
300
320
|
```json
|
|
301
321
|
{
|
|
302
|
-
"schemaVersion": 1,
|
|
322
|
+
"schemaVersion": 2,
|
|
303
323
|
"content_hash": "md5-string",
|
|
304
|
-
"
|
|
324
|
+
"metadata": {
|
|
325
|
+
"file_path": "src/example.ts",
|
|
326
|
+
"file_name": "example.ts",
|
|
327
|
+
"extension": ".ts",
|
|
328
|
+
"line_count": 42,
|
|
329
|
+
"byte_size": 1200,
|
|
330
|
+
"content_hash": "md5-string",
|
|
331
|
+
"generated_at": "2026-05-27T00:00:00.000Z",
|
|
332
|
+
"generator": {
|
|
333
|
+
"provider": "local",
|
|
334
|
+
"model": "qwen2.5-coder:7b",
|
|
335
|
+
"base_url": "http://localhost:11434/v1"
|
|
336
|
+
},
|
|
337
|
+
"prompt_version": 1,
|
|
338
|
+
"summary_format": "structured-functional-v1"
|
|
339
|
+
},
|
|
340
|
+
"summary": "## Metadata\n- File path: src/example.ts\n...",
|
|
305
341
|
"raw_code_snapshot": "Optional code text when --include-code-snapshot is enabled"
|
|
306
342
|
}
|
|
307
343
|
```
|
|
308
344
|
|
|
345
|
+
The JSON `metadata` contains deterministic source and generation facts. The markdown `summary` begins with `## Metadata`, which is embedded with the rest of the summary so file paths, hashes, inferred language/type, symbols, functions, classes, and dependencies are searchable. Language/type and symbol/dependency details are inferred by the model from the file path, extension, and code content rather than maintained through a static parser.
|
|
346
|
+
|
|
347
|
+
Structured summaries use these sections in order:
|
|
348
|
+
|
|
349
|
+
```md
|
|
350
|
+
## Metadata
|
|
351
|
+
## Purpose
|
|
352
|
+
## User-Visible Behavior
|
|
353
|
+
## Business Rules
|
|
354
|
+
## Data Inputs And Outputs
|
|
355
|
+
## Side Effects
|
|
356
|
+
## Error And Edge Cases
|
|
357
|
+
## Dependencies
|
|
358
|
+
## Operational Notes
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
Summary assets are regenerated when the source hash changes, summary schema changes, prompt version changes, summary format changes, custom prompt hash changes, provider/model changes, or `--refresh` is passed. Regenerate existing schema `1` artifacts with `npx diffdoc summarize --mode all --refresh`. The `embed` command remains tolerant of older summary assets as long as they contain a content hash and summary text; use `status` or `summarize` to identify and refresh stale metadata.
|
|
362
|
+
|
|
309
363
|
Commit `.diffdoc/manifest.json` and `.diffdoc/summaries/*.json` if you want summaries shared across machines or CI runs. Keep `.diffdoc/vectra/` local unless you have a specific reason to commit the generated vector index.
|
|
310
364
|
|
|
311
365
|
The manifest and summary assets are the stable handoff point for consumers. The local Vectra index produced by `diffdoc embed` is optional and can be replaced by any embedding model and storage backend that fits your environment.
|
package/dist/commands/embed.js
CHANGED
|
@@ -34,8 +34,8 @@ async function readManifest(manifestPath) {
|
|
|
34
34
|
}
|
|
35
35
|
async function readSummaryAsset(summaryPath) {
|
|
36
36
|
const parsed = JSON.parse(await promises_1.default.readFile(summaryPath, "utf8"));
|
|
37
|
-
if (parsed.schemaVersion !== artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION) {
|
|
38
|
-
throw new Error(`Unsupported summary schema in ${summaryPath}. Expected schemaVersion
|
|
37
|
+
if (typeof parsed.schemaVersion !== "number" || parsed.schemaVersion < 1 || parsed.schemaVersion > artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION) {
|
|
38
|
+
throw new Error(`Unsupported summary schema in ${summaryPath}. Expected schemaVersion 1-${artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION}.`);
|
|
39
39
|
}
|
|
40
40
|
if (typeof parsed.content_hash !== "string") {
|
|
41
41
|
throw new Error(`Invalid summary hash in ${summaryPath}.`);
|
|
@@ -44,14 +44,15 @@ async function readSummaryAsset(summaryPath) {
|
|
|
44
44
|
throw new Error(`Invalid summary text in ${summaryPath}.`);
|
|
45
45
|
}
|
|
46
46
|
return {
|
|
47
|
-
schemaVersion:
|
|
47
|
+
schemaVersion: parsed.schemaVersion,
|
|
48
48
|
content_hash: parsed.content_hash,
|
|
49
|
+
metadata: parsed.metadata && typeof parsed.metadata === "object" ? parsed.metadata : undefined,
|
|
49
50
|
summary: parsed.summary,
|
|
50
51
|
raw_code_snapshot: typeof parsed.raw_code_snapshot === "string" ? parsed.raw_code_snapshot : undefined
|
|
51
52
|
};
|
|
52
53
|
}
|
|
53
|
-
function buildDocument(
|
|
54
|
-
return
|
|
54
|
+
function buildDocument(summaryAsset) {
|
|
55
|
+
return summaryAsset.summary;
|
|
55
56
|
}
|
|
56
57
|
async function runEmbed(options, config) {
|
|
57
58
|
const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.manifest, config.baseDir);
|
|
@@ -96,7 +97,7 @@ async function runEmbed(options, config) {
|
|
|
96
97
|
hash,
|
|
97
98
|
summaryText: summaryAsset.summary,
|
|
98
99
|
rawCodeSnapshot: summaryAsset.raw_code_snapshot,
|
|
99
|
-
document: buildDocument(
|
|
100
|
+
document: buildDocument(summaryAsset)
|
|
100
101
|
});
|
|
101
102
|
}
|
|
102
103
|
const activePathSet = new Set(entries.map(([filePath]) => filePath));
|
package/dist/commands/init.js
CHANGED
package/dist/commands/status.js
CHANGED
|
@@ -10,6 +10,7 @@ const vectra_1 = require("vectra");
|
|
|
10
10
|
const embed_1 = require("./embed");
|
|
11
11
|
const artifacts_1 = require("../types/artifacts");
|
|
12
12
|
const paths_1 = require("../utils/paths");
|
|
13
|
+
const llm_1 = require("../utils/llm");
|
|
13
14
|
function getSummaryDir(manifestPath) {
|
|
14
15
|
return node_path_1.default.resolve(node_path_1.default.dirname(manifestPath), "summaries");
|
|
15
16
|
}
|
|
@@ -64,10 +65,37 @@ async function getSummaryStats(manifestPath, manifest) {
|
|
|
64
65
|
missingFromManifestCount += 1;
|
|
65
66
|
}
|
|
66
67
|
}
|
|
68
|
+
let staleCount = 0;
|
|
69
|
+
for (const hash of manifestHashes) {
|
|
70
|
+
if (!summaryHashes.has(hash)) {
|
|
71
|
+
continue;
|
|
72
|
+
}
|
|
73
|
+
try {
|
|
74
|
+
const parsed = JSON.parse(await promises_1.default.readFile(node_path_1.default.resolve(summaryDir, `${hash}.json`), "utf8"));
|
|
75
|
+
const metadata = parsed.metadata && typeof parsed.metadata === "object" && !Array.isArray(parsed.metadata)
|
|
76
|
+
? parsed.metadata
|
|
77
|
+
: undefined;
|
|
78
|
+
if (parsed.schemaVersion !== artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION ||
|
|
79
|
+
parsed.content_hash !== hash ||
|
|
80
|
+
!metadata ||
|
|
81
|
+
typeof metadata.file_path !== "string" ||
|
|
82
|
+
typeof metadata.file_name !== "string" ||
|
|
83
|
+
typeof metadata.extension !== "string" ||
|
|
84
|
+
metadata.content_hash !== hash ||
|
|
85
|
+
metadata.prompt_version !== llm_1.SUMMARY_PROMPT_VERSION ||
|
|
86
|
+
metadata.summary_format !== llm_1.SUMMARY_FORMAT) {
|
|
87
|
+
staleCount += 1;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
staleCount += 1;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
67
94
|
return {
|
|
68
95
|
summaryFileCount: summaryHashes.size,
|
|
69
96
|
orphanCount,
|
|
70
|
-
missingFromManifestCount
|
|
97
|
+
missingFromManifestCount,
|
|
98
|
+
staleCount
|
|
71
99
|
};
|
|
72
100
|
}
|
|
73
101
|
async function getIndexFreshness(manifest, config) {
|
|
@@ -120,22 +148,58 @@ async function getIndexFreshness(manifest, config) {
|
|
|
120
148
|
};
|
|
121
149
|
}
|
|
122
150
|
function formatSummaryFreshness(stats) {
|
|
123
|
-
if (stats.missingFromManifestCount === 0) {
|
|
151
|
+
if (stats.missingFromManifestCount === 0 && stats.staleCount === 0) {
|
|
124
152
|
return "fresh";
|
|
125
153
|
}
|
|
126
|
-
return `stale (missing: ${stats.missingFromManifestCount})`;
|
|
154
|
+
return `stale (missing: ${stats.missingFromManifestCount}, stale: ${stats.staleCount})`;
|
|
155
|
+
}
|
|
156
|
+
function buildSummarizeCommand(manifestOption) {
|
|
157
|
+
const command = "diffdoc summarize --mode all --refresh";
|
|
158
|
+
return manifestOption === "manifest.json" ? command : `${command} --out ${manifestOption}`;
|
|
159
|
+
}
|
|
160
|
+
function buildEmbedCommand(manifestOption) {
|
|
161
|
+
const command = "diffdoc embed";
|
|
162
|
+
return manifestOption === "manifest.json" ? command : `${command} --manifest ${manifestOption}`;
|
|
163
|
+
}
|
|
164
|
+
function getNextCommand(manifestOption, summaryStats, indexFreshness) {
|
|
165
|
+
if (summaryStats.missingFromManifestCount > 0 || summaryStats.staleCount > 0) {
|
|
166
|
+
return {
|
|
167
|
+
command: buildSummarizeCommand(manifestOption),
|
|
168
|
+
reason: "summary artifacts are missing or stale"
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
if (indexFreshness.status === "missing") {
|
|
172
|
+
return {
|
|
173
|
+
command: buildEmbedCommand(manifestOption),
|
|
174
|
+
reason: "vector index is missing"
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
|
+
if (indexFreshness.status === "stale") {
|
|
178
|
+
return {
|
|
179
|
+
command: buildEmbedCommand(manifestOption),
|
|
180
|
+
reason: "vector index is stale"
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
return {
|
|
184
|
+
command: null,
|
|
185
|
+
reason: "summaries and index are fresh"
|
|
186
|
+
};
|
|
127
187
|
}
|
|
128
|
-
function buildStatusReport(manifest, summaryStats, indexFreshness) {
|
|
188
|
+
function buildStatusReport(manifest, summaryStats, indexFreshness, manifestOption) {
|
|
189
|
+
const nextCommand = getNextCommand(manifestOption, summaryStats, indexFreshness);
|
|
129
190
|
return {
|
|
130
191
|
manifestSchema: manifest.schemaVersion,
|
|
131
192
|
trackedFileCount: Object.keys(manifest.files).length,
|
|
132
193
|
summaryFileCount: summaryStats.summaryFileCount,
|
|
133
194
|
orphanCount: summaryStats.orphanCount,
|
|
134
195
|
summaryFreshness: {
|
|
135
|
-
status: summaryStats.missingFromManifestCount === 0 ? "fresh" : "stale",
|
|
136
|
-
missing: summaryStats.missingFromManifestCount
|
|
196
|
+
status: summaryStats.missingFromManifestCount === 0 && summaryStats.staleCount === 0 ? "fresh" : "stale",
|
|
197
|
+
missing: summaryStats.missingFromManifestCount,
|
|
198
|
+
stale: summaryStats.staleCount
|
|
137
199
|
},
|
|
138
|
-
indexFreshness
|
|
200
|
+
indexFreshness,
|
|
201
|
+
nextCommand: nextCommand.command,
|
|
202
|
+
nextCommandReason: nextCommand.reason
|
|
139
203
|
};
|
|
140
204
|
}
|
|
141
205
|
function formatIndexFreshness(freshness) {
|
|
@@ -152,7 +216,7 @@ async function runStatus(options, config) {
|
|
|
152
216
|
const manifest = await readManifest(manifestPath);
|
|
153
217
|
const summaryStats = await getSummaryStats(manifestPath, manifest);
|
|
154
218
|
const indexFreshness = await getIndexFreshness(manifest, config);
|
|
155
|
-
const report = buildStatusReport(manifest, summaryStats, indexFreshness);
|
|
219
|
+
const report = buildStatusReport(manifest, summaryStats, indexFreshness, options.manifest);
|
|
156
220
|
if (options.json) {
|
|
157
221
|
console.log(JSON.stringify(report, null, 2));
|
|
158
222
|
return;
|
|
@@ -161,6 +225,10 @@ async function runStatus(options, config) {
|
|
|
161
225
|
console.log(`tracked files: ${report.trackedFileCount}`);
|
|
162
226
|
console.log(`summary files: ${report.summaryFileCount}`);
|
|
163
227
|
console.log(`orphans: ${report.orphanCount}`);
|
|
228
|
+
console.log(`stale summaries: ${report.summaryFreshness.stale}`);
|
|
164
229
|
console.log(`summary freshness: ${formatSummaryFreshness(summaryStats)}`);
|
|
165
230
|
console.log(`index freshness: ${formatIndexFreshness(indexFreshness)}`);
|
|
231
|
+
console.log("");
|
|
232
|
+
console.log(`next command: ${report.nextCommand || "none"}`);
|
|
233
|
+
console.log(`reason: ${report.nextCommandReason}`);
|
|
166
234
|
}
|
|
@@ -71,15 +71,6 @@ function shouldIncludeFile(filePath, includeGlobs, excludeGlobs, ignoreMatcher)
|
|
|
71
71
|
function isIgnoredDirectory(dirPath, ignoreMatcher) {
|
|
72
72
|
return ignoreMatcher.ignores(dirPath) || ignoreMatcher.ignores(`${dirPath}/`);
|
|
73
73
|
}
|
|
74
|
-
async function fileExists(filePath) {
|
|
75
|
-
try {
|
|
76
|
-
await promises_1.default.access(filePath);
|
|
77
|
-
return true;
|
|
78
|
-
}
|
|
79
|
-
catch {
|
|
80
|
-
return false;
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
74
|
async function atomicWriteUtf8(targetPath, content) {
|
|
84
75
|
await promises_1.default.mkdir(node_path_1.default.dirname(targetPath), { recursive: true });
|
|
85
76
|
const tempPath = `${targetPath}.${process.pid}.${Date.now()}.tmp`;
|
|
@@ -99,6 +90,72 @@ async function writeManifest(manifestPath, manifest) {
|
|
|
99
90
|
async function writeSummaryAsset(summaryPath, summary) {
|
|
100
91
|
await atomicWriteUtf8(summaryPath, `${JSON.stringify(summary, null, 2)}\n`);
|
|
101
92
|
}
|
|
93
|
+
function getPromptHash(config) {
|
|
94
|
+
return config.summarize.resolvedSummaryPrompt
|
|
95
|
+
? (0, hashing_1.hashTextContent)(config.summarize.resolvedSummaryPrompt)
|
|
96
|
+
: undefined;
|
|
97
|
+
}
|
|
98
|
+
function buildSummaryMetadata(params) {
|
|
99
|
+
return {
|
|
100
|
+
file_path: params.filePath,
|
|
101
|
+
file_name: node_path_1.default.basename(params.filePath),
|
|
102
|
+
extension: node_path_1.default.extname(params.filePath),
|
|
103
|
+
line_count: params.rawCodeSnapshot.length === 0 ? 0 : params.rawCodeSnapshot.split(/\r\n|\r|\n/).length,
|
|
104
|
+
byte_size: Buffer.byteLength(params.rawCodeSnapshot, "utf8"),
|
|
105
|
+
content_hash: params.hash,
|
|
106
|
+
generated_at: params.generatedAt,
|
|
107
|
+
generator: {
|
|
108
|
+
provider: params.config.provider,
|
|
109
|
+
model: params.config.chat.model
|
|
110
|
+
},
|
|
111
|
+
prompt_version: llm_1.SUMMARY_PROMPT_VERSION,
|
|
112
|
+
summary_format: llm_1.SUMMARY_FORMAT,
|
|
113
|
+
custom_prompt_hash: params.customPromptHash,
|
|
114
|
+
custom_prompt_source: params.customPromptSource
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
function isRecord(value) {
|
|
118
|
+
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
|
119
|
+
}
|
|
120
|
+
function hasExpectedCustomPromptHash(metadata, customPromptHash) {
|
|
121
|
+
const actual = typeof metadata.custom_prompt_hash === "string" ? metadata.custom_prompt_hash : undefined;
|
|
122
|
+
return actual === customPromptHash;
|
|
123
|
+
}
|
|
124
|
+
async function isSummaryAssetFresh(summaryPath, expected) {
|
|
125
|
+
let parsed;
|
|
126
|
+
try {
|
|
127
|
+
parsed = JSON.parse(await promises_1.default.readFile(summaryPath, "utf8"));
|
|
128
|
+
}
|
|
129
|
+
catch {
|
|
130
|
+
return false;
|
|
131
|
+
}
|
|
132
|
+
if (!isRecord(parsed)) {
|
|
133
|
+
return false;
|
|
134
|
+
}
|
|
135
|
+
if (parsed.schemaVersion !== artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION || parsed.content_hash !== expected.hash) {
|
|
136
|
+
return false;
|
|
137
|
+
}
|
|
138
|
+
if (expected.includeCodeSnapshot !== (typeof parsed.raw_code_snapshot === "string")) {
|
|
139
|
+
return false;
|
|
140
|
+
}
|
|
141
|
+
if (!isRecord(parsed.metadata)) {
|
|
142
|
+
return false;
|
|
143
|
+
}
|
|
144
|
+
const metadata = parsed.metadata;
|
|
145
|
+
if (metadata.content_hash !== expected.hash) {
|
|
146
|
+
return false;
|
|
147
|
+
}
|
|
148
|
+
if (metadata.prompt_version !== expected.promptVersion || metadata.summary_format !== expected.summaryFormat) {
|
|
149
|
+
return false;
|
|
150
|
+
}
|
|
151
|
+
if (!hasExpectedCustomPromptHash(metadata, expected.customPromptHash)) {
|
|
152
|
+
return false;
|
|
153
|
+
}
|
|
154
|
+
if (!isRecord(metadata.generator)) {
|
|
155
|
+
return false;
|
|
156
|
+
}
|
|
157
|
+
return metadata.generator.provider === expected.provider && metadata.generator.model === expected.model;
|
|
158
|
+
}
|
|
102
159
|
async function readManifest(manifestPath) {
|
|
103
160
|
try {
|
|
104
161
|
const parsed = JSON.parse(await promises_1.default.readFile(manifestPath, "utf8"));
|
|
@@ -210,19 +267,36 @@ async function removeManifestPath(filePath, manifest, manifestPath, summaryDir,
|
|
|
210
267
|
await deleteSummaryIfUnreferenced(summaryDir, previousHash, refs);
|
|
211
268
|
return true;
|
|
212
269
|
}
|
|
213
|
-
async function ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, includeCodeSnapshot) {
|
|
270
|
+
async function ensureSummaryAsset(summaryDir, hash, metadata, summaryText, rawCodeSnapshot, includeCodeSnapshot) {
|
|
214
271
|
const summaryPath = getSummaryPath(summaryDir, hash);
|
|
215
|
-
if (await fileExists(summaryPath)) {
|
|
216
|
-
return;
|
|
217
|
-
}
|
|
218
272
|
const summary = {
|
|
219
273
|
schemaVersion: artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION,
|
|
220
274
|
content_hash: hash,
|
|
275
|
+
metadata,
|
|
221
276
|
summary: summaryText,
|
|
222
277
|
raw_code_snapshot: includeCodeSnapshot ? rawCodeSnapshot : undefined
|
|
223
278
|
};
|
|
224
279
|
await writeSummaryAsset(summaryPath, summary);
|
|
225
280
|
}
|
|
281
|
+
async function runWithConcurrency(items, concurrency, worker) {
|
|
282
|
+
let nextIndex = 0;
|
|
283
|
+
const workerCount = Math.min(concurrency, items.length);
|
|
284
|
+
await Promise.all(Array.from({ length: workerCount }, async () => {
|
|
285
|
+
while (nextIndex < items.length) {
|
|
286
|
+
const item = items[nextIndex];
|
|
287
|
+
nextIndex += 1;
|
|
288
|
+
await worker(item);
|
|
289
|
+
}
|
|
290
|
+
}));
|
|
291
|
+
}
|
|
292
|
+
function createManifestLock() {
|
|
293
|
+
let queue = Promise.resolve();
|
|
294
|
+
return async function withManifestLock(task) {
|
|
295
|
+
const run = queue.then(task, task);
|
|
296
|
+
queue = run.then(() => undefined, () => undefined);
|
|
297
|
+
return run;
|
|
298
|
+
};
|
|
299
|
+
}
|
|
226
300
|
async function pruneOrphanedSummaries(summaryDir, manifest) {
|
|
227
301
|
const activeHashes = new Set(Object.values(manifest.files));
|
|
228
302
|
let entries = [];
|
|
@@ -266,9 +340,51 @@ async function runSummarize(options, config) {
|
|
|
266
340
|
: config.summarize.excludeGlobs.map(normalizeGlobPattern));
|
|
267
341
|
const ignoreFile = options.ignoreFile || config.summarize.ignoreFile;
|
|
268
342
|
const ignoreMatcher = await readIgnoreMatcher(repoPath, ignoreFile);
|
|
269
|
-
const
|
|
343
|
+
const customPromptHash = getPromptHash(config);
|
|
344
|
+
const customPromptSource = customPromptHash ? config.summarize.summaryPromptSource : undefined;
|
|
345
|
+
const summaryFreshnessExpected = (hash) => ({
|
|
346
|
+
hash,
|
|
347
|
+
promptVersion: llm_1.SUMMARY_PROMPT_VERSION,
|
|
348
|
+
summaryFormat: llm_1.SUMMARY_FORMAT,
|
|
349
|
+
customPromptHash,
|
|
350
|
+
provider: config.provider,
|
|
351
|
+
model: config.chat.model,
|
|
352
|
+
includeCodeSnapshot: options.includeCodeSnapshot
|
|
353
|
+
});
|
|
354
|
+
const totals = { scanned: 0, skipped: 0, updated: 0, refreshed: 0, failed: 0, pruned: 0 };
|
|
270
355
|
const failures = [];
|
|
271
356
|
const isJson = options.json;
|
|
357
|
+
const concurrency = config.summarize.concurrency;
|
|
358
|
+
const withManifestLock = createManifestLock();
|
|
359
|
+
const summaryAssetTasks = new Map();
|
|
360
|
+
async function ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot) {
|
|
361
|
+
const summaryPath = getSummaryPath(summaryDir, hash);
|
|
362
|
+
if (!options.refresh && await isSummaryAssetFresh(summaryPath, summaryFreshnessExpected(hash))) {
|
|
363
|
+
return false;
|
|
364
|
+
}
|
|
365
|
+
let task = summaryAssetTasks.get(hash);
|
|
366
|
+
if (!task) {
|
|
367
|
+
task = (async () => {
|
|
368
|
+
const generatedAt = new Date().toISOString();
|
|
369
|
+
const metadata = buildSummaryMetadata({
|
|
370
|
+
filePath,
|
|
371
|
+
hash,
|
|
372
|
+
rawCodeSnapshot,
|
|
373
|
+
config,
|
|
374
|
+
generatedAt,
|
|
375
|
+
customPromptHash,
|
|
376
|
+
customPromptSource
|
|
377
|
+
});
|
|
378
|
+
const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, metadata, config.chat, config.summarize.resolvedSummaryPrompt);
|
|
379
|
+
await ensureSummaryAsset(summaryDir, hash, metadata, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
|
|
380
|
+
return true;
|
|
381
|
+
})().finally(() => {
|
|
382
|
+
summaryAssetTasks.delete(hash);
|
|
383
|
+
});
|
|
384
|
+
summaryAssetTasks.set(hash, task);
|
|
385
|
+
}
|
|
386
|
+
return task;
|
|
387
|
+
}
|
|
272
388
|
if (!isJson) {
|
|
273
389
|
console.log(`Starting summarize run`);
|
|
274
390
|
console.log(`Mode: ${options.mode}`);
|
|
@@ -283,44 +399,51 @@ async function runSummarize(options, config) {
|
|
|
283
399
|
await writeManifest(manifestPath, manifest);
|
|
284
400
|
const files = await walkCodeFiles(repoPath, includePatterns, excludePatterns, ignoreMatcher);
|
|
285
401
|
const totalFiles = files.length;
|
|
402
|
+
let completedFiles = 0;
|
|
286
403
|
if (!isJson) {
|
|
287
404
|
console.log(`Candidates: ${totalFiles}`);
|
|
405
|
+
console.log(`Concurrency: ${concurrency}`);
|
|
288
406
|
}
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
407
|
+
await runWithConcurrency(files, concurrency, async (filePath) => {
|
|
408
|
+
await withManifestLock(async () => {
|
|
409
|
+
totals.scanned += 1;
|
|
410
|
+
});
|
|
292
411
|
try {
|
|
293
412
|
const absolutePath = node_path_1.default.join(repoPath, filePath);
|
|
294
413
|
const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
|
|
295
414
|
const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
}
|
|
415
|
+
await ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot);
|
|
416
|
+
await withManifestLock(async () => {
|
|
417
|
+
manifest.files[filePath] = hash;
|
|
418
|
+
refs.set(hash, (refs.get(hash) || 0) + 1);
|
|
419
|
+
await writeManifest(manifestPath, manifest);
|
|
420
|
+
totals.updated += 1;
|
|
421
|
+
completedFiles += 1;
|
|
422
|
+
if (!isJson) {
|
|
423
|
+
console.log(`[${completedFiles}/${totalFiles}] summarized ${filePath}`);
|
|
424
|
+
}
|
|
425
|
+
});
|
|
308
426
|
}
|
|
309
427
|
catch (error) {
|
|
310
428
|
const message = error instanceof Error ? error.message : String(error);
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
429
|
+
await withManifestLock(async () => {
|
|
430
|
+
failures.push({ filePath, message });
|
|
431
|
+
totals.failed += 1;
|
|
432
|
+
completedFiles += 1;
|
|
433
|
+
if (!isJson) {
|
|
434
|
+
console.error(`[${completedFiles}/${totalFiles}] failed ${filePath}: ${message}`);
|
|
435
|
+
}
|
|
436
|
+
});
|
|
316
437
|
}
|
|
317
|
-
}
|
|
438
|
+
});
|
|
318
439
|
}
|
|
319
440
|
else {
|
|
320
441
|
const deltas = await (0, git_1.getGitDeltas)(repoPath, manifest.lastSyncedCommit);
|
|
321
442
|
const totalCandidates = deltas.modifiedOrAdded.length + deltas.deleted.length;
|
|
443
|
+
let completedModified = 0;
|
|
322
444
|
if (!isJson) {
|
|
323
445
|
console.log(`Candidates: ${totalCandidates} (${deltas.modifiedOrAdded.length} modified/added, ${deltas.deleted.length} deleted)`);
|
|
446
|
+
console.log(`Concurrency: ${concurrency}`);
|
|
324
447
|
}
|
|
325
448
|
for (const deletedPath of deltas.deleted) {
|
|
326
449
|
const removed = await removeManifestPath(deletedPath, manifest, manifestPath, summaryDir, refs);
|
|
@@ -331,73 +454,91 @@ async function runSummarize(options, config) {
|
|
|
331
454
|
console.log(`pruned ${deletedPath}`);
|
|
332
455
|
}
|
|
333
456
|
}
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
457
|
+
await runWithConcurrency(deltas.modifiedOrAdded, concurrency, async (filePath) => {
|
|
458
|
+
await withManifestLock(async () => {
|
|
459
|
+
totals.scanned += 1;
|
|
460
|
+
});
|
|
337
461
|
try {
|
|
338
462
|
if (!shouldIncludeFile(filePath, includePatterns, excludePatterns, ignoreMatcher)) {
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
463
|
+
await withManifestLock(async () => {
|
|
464
|
+
const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
|
|
465
|
+
if (removed) {
|
|
466
|
+
totals.pruned += 1;
|
|
467
|
+
}
|
|
468
|
+
else {
|
|
469
|
+
totals.skipped += 1;
|
|
470
|
+
}
|
|
471
|
+
completedModified += 1;
|
|
472
|
+
if (!isJson) {
|
|
473
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] excluded ${filePath}`);
|
|
474
|
+
}
|
|
475
|
+
});
|
|
476
|
+
return;
|
|
350
477
|
}
|
|
351
478
|
const previousHash = manifest.files[filePath];
|
|
352
479
|
const absolutePath = node_path_1.default.join(repoPath, filePath);
|
|
353
480
|
const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
|
|
354
481
|
const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
|
|
355
482
|
if (previousHash === hash) {
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
483
|
+
const regenerated = await ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot);
|
|
484
|
+
await withManifestLock(async () => {
|
|
485
|
+
if (regenerated) {
|
|
486
|
+
totals.refreshed += 1;
|
|
487
|
+
}
|
|
488
|
+
else {
|
|
489
|
+
totals.skipped += 1;
|
|
490
|
+
}
|
|
491
|
+
completedModified += 1;
|
|
492
|
+
if (!isJson) {
|
|
493
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] ${regenerated ? "refreshed" : "unchanged"} ${filePath}`);
|
|
494
|
+
}
|
|
495
|
+
});
|
|
496
|
+
return;
|
|
370
497
|
}
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
}
|
|
377
|
-
}
|
|
378
|
-
catch (error) {
|
|
379
|
-
const nodeError = error;
|
|
380
|
-
if (nodeError.code === "ENOENT") {
|
|
381
|
-
const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
|
|
382
|
-
if (removed) {
|
|
383
|
-
totals.pruned += 1;
|
|
498
|
+
await ensureSummaryAssetForFile(filePath, hash, rawCodeSnapshot);
|
|
499
|
+
await withManifestLock(async () => {
|
|
500
|
+
const changed = await setManifestPathHash(filePath, hash, manifest, manifestPath, summaryDir, refs);
|
|
501
|
+
if (changed) {
|
|
502
|
+
totals.updated += 1;
|
|
384
503
|
}
|
|
385
504
|
else {
|
|
386
505
|
totals.skipped += 1;
|
|
387
506
|
}
|
|
507
|
+
completedModified += 1;
|
|
388
508
|
if (!isJson) {
|
|
389
|
-
console.log(`[${
|
|
509
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] updated ${filePath}`);
|
|
390
510
|
}
|
|
391
|
-
|
|
511
|
+
});
|
|
512
|
+
}
|
|
513
|
+
catch (error) {
|
|
514
|
+
const nodeError = error;
|
|
515
|
+
if (nodeError.code === "ENOENT") {
|
|
516
|
+
await withManifestLock(async () => {
|
|
517
|
+
const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
|
|
518
|
+
if (removed) {
|
|
519
|
+
totals.pruned += 1;
|
|
520
|
+
}
|
|
521
|
+
else {
|
|
522
|
+
totals.skipped += 1;
|
|
523
|
+
}
|
|
524
|
+
completedModified += 1;
|
|
525
|
+
if (!isJson) {
|
|
526
|
+
console.log(`[${completedModified}/${deltas.modifiedOrAdded.length}] missing ${filePath}`);
|
|
527
|
+
}
|
|
528
|
+
});
|
|
529
|
+
return;
|
|
392
530
|
}
|
|
393
531
|
const message = error instanceof Error ? error.message : String(error);
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
532
|
+
await withManifestLock(async () => {
|
|
533
|
+
failures.push({ filePath, message });
|
|
534
|
+
totals.failed += 1;
|
|
535
|
+
completedModified += 1;
|
|
536
|
+
if (!isJson) {
|
|
537
|
+
console.error(`[${completedModified}/${deltas.modifiedOrAdded.length}] failed ${filePath}: ${message}`);
|
|
538
|
+
}
|
|
539
|
+
});
|
|
399
540
|
}
|
|
400
|
-
}
|
|
541
|
+
});
|
|
401
542
|
}
|
|
402
543
|
manifest.lastSyncedCommit = await (0, git_1.getCurrentCommit)(repoPath);
|
|
403
544
|
await writeManifest(manifestPath, manifest);
|
|
@@ -413,7 +554,7 @@ async function runSummarize(options, config) {
|
|
|
413
554
|
finishedAt: finishedAt.toISOString(),
|
|
414
555
|
durationMs,
|
|
415
556
|
totals,
|
|
416
|
-
failures
|
|
557
|
+
failures: failures.sort((a, b) => a.filePath.localeCompare(b.filePath))
|
|
417
558
|
};
|
|
418
559
|
if (isJson) {
|
|
419
560
|
console.log(JSON.stringify(report, null, 2));
|
|
@@ -423,6 +564,7 @@ async function runSummarize(options, config) {
|
|
|
423
564
|
console.log(`Summarize complete`);
|
|
424
565
|
console.log(`Scanned: ${totals.scanned}`);
|
|
425
566
|
console.log(`Updated: ${totals.updated}`);
|
|
567
|
+
console.log(`Refreshed: ${totals.refreshed}`);
|
|
426
568
|
console.log(`Skipped: ${totals.skipped}`);
|
|
427
569
|
console.log(`Pruned: ${totals.pruned}`);
|
|
428
570
|
console.log(`Failed: ${totals.failed}`);
|
package/dist/config.js
CHANGED
|
@@ -36,6 +36,14 @@ function readPositiveIntegerOption(value, envName, fallback) {
|
|
|
36
36
|
}
|
|
37
37
|
return parsed;
|
|
38
38
|
}
|
|
39
|
+
/**
 * Reads an optional free-text prompt setting.
 *
 * An explicitly supplied value wins over the environment variable; the
 * environment variable is consulted only when the value is null or undefined.
 * The chosen text is returned unmodified (surrounding whitespace is kept).
 *
 * @param {string | null | undefined} value - Explicit option value, if any.
 * @param {string} envName - Name of the environment variable used as fallback.
 * @returns {string | undefined} The prompt text, or undefined when nothing
 *   usable (missing, empty, or whitespace-only) was configured.
 * @throws {TypeError} When a truthy non-string value is supplied.
 */
function readPromptOption(value, envName) {
    const option = value ?? process.env[envName];
    if (!option) {
        return undefined;
    }
    // A truthy non-string would otherwise fail below with an opaque
    // "option.trim is not a function"; name the offending setting instead.
    if (typeof option !== "string") {
        throw new TypeError(`Expected a string for ${envName} but received ${typeof option}.`);
    }
    return option.trim() ? option : undefined;
}
|
|
43
|
+
/**
 * Loads the contents of a prompt file as UTF-8 text.
 *
 * Relative paths are resolved against the current working directory.
 * The read is synchronous; any file-system error is propagated unchanged.
 *
 * @param {string} promptFile - Absolute or cwd-relative path to the prompt file.
 * @returns {string} The file contents.
 */
function resolvePromptFile(promptFile) {
    const workingDirectory = process.cwd();
    const absolutePromptPath = node_path_1.default.resolve(workingDirectory, promptFile);
    const promptText = node_fs_1.default.readFileSync(absolutePromptPath, { encoding: "utf8" });
    return promptText;
}
|
|
39
47
|
function loadRcFile(configPath) {
|
|
40
48
|
const resolvedPath = node_path_1.default.resolve(process.cwd(), configPath || ".diffdocrc");
|
|
41
49
|
if (!node_fs_1.default.existsSync(resolvedPath)) {
|
|
@@ -72,6 +80,14 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
|
|
|
72
80
|
const includeGlobs = readListOption(mergedOptions.includeGlobs, "DIFFDOC_INCLUDE_GLOBS");
|
|
73
81
|
const excludeGlobs = readListOption(mergedOptions.excludeGlobs, "DIFFDOC_EXCLUDE_GLOBS");
|
|
74
82
|
const ignoreFile = readOption(mergedOptions.ignoreFile, "DIFFDOC_IGNORE_FILE", ".diffdocignore");
|
|
83
|
+
const summarizeConcurrency = readPositiveIntegerOption(mergedOptions.summarizeConcurrency, "DIFFDOC_SUMMARIZE_CONCURRENCY", 2);
|
|
84
|
+
const summaryPrompt = readPromptOption(mergedOptions.summaryPrompt, "DIFFDOC_SUMMARY_PROMPT");
|
|
85
|
+
const summaryPromptFile = readPromptOption(mergedOptions.summaryPromptFile, "DIFFDOC_SUMMARY_PROMPT_FILE");
|
|
86
|
+
if (summaryPrompt && summaryPromptFile) {
|
|
87
|
+
throw new Error("Configure either summaryPrompt or summaryPromptFile, not both.");
|
|
88
|
+
}
|
|
89
|
+
const resolvedSummaryPrompt = summaryPromptFile ? resolvePromptFile(summaryPromptFile) : summaryPrompt;
|
|
90
|
+
const summaryPromptSource = summaryPromptFile ? summaryPromptFile : summaryPrompt ? "inline" : undefined;
|
|
75
91
|
const chatBaseURL = provider === "cloud"
|
|
76
92
|
? readOption(mergedOptions.cloudLlmEndpoint, "CLOUD_LLM_ENDPOINT", "https://api.openai.com/v1")
|
|
77
93
|
: readOption(mergedOptions.localLlmEndpoint, "LOCAL_LLM_ENDPOINT");
|
|
@@ -116,7 +132,12 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
|
|
|
116
132
|
summarize: {
|
|
117
133
|
includeGlobs,
|
|
118
134
|
excludeGlobs,
|
|
119
|
-
ignoreFile
|
|
135
|
+
ignoreFile,
|
|
136
|
+
concurrency: summarizeConcurrency,
|
|
137
|
+
summaryPrompt,
|
|
138
|
+
summaryPromptFile,
|
|
139
|
+
resolvedSummaryPrompt,
|
|
140
|
+
summaryPromptSource
|
|
120
141
|
}
|
|
121
142
|
};
|
|
122
143
|
}
|
package/dist/index.js
CHANGED
|
@@ -42,7 +42,7 @@ function addCloudEndpointAndKeyOptions(command) {
|
|
|
42
42
|
program
|
|
43
43
|
.name("diffdoc")
|
|
44
44
|
.description("Translate repository code shifts into plain-English business context")
|
|
45
|
-
.version("0.
|
|
45
|
+
.version("0.6.0");
|
|
46
46
|
program
|
|
47
47
|
.command("init")
|
|
48
48
|
.description("Initialize DiffDoc configuration for this repository")
|
|
@@ -71,6 +71,10 @@ addChatOptions(addBaseOptions(program
|
|
|
71
71
|
.option("--include-glob <pattern>", "include glob pattern (repeatable)", collectOption, [])
|
|
72
72
|
.option("--exclude-glob <pattern>", "exclude glob pattern (repeatable)", collectOption, [])
|
|
73
73
|
.option("--ignore-file <path>", "path to ignore pattern file relative to --path")
|
|
74
|
+
.option("--summarize-concurrency <count>", "number of files to summarize concurrently")
|
|
75
|
+
.option("--summary-prompt <text>", "additional instructions for summary generation")
|
|
76
|
+
.option("--summary-prompt-file <path>", "path to additional summary prompt instructions")
|
|
77
|
+
.option("--refresh", "regenerate summaries even when source and summary metadata are fresh", false)
|
|
74
78
|
.action(async (options) => {
|
|
75
79
|
try {
|
|
76
80
|
const config = (0, config_1.buildRuntimeConfig)(options, { chat: true });
|
|
@@ -82,7 +86,8 @@ addChatOptions(addBaseOptions(program
|
|
|
82
86
|
json: options.json,
|
|
83
87
|
includeGlobs: options.includeGlob,
|
|
84
88
|
excludeGlobs: options.excludeGlob,
|
|
85
|
-
ignoreFile: options.ignoreFile
|
|
89
|
+
ignoreFile: options.ignoreFile,
|
|
90
|
+
refresh: options.refresh
|
|
86
91
|
}, config);
|
|
87
92
|
}
|
|
88
93
|
catch (error) {
|
package/dist/types/artifacts.js
CHANGED
|
@@ -2,4 +2,4 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.SUMMARY_ASSET_SCHEMA_VERSION = exports.MANIFEST_SCHEMA_VERSION = void 0;
|
|
4
4
|
exports.MANIFEST_SCHEMA_VERSION = 2;
|
|
5
|
-
exports.SUMMARY_ASSET_SCHEMA_VERSION =
|
|
5
|
+
exports.SUMMARY_ASSET_SCHEMA_VERSION = 2;
|
package/dist/utils/hashing.js
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.hashFileContent = hashFileContent;
|
|
4
|
+
exports.hashTextContent = hashTextContent;
|
|
4
5
|
const node_crypto_1 = require("node:crypto");
|
|
5
6
|
/**
 * Computes the MD5 digest of the given content.
 *
 * @param {string} fileContent - Content to hash; strings are encoded as UTF-8.
 * @returns {string} Lowercase hexadecimal MD5 digest.
 */
function hashFileContent(fileContent) {
    const md5 = (0, node_crypto_1.createHash)("md5");
    md5.update(fileContent, "utf8");
    return md5.digest("hex");
}
|
|
9
|
+
/**
 * Computes the SHA-256 digest of the given text.
 *
 * @param {string} textContent - Text to hash; strings are encoded as UTF-8.
 * @returns {string} Lowercase hexadecimal SHA-256 digest.
 */
function hashTextContent(textContent) {
    const sha256 = (0, node_crypto_1.createHash)("sha256");
    sha256.update(textContent, "utf8");
    return sha256.digest("hex");
}
|
package/dist/utils/llm.js
CHANGED
|
@@ -3,17 +3,108 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.SUMMARY_FORMAT = exports.SUMMARY_PROMPT_VERSION = void 0;
|
|
6
7
|
exports.generateFunctionalSummary = generateFunctionalSummary;
|
|
7
8
|
exports.generateAnswer = generateAnswer;
|
|
8
9
|
exports.generateEmbeddings = generateEmbeddings;
|
|
9
10
|
const openai_1 = __importDefault(require("openai"));
|
|
11
|
+
exports.SUMMARY_PROMPT_VERSION = 1;
|
|
12
|
+
exports.SUMMARY_FORMAT = "structured-functional-v1";
|
|
13
|
+
const SUMMARY_SYSTEM_PROMPT = `Generate a structured DiffDoc functional summary for the provided source file.
|
|
14
|
+
|
|
15
|
+
Required headings, exactly once and in this order:
|
|
16
|
+
## Metadata
|
|
17
|
+
## Purpose
|
|
18
|
+
## User-Visible Behavior
|
|
19
|
+
## Business Rules
|
|
20
|
+
## Data Inputs And Outputs
|
|
21
|
+
## Side Effects
|
|
22
|
+
## Error And Edge Cases
|
|
23
|
+
## Dependencies
|
|
24
|
+
## Operational Notes
|
|
25
|
+
|
|
26
|
+
Section guidance:
|
|
27
|
+
|
|
28
|
+
## Metadata
|
|
29
|
+
Include file-level context useful for search and retrieval. This section is mandatory and must contain every bullet below exactly once, in this order:
|
|
30
|
+
- File path: {copy the provided file path exactly}
|
|
31
|
+
- File name: {copy the provided file name exactly}
|
|
32
|
+
- Extension: {copy the provided extension exactly}
|
|
33
|
+
- Inferred language/type: {infer from file path, file name, extension, and code content}
|
|
34
|
+
- Content hash: {copy the provided content hash exactly}
|
|
35
|
+
- Line count: {copy the provided line count exactly}
|
|
36
|
+
- Byte size: {copy the provided byte size exactly}
|
|
37
|
+
- Summary format: {copy the provided summary format exactly}
|
|
38
|
+
- Notable symbols/classes/functions: {infer from code, or write "None identified."}
|
|
39
|
+
- External dependencies: {infer from imports, packages, runtime services, external APIs, or write "None identified."}
|
|
40
|
+
- Internal dependencies: {infer from project imports, local modules, local artifacts, or write "None identified."}
|
|
41
|
+
- Public API/exports: {infer exported functions, classes, types, routes, commands, tools, or write "None identified."}
|
|
42
|
+
|
|
43
|
+
## Purpose
|
|
44
|
+
Explain why this file exists and the main responsibility it serves.
|
|
45
|
+
Examples: handles login requests, builds a vector index, loads runtime configuration.
|
|
46
|
+
|
|
47
|
+
## User-Visible Behavior
|
|
48
|
+
Describe behavior users, operators, developers, or API consumers would observe.
|
|
49
|
+
Examples: CLI output, API responses, UI behavior, created/updated/deleted files, validation errors.
|
|
50
|
+
|
|
51
|
+
## Business Rules
|
|
52
|
+
Describe implemented rules, constraints, decisions, and policy-like behavior.
|
|
53
|
+
Examples: required fields, valid modes, filtering precedence, defaults, validation rules, skip conditions.
|
|
54
|
+
|
|
55
|
+
## Data Inputs And Outputs
|
|
56
|
+
Describe what data enters and leaves this file's behavior.
|
|
57
|
+
Examples: input files, config values, environment variables, function arguments, API payloads, generated artifacts, return values.
|
|
58
|
+
|
|
59
|
+
## Side Effects
|
|
60
|
+
Describe changes caused outside local computation.
|
|
61
|
+
Examples: writes files, deletes files, calls external services, updates indexes, logs output, mutates shared state, sends network requests.
|
|
62
|
+
|
|
63
|
+
## Error And Edge Cases
|
|
64
|
+
Describe failure handling and unusual conditions.
|
|
65
|
+
Examples: missing files, invalid config, unsupported schemas, empty results, network/model failures, deleted or unchanged files.
|
|
66
|
+
|
|
67
|
+
## Dependencies
|
|
68
|
+
Describe important internal and external dependencies.
|
|
69
|
+
Examples: imported packages, runtime services, local artifacts, external APIs, models/providers, framework components, project files.
|
|
70
|
+
|
|
71
|
+
## Operational Notes
|
|
72
|
+
Describe details useful for running, maintaining, scaling, or debugging.
|
|
73
|
+
Examples: concurrency, performance, idempotency, caching/reuse, schema implications, regeneration requirements, security/privacy considerations.
|
|
74
|
+
|
|
75
|
+
Rules:
|
|
76
|
+
- Use every heading exactly once.
|
|
77
|
+
- Use headings in the required order.
|
|
78
|
+
- Start with ## Metadata.
|
|
79
|
+
- Include provided deterministic metadata values exactly.
|
|
80
|
+
- Do not rename, omit, reorder, or merge Metadata bullets.
|
|
81
|
+
- Infer the language/type from the provided file path, file name, extension, and code content. Prefer code content when extension is ambiguous. If uncertain, provide the best likely language/type and briefly note uncertainty.
|
|
82
|
+
- Let the code identify symbols, classes, functions, and dependencies. Include important identifiers when useful for search.
|
|
83
|
+
- If a section has no applicable content, write "None identified."
|
|
84
|
+
- Do not invent behavior, requirements, dependencies, or intent not supported by the code.
|
|
85
|
+
- Summarize implemented behavior only.
|
|
86
|
+
- Prefer specific behavior over generic descriptions.
|
|
87
|
+
- Use plain English.
|
|
88
|
+
- Provide zero conversational preamble.
|
|
89
|
+
- Do not include Markdown sections outside the required headings.`;
|
|
10
90
|
/**
 * Builds an OpenAI SDK client together with the model name to use with it.
 *
 * @param {{ apiKey: string, baseURL: string, model: string }} config -
 *   Connection settings; `apiKey` and `baseURL` are passed to the client
 *   constructor and `model` is returned alongside the client.
 * @returns {{ client: object, model: string }} The constructed client and model name.
 */
function createClient(config) {
    const { apiKey, baseURL } = config;
    const client = new openai_1.default({ apiKey, baseURL });
    return { client, model: config.model };
}
|
|
16
|
-
|
|
96
|
+
/**
 * Renders file metadata as a newline-separated Markdown bullet list for
 * inclusion in the summary prompt.
 *
 * Bullets are emitted in a fixed order: file path, file name, extension,
 * content hash, line count, byte size, summary format. A falsy extension is
 * rendered as "None".
 *
 * @param {object} metadata - Object carrying `file_path`, `file_name`,
 *   `extension`, `content_hash`, `line_count`, `byte_size`, and `summary_format`.
 * @returns {string} The bullet list, one bullet per line, without a trailing newline.
 */
function formatMetadataForPrompt(metadata) {
    const bullets = [];
    bullets.push(`- File path: ${metadata.file_path}`);
    bullets.push(`- File name: ${metadata.file_name}`);
    bullets.push(`- Extension: ${metadata.extension || "None"}`);
    bullets.push(`- Content hash: ${metadata.content_hash}`);
    bullets.push(`- Line count: ${metadata.line_count}`);
    bullets.push(`- Byte size: ${metadata.byte_size}`);
    bullets.push(`- Summary format: ${metadata.summary_format}`);
    return bullets.join("\n");
}
|
|
107
|
+
async function generateFunctionalSummary(fileName, codeContent, metadata, config, customPrompt) {
|
|
17
108
|
const { client, model } = createClient(config);
|
|
18
109
|
const response = await client.chat.completions.create({
|
|
19
110
|
model,
|
|
@@ -21,11 +112,11 @@ async function generateFunctionalSummary(fileName, codeContent, config) {
|
|
|
21
112
|
messages: [
|
|
22
113
|
{
|
|
23
114
|
role: "system",
|
|
24
|
-
content:
|
|
115
|
+
content: SUMMARY_SYSTEM_PROMPT
|
|
25
116
|
},
|
|
26
117
|
{
|
|
27
118
|
role: "user",
|
|
28
|
-
content: `File: ${fileName}\n\nCode:\n${codeContent}`
|
|
119
|
+
content: `File: ${fileName}\n\nProvided metadata:\n${formatMetadataForPrompt(metadata)}\n\nConsumer instructions:\n${customPrompt && customPrompt.trim() ? customPrompt.trim() : "None."}\n\nCode:\n${codeContent}`
|
|
29
120
|
}
|
|
30
121
|
]
|
|
31
122
|
});
|