diffdoc 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,166 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.runStatus = runStatus;
7
+ const promises_1 = __importDefault(require("node:fs/promises"));
8
+ const node_path_1 = __importDefault(require("node:path"));
9
+ const vectra_1 = require("vectra");
10
+ const embed_1 = require("./embed");
11
+ const artifacts_1 = require("../types/artifacts");
12
+ const paths_1 = require("../utils/paths");
13
/**
 * Locate the directory that holds summary assets for a manifest.
 *
 * @param {string} manifestPath - Path to the manifest file.
 * @returns {string} Absolute path of the "summaries" folder that sits next to the manifest.
 */
function getSummaryDir(manifestPath) {
    const manifestFolder = node_path_1.default.dirname(manifestPath);
    return node_path_1.default.resolve(manifestFolder, "summaries");
}
16
/**
 * Load and validate the manifest used by the status command.
 *
 * @param {string} manifestPath - Path of the manifest JSON file.
 * @returns {Promise<{schemaVersion: *, lastSyncedCommit: string, files: object}>}
 *   A normalized manifest; non-string commits become "" and a missing or
 *   non-object `files` value becomes {}.
 * @throws {Error} When the file does not exist, is not a JSON object, or has
 *   a schemaVersion other than MANIFEST_SCHEMA_VERSION. Other read and parse
 *   errors are rethrown untouched.
 */
async function readManifest(manifestPath) {
    let rawText;
    try {
        rawText = await promises_1.default.readFile(manifestPath, "utf8");
    }
    catch (readError) {
        if (readError.code !== "ENOENT") {
            throw readError;
        }
        throw new Error(`Manifest not found: ${manifestPath}. Run \"diffdoc summarize\" first.`);
    }
    const manifest = JSON.parse(rawText);
    const isPlainObject = manifest !== null && typeof manifest === "object" && !Array.isArray(manifest);
    if (!isPlainObject) {
        throw new Error(`Invalid manifest JSON in ${manifestPath}. Expected an object.`);
    }
    const expectedSchema = artifacts_1.MANIFEST_SCHEMA_VERSION;
    if (manifest.schemaVersion !== expectedSchema) {
        throw new Error(`Unsupported manifest schema in ${manifestPath}. Expected schemaVersion ${artifacts_1.MANIFEST_SCHEMA_VERSION}.`);
    }
    let lastSyncedCommit = "";
    if (typeof manifest.lastSyncedCommit === "string") {
        lastSyncedCommit = manifest.lastSyncedCommit;
    }
    let files = {};
    if (manifest.files && typeof manifest.files === "object") {
        files = manifest.files;
    }
    return { schemaVersion: artifacts_1.MANIFEST_SCHEMA_VERSION, lastSyncedCommit, files };
}
41
/**
 * Compare the summary files on disk with the hashes the manifest references.
 *
 * @param {string} manifestPath - Manifest location; summaries live beside it.
 * @param {{files: object}} manifest - Manifest whose `files` values are content hashes.
 * @returns {Promise<{summaryFileCount: number, orphanCount: number, missingFromManifestCount: number}>}
 *   `orphanCount` counts summary files no manifest entry references;
 *   `missingFromManifestCount` counts manifest hashes that have no summary file.
 */
async function getSummaryStats(manifestPath, manifest) {
    const summaryDir = getSummaryDir(manifestPath);
    let names;
    try {
        names = await promises_1.default.readdir(summaryDir);
    }
    catch (readError) {
        // A missing summaries directory simply means "no summaries yet".
        if (readError.code !== "ENOENT") {
            throw readError;
        }
        names = [];
    }
    const onDisk = new Set();
    for (const name of names) {
        if (name.endsWith(".json")) {
            onDisk.add(name.slice(0, -5));
        }
    }
    const referenced = new Set(Object.values(manifest.files));
    const orphanCount = [...onDisk].filter((hash) => !referenced.has(hash)).length;
    const missingFromManifestCount = [...referenced].filter((hash) => !onDisk.has(hash)).length;
    return {
        summaryFileCount: onDisk.size,
        orphanCount,
        missingFromManifestCount
    };
}
73
/**
 * Compare the vector index contents with the manifest.
 *
 * Index items are keyed by their string `id`, which is matched against the
 * manifest's file paths, and carry their content hash in `metadata.hash`.
 *
 * @param {{files: object}} manifest - Manifest mapping file path to content hash.
 * @param {object} config - Runtime config handed to getVectraIndexPath.
 * @returns {Promise<{status: string, missing: number, mismatched: number, extra: number}>}
 *   status is "missing" when no index exists, "fresh" when every counter is 0,
 *   otherwise "stale".
 */
async function getIndexFreshness(manifest, config) {
    const index = new vectra_1.LocalIndex((0, embed_1.getVectraIndexPath)(config));
    if (!(await index.isIndexCreated())) {
        return { status: "missing", missing: 0, mismatched: 0, extra: 0 };
    }
    const indexed = new Map();
    for (const item of await index.listItems()) {
        const hasUsableId = Boolean(item.id) && typeof item.id === "string";
        if (!hasUsableId) {
            continue;
        }
        const meta = item.metadata;
        // Items without a string hash are recorded as "" so they count as mismatched.
        indexed.set(item.id, meta && typeof meta.hash === "string" ? meta.hash : "");
    }
    const counts = { missing: 0, mismatched: 0, extra: 0 };
    for (const [filePath, manifestHash] of Object.entries(manifest.files)) {
        if (!indexed.has(filePath)) {
            counts.missing += 1;
        }
        else if (indexed.get(filePath) !== manifestHash) {
            counts.mismatched += 1;
        }
    }
    const trackedPaths = new Set(Object.keys(manifest.files));
    for (const indexedPath of indexed.keys()) {
        if (!trackedPaths.has(indexedPath)) {
            counts.extra += 1;
        }
    }
    const isFresh = counts.missing === 0 && counts.mismatched === 0 && counts.extra === 0;
    return {
        status: isFresh ? "fresh" : "stale",
        missing: counts.missing,
        mismatched: counts.mismatched,
        extra: counts.extra
    };
}
122
/**
 * Render the summary freshness line for the text report.
 *
 * @param {{missingFromManifestCount: number}} stats - Result of getSummaryStats.
 * @returns {string} "fresh" when no summaries are missing, otherwise a "stale" label with the count.
 */
function formatSummaryFreshness(stats) {
    const missingCount = stats.missingFromManifestCount;
    return missingCount === 0 ? "fresh" : `stale (missing: ${missingCount})`;
}
128
/**
 * Assemble the machine-readable status report.
 *
 * @param {{schemaVersion: *, files: object}} manifest - Loaded manifest.
 * @param {{summaryFileCount: number, orphanCount: number, missingFromManifestCount: number}} summaryStats
 * @param {object} indexFreshness - Result of getIndexFreshness, embedded as-is.
 * @returns {object} Report object; this is what the --json output serializes.
 */
function buildStatusReport(manifest, summaryStats, indexFreshness) {
    const { summaryFileCount, orphanCount, missingFromManifestCount } = summaryStats;
    const summaryFreshness = {
        status: missingFromManifestCount === 0 ? "fresh" : "stale",
        missing: missingFromManifestCount
    };
    const trackedFileCount = Object.keys(manifest.files).length;
    return {
        manifestSchema: manifest.schemaVersion,
        trackedFileCount,
        summaryFileCount,
        orphanCount,
        summaryFreshness,
        indexFreshness
    };
}
141
/**
 * Render the index freshness line for the text report.
 *
 * @param {{status: string, missing: number, mismatched: number, extra: number}} freshness
 * @returns {string} "missing", "fresh", or a "stale (...)" label listing the three counters.
 */
function formatIndexFreshness(freshness) {
    switch (freshness.status) {
        case "missing":
            return "missing";
        case "fresh":
            return "fresh";
        default: {
            const { missing, mismatched, extra } = freshness;
            return `stale (missing: ${missing}, mismatched: ${mismatched}, extra: ${extra})`;
        }
    }
}
150
/**
 * Entry point of the status command: report how well the summaries and the
 * vector index agree with the manifest.
 *
 * @param {{manifest: string, json?: boolean}} options - CLI options; `json`
 *   switches the output to a single JSON document.
 * @param {object} config - Runtime config (`baseDir` is read here; the whole
 *   object is passed on to the index check).
 * @returns {Promise<void>} Resolves after the report has been printed.
 */
async function runStatus(options, config) {
    const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.manifest, config.baseDir);
    const manifest = await readManifest(manifestPath);
    const summaryStats = await getSummaryStats(manifestPath, manifest);
    const indexFreshness = await getIndexFreshness(manifest, config);
    const report = buildStatusReport(manifest, summaryStats, indexFreshness);
    if (options.json) {
        console.log(JSON.stringify(report, null, 2));
        return;
    }
    const lines = [
        `manifest schema: ${report.manifestSchema}`,
        `tracked files: ${report.trackedFileCount}`,
        `summary files: ${report.summaryFileCount}`,
        `orphans: ${report.orphanCount}`,
        `summary freshness: ${formatSummaryFreshness(summaryStats)}`,
        `index freshness: ${formatIndexFreshness(indexFreshness)}`
    ];
    for (const line of lines) {
        console.log(line);
    }
}
@@ -6,108 +6,432 @@ Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.runSummarize = runSummarize;
7
7
  const promises_1 = __importDefault(require("node:fs/promises"));
8
8
  const node_path_1 = __importDefault(require("node:path"));
9
+ const artifacts_1 = require("../types/artifacts");
9
10
  const git_1 = require("../utils/git");
10
11
  const hashing_1 = require("../utils/hashing");
11
12
  const llm_1 = require("../utils/llm");
12
13
  const paths_1 = require("../utils/paths");
13
- const TARGET_EXTENSIONS = new Set([".ts", ".js", ".cs", ".py"]);
14
- const IGNORED_DIRECTORIES = new Set([".git", "node_modules", "dist"]);
15
- const IGNORED_FILES = new Set(["package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb"]);
16
14
/**
 * Convert a platform-specific relative path to forward-slash form.
 *
 * @param {string} filePath - Path using the platform separator.
 * @returns {string} The same path with every separator replaced by "/".
 */
function normalizeRelativePath(filePath) {
    const segments = filePath.split(node_path_1.default.sep);
    return segments.join("/");
}
19
- function isTargetCodeFile(filePath) {
20
- return TARGET_EXTENSIONS.has(node_path_1.default.extname(filePath)) && !IGNORED_FILES.has(node_path_1.default.basename(filePath));
17
/**
 * Resolve the "summaries" directory that lives next to the manifest file.
 *
 * @param {string} manifestPath - Path to the manifest file.
 * @returns {string} Absolute path of the summaries directory.
 */
function getSummaryDir(manifestPath) {
    const { dirname, resolve } = node_path_1.default;
    return resolve(dirname(manifestPath), "summaries");
}
20
/**
 * Build the path of the summary asset for a content hash.
 *
 * @param {string} summaryDir - Directory containing summary assets.
 * @param {string} hash - Content hash; used as the file's base name.
 * @returns {string} Absolute path of `<hash>.json` inside summaryDir.
 */
function getSummaryPath(summaryDir, hash) {
    const fileName = `${hash}.json`;
    return node_path_1.default.resolve(summaryDir, fileName);
}
23
/**
 * Rewrite a glob so that it uses "/" regardless of the platform separator.
 *
 * @param {string} pattern - Glob pattern as typed by the user.
 * @returns {string} The pattern with platform separators replaced by "/".
 */
function normalizeGlobPattern(pattern) {
    const { sep } = node_path_1.default;
    return pattern.split(sep).join("/");
}
26
/**
 * Backslash-escape regular-expression metacharacters in a string.
 *
 * "*" is deliberately not in the escaped set; the glob compiler handles it
 * before falling back to this helper.
 *
 * @param {string} value - Literal text.
 * @returns {string} Text safe to embed in a RegExp source.
 */
function escapeRegex(value) {
    const specialChars = /[|\\{}()[\]^$+?.]/g;
    return value.replace(specialChars, (matched) => `\\${matched}`);
}
29
/**
 * Compile a glob pattern into an anchored regular expression.
 *
 * Supported syntax:
 *   - a path segment made of exactly two stars followed by "/" matches zero
 *     or more whole directories, so a pattern starting with that segment also
 *     matches files at the root;
 *   - any other double star matches any run of characters, including "/";
 *   - a single "*" matches any run of characters within one path segment;
 *   - "?" matches exactly one character other than "/".
 * Everything else is matched literally.
 *
 * @param {string} pattern - Glob pattern; platform separators are normalized to "/".
 * @returns {RegExp} Expression that must match the whole forward-slash path.
 */
function globToRegExp(pattern) {
    const normalized = normalizeGlobPattern(pattern);
    let regexBody = "";
    let i = 0;
    while (i < normalized.length) {
        const char = normalized[i];
        if (char === "*" && normalized[i + 1] === "*") {
            const startsSegment = i === 0 || normalized[i - 1] === "/";
            if (startsSegment && normalized[i + 2] === "/") {
                // Globstar segment: the directory prefix is optional, so the
                // pattern also matches when no directory is present.
                regexBody += "(?:.*/)?";
                i += 3;
            }
            else {
                regexBody += ".*";
                i += 2;
            }
            continue;
        }
        if (char === "*") {
            regexBody += "[^/]*";
        }
        else if (char === "?") {
            regexBody += "[^/]";
        }
        else {
            regexBody += escapeRegex(char);
        }
        i += 1;
    }
    return new RegExp(`^${regexBody}$`);
}
52
/**
 * Compile a list of glob patterns, skipping empty entries.
 *
 * @param {Array<string>} patterns - Glob patterns; falsy entries are ignored.
 * @returns {Array<RegExp>} One compiled expression per non-empty pattern, in order.
 */
function compileGlobs(patterns) {
    const compiled = [];
    for (const pattern of patterns) {
        if (pattern) {
            compiled.push(globToRegExp(pattern));
        }
    }
    return compiled;
}
55
/**
 * Tell whether a path matches at least one compiled pattern.
 *
 * @param {string} filePath - Forward-slash relative path.
 * @param {Array<RegExp>} patterns - Compiled glob expressions.
 * @returns {boolean} True on the first match; false for an empty list.
 */
function matchesAny(filePath, patterns) {
    for (const candidate of patterns) {
        if (candidate.test(filePath)) {
            return true;
        }
    }
    return false;
}
58
/**
 * Decide whether a file takes part in summarization.
 *
 * A file is kept when it matches the include list (or that list is empty)
 * and matches neither the exclude list nor the ignore-file list.
 *
 * @param {string} filePath - Forward-slash relative path.
 * @param {Array<RegExp>} includeGlobs - Compiled include patterns; empty means "everything".
 * @param {Array<RegExp>} excludeGlobs - Compiled exclude patterns.
 * @param {Array<RegExp>} ignoreGlobs - Compiled patterns from the ignore file.
 * @returns {boolean} True when the file should be processed.
 */
function shouldIncludeFile(filePath, includeGlobs, excludeGlobs, ignoreGlobs) {
    const isIncluded = includeGlobs.length === 0 || matchesAny(filePath, includeGlobs);
    if (!isIncluded) {
        return false;
    }
    const isExcluded = excludeGlobs.length > 0 && matchesAny(filePath, excludeGlobs);
    if (isExcluded) {
        return false;
    }
    const isIgnored = ignoreGlobs.length > 0 && matchesAny(filePath, ignoreGlobs);
    return !isIgnored;
}
70
/**
 * Check whether a path can be accessed.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>} True when fs access succeeds; false on any failure.
 */
async function fileExists(filePath) {
    let reachable = true;
    try {
        await promises_1.default.access(filePath);
    }
    catch {
        // Any access failure is reported as "does not exist".
        reachable = false;
    }
    return reachable;
}
79
/**
 * Write a UTF-8 file by writing a temporary sibling and renaming it over the
 * target, so readers never observe a partially written file.
 *
 * The temporary file is removed when any step fails, so failed writes do not
 * leave `*.tmp` files next to the target.
 *
 * @param {string} targetPath - Final file path; parent directories are created.
 * @param {string} content - Text to write.
 * @returns {Promise<void>}
 * @throws Rethrows the original error from open, write, sync, close or rename.
 */
async function atomicWriteUtf8(targetPath, content) {
    await promises_1.default.mkdir(node_path_1.default.dirname(targetPath), { recursive: true });
    const tempPath = `${targetPath}.${process.pid}.${Date.now()}.tmp`;
    try {
        const handle = await promises_1.default.open(tempPath, "w");
        try {
            await handle.writeFile(content, "utf8");
            // Flush to disk before the rename makes the file visible.
            await handle.sync();
        }
        finally {
            await handle.close();
        }
        await promises_1.default.rename(tempPath, targetPath);
    }
    catch (error) {
        // Best-effort cleanup: a cleanup failure must not hide the original error.
        await promises_1.default.rm(tempPath, { force: true }).catch(() => { });
        throw error;
    }
}
92
/**
 * Persist the manifest as pretty-printed JSON followed by a newline.
 *
 * @param {string} manifestPath - Destination file.
 * @param {object} manifest - Manifest object to serialize.
 * @returns {Promise<void>}
 */
async function writeManifest(manifestPath, manifest) {
    const serialized = JSON.stringify(manifest, null, 2);
    await atomicWriteUtf8(manifestPath, `${serialized}\n`);
}
95
/**
 * Persist a summary asset as pretty-printed JSON followed by a newline.
 *
 * @param {string} summaryPath - Destination file.
 * @param {object} summary - Summary asset to serialize.
 * @returns {Promise<void>}
 */
async function writeSummaryAsset(summaryPath, summary) {
    const serialized = JSON.stringify(summary, null, 2);
    await atomicWriteUtf8(summaryPath, `${serialized}\n`);
}
22
98
/**
 * Load the manifest for a summarize run.
 *
 * A missing file is not an error: an empty manifest is returned so the first
 * run can start from scratch.
 *
 * @param {string} manifestPath - Path of the manifest JSON file.
 * @returns {Promise<{schemaVersion: *, lastSyncedCommit: string, files: object}>}
 *   A normalized manifest; a non-string commit becomes "" and a `files` value
 *   that is not a plain object becomes {}.
 * @throws {Error} When the content is not a JSON object or its schemaVersion
 *   differs from MANIFEST_SCHEMA_VERSION. Read errors other than ENOENT and
 *   JSON syntax errors are rethrown untouched.
 */
async function readManifest(manifestPath) {
    let raw;
    try {
        raw = await promises_1.default.readFile(manifestPath, "utf8");
    }
    catch (error) {
        const nodeError = error;
        if (nodeError.code === "ENOENT") {
            return {
                schemaVersion: artifacts_1.MANIFEST_SCHEMA_VERSION,
                lastSyncedCommit: "",
                files: {}
            };
        }
        throw error;
    }
    const parsed = JSON.parse(raw);
    // Validate the shape first so `null`, arrays and primitives are reported
    // with a clear message instead of a TypeError or a schema-version error.
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error(`Invalid manifest JSON in ${manifestPath}. Expected an object.`);
    }
    if (parsed.schemaVersion !== artifacts_1.MANIFEST_SCHEMA_VERSION) {
        throw new Error(`Unsupported manifest schema in ${manifestPath}. Expected schemaVersion ${artifacts_1.MANIFEST_SCHEMA_VERSION}.`);
    }
    const hasFileMap = Boolean(parsed.files) && typeof parsed.files === "object" && !Array.isArray(parsed.files);
    return {
        schemaVersion: artifacts_1.MANIFEST_SCHEMA_VERSION,
        lastSyncedCommit: typeof parsed.lastSyncedCommit === "string" ? parsed.lastSyncedCommit : "",
        files: hasFileMap ? parsed.files : {}
    };
}
34
- async function writeManifest(manifestPath, manifest) {
35
- await promises_1.default.mkdir(node_path_1.default.dirname(manifestPath), { recursive: true });
36
- await promises_1.default.writeFile(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");
122
/**
 * Read glob patterns from an ignore file.
 *
 * Lines are trimmed; blank lines and lines starting with "#" are skipped.
 *
 * @param {string} repoPath - Repository root used to resolve a relative ignore path.
 * @param {string} ignoreFilePath - Absolute path, or path relative to repoPath.
 * @returns {Promise<Array<string>>} Normalized patterns; [] when the file does not exist.
 * @throws Rethrows any read error other than ENOENT.
 */
async function readIgnorePatterns(repoPath, ignoreFilePath) {
    let absolutePath = ignoreFilePath;
    if (!node_path_1.default.isAbsolute(ignoreFilePath)) {
        absolutePath = node_path_1.default.resolve(repoPath, ignoreFilePath);
    }
    let raw;
    try {
        raw = await promises_1.default.readFile(absolutePath, "utf8");
    }
    catch (readError) {
        if (readError.code === "ENOENT") {
            return [];
        }
        throw readError;
    }
    const patterns = [];
    for (const rawLine of raw.split(/\r?\n/)) {
        const line = rawLine.trim();
        if (line.length === 0 || line.startsWith("#")) {
            continue;
        }
        patterns.push(normalizeGlobPattern(line));
    }
    return patterns;
}
38
- async function walkCodeFiles(rootPath, currentPath = rootPath) {
142
/**
 * Recursively collect the files under a directory that pass the glob filters.
 *
 * `.git` directories are never entered: their contents are version-control
 * internals, not source files. Entries that are neither directories nor
 * regular files are ignored.
 *
 * @param {string} rootPath - Repository root; returned paths are relative to it.
 * @param {Array<RegExp>} includeGlobs - Compiled include patterns.
 * @param {Array<RegExp>} excludeGlobs - Compiled exclude patterns.
 * @param {Array<RegExp>} ignoreGlobs - Compiled ignore-file patterns.
 * @param {string} [currentPath=rootPath] - Directory being scanned (used by the recursion).
 * @returns {Promise<Array<string>>} Sorted forward-slash relative paths.
 */
async function walkCodeFiles(rootPath, includeGlobs, excludeGlobs, ignoreGlobs, currentPath = rootPath) {
    const entries = await promises_1.default.readdir(currentPath, { withFileTypes: true });
    const files = [];
    for (const entry of entries) {
        const entryPath = node_path_1.default.join(currentPath, entry.name);
        if (entry.isDirectory()) {
            if (entry.name === ".git") {
                continue;
            }
            const nested = await walkCodeFiles(rootPath, includeGlobs, excludeGlobs, ignoreGlobs, entryPath);
            // Append one by one: spreading a very large array into push()
            // can exceed the engine's argument limit.
            for (const nestedPath of nested) {
                files.push(nestedPath);
            }
            continue;
        }
        if (entry.isFile()) {
            const relativePath = normalizeRelativePath(node_path_1.default.relative(rootPath, entryPath));
            if (shouldIncludeFile(relativePath, includeGlobs, excludeGlobs, ignoreGlobs)) {
                files.push(relativePath);
            }
        }
    }
    return files.sort();
}
55
- async function summarizeFile(rootPath, relativePath, config) {
56
- const absolutePath = node_path_1.default.join(rootPath, relativePath);
57
- const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
58
- return {
59
- hash: (0, hashing_1.hashFileContent)(rawCodeSnapshot),
60
- summaryText: await (0, llm_1.generateFunctionalSummary)(relativePath, rawCodeSnapshot, config.chat),
61
- rawCodeSnapshot
160
/**
 * Count how many manifest entries point at each content hash.
 *
 * @param {object} files - Manifest file map (path to hash).
 * @returns {Map<string, number>} Reference count per hash.
 */
function countHashRefs(files) {
    return Object.values(files).reduce(
        (tally, hash) => tally.set(hash, (tally.get(hash) || 0) + 1),
        new Map()
    );
}
167
/**
 * Delete a summary asset once no manifest entry references its hash.
 *
 * @param {string} summaryDir - Directory containing summary assets.
 * @param {string} hash - Content hash of the summary.
 * @param {Map<string, number>} refs - Current reference counts per hash.
 * @returns {Promise<void>} Resolves without action while the hash is still
 *   referenced, or when the file is already gone.
 * @throws Rethrows any unlink error other than ENOENT.
 */
async function deleteSummaryIfUnreferenced(summaryDir, hash, refs) {
    const remaining = refs.get(hash) || 0;
    if (remaining > 0) {
        return;
    }
    try {
        await promises_1.default.unlink(getSummaryPath(summaryDir, hash));
    }
    catch (unlinkError) {
        if (unlinkError.code === "ENOENT") {
            return;
        }
        throw unlinkError;
    }
}
182
/**
 * Point a manifest entry at a new content hash and persist the change.
 *
 * The manifest is written before the old summary is deleted.
 *
 * @param {string} filePath - Manifest key (relative file path).
 * @param {string} newHash - Content hash to record.
 * @param {{files: object}} manifest - Manifest, mutated in place.
 * @param {string} manifestPath - Where the manifest is written.
 * @param {string} summaryDir - Directory containing summary assets.
 * @param {Map<string, number>} refs - Reference counts, updated in place.
 * @returns {Promise<boolean>} False when the entry already had this hash, true otherwise.
 */
async function setManifestPathHash(filePath, newHash, manifest, manifestPath, summaryDir, refs) {
    const oldHash = manifest.files[filePath];
    if (oldHash === newHash) {
        return false;
    }
    const hadOldHash = Boolean(oldHash);
    if (hadOldHash) {
        const current = refs.get(oldHash) || 1;
        refs.set(oldHash, Math.max(current - 1, 0));
    }
    manifest.files[filePath] = newHash;
    const newCount = (refs.get(newHash) || 0) + 1;
    refs.set(newHash, newCount);
    await writeManifest(manifestPath, manifest);
    if (hadOldHash) {
        await deleteSummaryIfUnreferenced(summaryDir, oldHash, refs);
    }
    return true;
}
198
/**
 * Drop a file from the manifest, persist the change, and release its summary.
 *
 * @param {string} filePath - Manifest key (relative file path).
 * @param {{files: object}} manifest - Manifest, mutated in place.
 * @param {string} manifestPath - Where the manifest is written.
 * @param {string} summaryDir - Directory containing summary assets.
 * @param {Map<string, number>} refs - Reference counts, updated in place.
 * @returns {Promise<boolean>} True when an entry was removed, false when the
 *   path was not tracked.
 */
async function removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs) {
    const trackedHash = manifest.files[filePath];
    if (trackedHash) {
        delete manifest.files[filePath];
        const current = refs.get(trackedHash) || 1;
        refs.set(trackedHash, Math.max(current - 1, 0));
        await writeManifest(manifestPath, manifest);
        await deleteSummaryIfUnreferenced(summaryDir, trackedHash, refs);
        return true;
    }
    return false;
}
209
/**
 * Write the summary asset for a content hash unless it already exists.
 *
 * @param {string} summaryDir - Directory containing summary assets.
 * @param {string} hash - Content hash; determines the file name.
 * @param {string} summaryText - Generated summary.
 * @param {string} rawCodeSnapshot - Source text the summary was generated from.
 * @param {boolean} includeCodeSnapshot - When falsy, the snapshot field is left undefined.
 * @returns {Promise<void>}
 */
async function ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, includeCodeSnapshot) {
    const assetPath = getSummaryPath(summaryDir, hash);
    const alreadyWritten = await fileExists(assetPath);
    if (alreadyWritten) {
        return;
    }
    let snapshot;
    if (includeCodeSnapshot) {
        snapshot = rawCodeSnapshot;
    }
    await writeSummaryAsset(assetPath, {
        schemaVersion: artifacts_1.SUMMARY_ASSET_SCHEMA_VERSION,
        content_hash: hash,
        summary: summaryText,
        raw_code_snapshot: snapshot
    });
}
222
/**
 * Delete every `*.json` summary whose hash is not referenced by the manifest.
 *
 * @param {string} summaryDir - Directory containing summary assets.
 * @param {{files: object}} manifest - Manifest whose `files` values are the live hashes.
 * @returns {Promise<void>} Resolves immediately when the directory does not exist.
 * @throws Rethrows readdir errors other than ENOENT and any unlink error.
 */
async function pruneOrphanedSummaries(summaryDir, manifest) {
    const liveHashes = new Set(Object.values(manifest.files));
    let names;
    try {
        names = await promises_1.default.readdir(summaryDir);
    }
    catch (readError) {
        if (readError.code === "ENOENT") {
            return;
        }
        throw readError;
    }
    const orphans = names.filter((name) => name.endsWith(".json") && !liveHashes.has(name.slice(0, -5)));
    for (const orphan of orphans) {
        await promises_1.default.unlink(node_path_1.default.resolve(summaryDir, orphan));
    }
}
64
246
/**
 * Entry point of the summarize command.
 *
 * In "all" mode the manifest is rebuilt from a walk of the repository; in
 * "delta" mode only the paths reported by git since `lastSyncedCommit` are
 * processed. Summaries are stored per content hash, so a file whose hash
 * already has a summary asset is not sent to the model again. The manifest is
 * rewritten after every change so an interrupted run keeps its progress.
 *
 * `lastSyncedCommit` is advanced only when every file succeeded. If it moved
 * forward after a failure, the next delta run would start from the new commit
 * and never retry the failed files.
 *
 * @param {object} options - CLI options: `mode` ("all" or "delta"), `path`,
 *   `out`, and optionally `includeGlobs`, `excludeGlobs`, `ignoreFile`,
 *   `includeCodeSnapshot`, `json`.
 * @param {object} config - Runtime config; `baseDir`, `chat` and `summarize`
 *   are read here.
 * @returns {Promise<void>} Resolves after the report has been printed.
 * @throws {Error} For an invalid mode, and after reporting when one or more
 *   files failed.
 */
async function runSummarize(options, config) {
    if (options.mode !== "all" && options.mode !== "delta") {
        throw new Error('Invalid summarize mode. Expected "all" or "delta".');
    }
    const startedAt = new Date();
    const commandCwd = process.cwd();
    const repoPath = node_path_1.default.resolve(commandCwd, options.path);
    const manifestPath = (0, paths_1.resolveDiffdocArtifactPath)(options.out, config.baseDir);
    const summaryDir = getSummaryDir(manifestPath);
    const manifest = await readManifest(manifestPath);
    const refs = countHashRefs(manifest.files);
    // Command-line globs replace the configured ones when any are given.
    const includePatterns = compileGlobs((options.includeGlobs && options.includeGlobs.length > 0)
        ? options.includeGlobs.map(normalizeGlobPattern)
        : config.summarize.includeGlobs.map(normalizeGlobPattern));
    const excludePatterns = compileGlobs((options.excludeGlobs && options.excludeGlobs.length > 0)
        ? options.excludeGlobs.map(normalizeGlobPattern)
        : config.summarize.excludeGlobs.map(normalizeGlobPattern));
    const ignoreFile = options.ignoreFile || config.summarize.ignoreFile;
    const ignorePatterns = compileGlobs(await readIgnorePatterns(repoPath, ignoreFile));
    const totals = { scanned: 0, skipped: 0, updated: 0, failed: 0, pruned: 0 };
    const failures = [];
    const isJson = options.json;
    if (!isJson) {
        console.log(`Starting summarize run`);
        console.log(`Mode: ${options.mode}`);
        console.log(`Repo: ${repoPath}`);
        console.log(`Manifest: ${manifestPath}`);
        console.log(`Summaries: ${summaryDir}`);
        console.log("---");
    }
    if (options.mode === "all") {
        // Start from an empty file map; existing summary assets are reused by hash.
        manifest.files = {};
        refs.clear();
        await writeManifest(manifestPath, manifest);
        const files = await walkCodeFiles(repoPath, includePatterns, excludePatterns, ignorePatterns);
        const totalFiles = files.length;
        if (!isJson) {
            console.log(`Candidates: ${totalFiles}`);
        }
        for (let i = 0; i < files.length; i += 1) {
            const filePath = files[i];
            totals.scanned += 1;
            try {
                const absolutePath = node_path_1.default.join(repoPath, filePath);
                const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
                const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
                const summaryPath = getSummaryPath(summaryDir, hash);
                if (!await fileExists(summaryPath)) {
                    const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
                    await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
                }
                manifest.files[filePath] = hash;
                refs.set(hash, (refs.get(hash) || 0) + 1);
                await writeManifest(manifestPath, manifest);
                totals.updated += 1;
                if (!isJson) {
                    console.log(`[${i + 1}/${totalFiles}] summarized ${filePath}`);
                }
            }
            catch (error) {
                // Record the failure and keep going; the run fails at the end.
                const message = error instanceof Error ? error.message : String(error);
                failures.push({ filePath, message });
                totals.failed += 1;
                if (!isJson) {
                    console.error(`[${i + 1}/${totalFiles}] failed ${filePath}: ${message}`);
                }
            }
        }
    }
    else {
        const deltas = await (0, git_1.getGitDeltas)(repoPath, manifest.lastSyncedCommit);
        const totalCandidates = deltas.modifiedOrAdded.length + deltas.deleted.length;
        if (!isJson) {
            console.log(`Candidates: ${totalCandidates} (${deltas.modifiedOrAdded.length} modified/added, ${deltas.deleted.length} deleted)`);
        }
        for (const deletedPath of deltas.deleted) {
            const removed = await removeManifestPath(deletedPath, manifest, manifestPath, summaryDir, refs);
            if (removed) {
                totals.pruned += 1;
            }
            if (!isJson) {
                console.log(`pruned ${deletedPath}`);
            }
        }
        for (let i = 0; i < deltas.modifiedOrAdded.length; i += 1) {
            const filePath = deltas.modifiedOrAdded[i];
            totals.scanned += 1;
            try {
                if (!shouldIncludeFile(filePath, includePatterns, excludePatterns, ignorePatterns)) {
                    // A tracked file that no longer passes the filters is dropped from the manifest.
                    const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
                    if (removed) {
                        totals.pruned += 1;
                    }
                    else {
                        totals.skipped += 1;
                    }
                    if (!isJson) {
                        console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] excluded ${filePath}`);
                    }
                    continue;
                }
                const previousHash = manifest.files[filePath];
                const absolutePath = node_path_1.default.join(repoPath, filePath);
                const rawCodeSnapshot = await promises_1.default.readFile(absolutePath, "utf8");
                const hash = (0, hashing_1.hashFileContent)(rawCodeSnapshot);
                if (previousHash === hash) {
                    totals.skipped += 1;
                    if (!isJson) {
                        console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] unchanged ${filePath}`);
                    }
                    continue;
                }
                const summaryPath = getSummaryPath(summaryDir, hash);
                if (!await fileExists(summaryPath)) {
                    const summaryText = await (0, llm_1.generateFunctionalSummary)(filePath, rawCodeSnapshot, config.chat);
                    await ensureSummaryAsset(summaryDir, hash, summaryText, rawCodeSnapshot, options.includeCodeSnapshot);
                }
                const changed = await setManifestPathHash(filePath, hash, manifest, manifestPath, summaryDir, refs);
                if (changed) {
                    totals.updated += 1;
                }
                else {
                    totals.skipped += 1;
                }
                if (!isJson) {
                    console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] updated ${filePath}`);
                }
            }
            catch (error) {
                const nodeError = error;
                if (nodeError.code === "ENOENT") {
                    // Git reported the path but it is gone from disk: treat it as deleted.
                    const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
                    if (removed) {
                        totals.pruned += 1;
                    }
                    else {
                        totals.skipped += 1;
                    }
                    if (!isJson) {
                        console.log(`[${i + 1}/${deltas.modifiedOrAdded.length}] missing ${filePath}`);
                    }
                    continue;
                }
                const message = error instanceof Error ? error.message : String(error);
                failures.push({ filePath, message });
                totals.failed += 1;
                if (!isJson) {
                    console.error(`[${i + 1}/${deltas.modifiedOrAdded.length}] failed ${filePath}: ${message}`);
                }
            }
        }
    }
    // Keep the previous sync point when anything failed, so the next delta
    // run sees the failed files again instead of skipping past them.
    if (failures.length === 0) {
        manifest.lastSyncedCommit = await (0, git_1.getCurrentCommit)(repoPath);
    }
    await writeManifest(manifestPath, manifest);
    await pruneOrphanedSummaries(summaryDir, manifest);
    const finishedAt = new Date();
    const durationMs = finishedAt.getTime() - startedAt.getTime();
    const report = {
        mode: options.mode,
        repoPath,
        manifestPath,
        summaryDir,
        startedAt: startedAt.toISOString(),
        finishedAt: finishedAt.toISOString(),
        durationMs,
        totals,
        failures
    };
    if (isJson) {
        console.log(JSON.stringify(report, null, 2));
    }
    else {
        console.log("---");
        console.log(`Summarize complete`);
        console.log(`Scanned: ${totals.scanned}`);
        console.log(`Updated: ${totals.updated}`);
        console.log(`Skipped: ${totals.skipped}`);
        console.log(`Pruned: ${totals.pruned}`);
        console.log(`Failed: ${totals.failed}`);
        console.log(`Duration: ${(durationMs / 1000).toFixed(2)}s`);
        console.log(`Manifest: ${manifestPath}`);
    }
    if (failures.length > 0) {
        if (!isJson) {
            console.error(`\n${failures.length} file(s) failed during summarization:`);
            for (const failure of failures) {
                console.error(`- ${failure.filePath}: ${failure.message}`);
            }
        }
        throw new Error("Summarization completed with failures.");
    }
}
package/dist/config.js CHANGED
@@ -9,6 +9,22 @@ const node_path_1 = __importDefault(require("node:path"));
9
9
/**
 * Resolve a scalar option: explicit value first, then environment variable,
 * then fallback. Empty strings count as "not set" at every step.
 *
 * @param {string|undefined} value - Value from CLI or rc file.
 * @param {string} envName - Environment variable consulted when value is falsy.
 * @param {string} [fallback=""] - Returned when neither source is set.
 * @returns {string} The first truthy candidate, or the fallback.
 */
function readOption(value, envName, fallback = "") {
    if (value) {
        return value;
    }
    const fromEnv = process.env[envName];
    return fromEnv ? fromEnv : fallback;
}
12
/**
 * Split a comma-separated string into trimmed, non-empty items.
 *
 * @param {string} value - Comma-separated text.
 * @returns {Array<string>} Items in order, without blanks.
 */
function parseCsv(value) {
    const items = [];
    for (const piece of value.split(",")) {
        const trimmed = piece.trim();
        if (trimmed) {
            items.push(trimmed);
        }
    }
    return items;
}
15
/**
 * Resolve a list option: explicit value first, then environment variable,
 * then fallback.
 *
 * @param {Array<string>|string|undefined} value - Array whose items may each
 *   be comma-separated, or a single comma-separated string.
 * @param {string} envName - Environment variable holding a comma-separated list.
 * @param {Array<string>} [fallback=[]] - Returned as-is when nothing else is set.
 * @returns {Array<string>} Parsed items.
 */
function readListOption(value, envName, fallback = []) {
    if (Array.isArray(value)) {
        const collected = [];
        value.forEach((item) => {
            for (const part of parseCsv(item)) {
                if (part) {
                    collected.push(part);
                }
            }
        });
        return collected;
    }
    const hasInlineList = typeof value === "string" && Boolean(value.trim());
    if (hasInlineList) {
        return parseCsv(value);
    }
    const envValue = process.env[envName];
    const hasEnvList = Boolean(envValue) && Boolean(envValue.trim());
    return hasEnvList ? parseCsv(envValue) : fallback;
}
12
28
  function loadRcFile(configPath) {
13
29
  const resolvedPath = node_path_1.default.resolve(process.cwd(), configPath || ".diffdocrc");
14
30
  if (!node_fs_1.default.existsSync(resolvedPath)) {
@@ -41,6 +57,9 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
41
57
  const mergedOptions = mergeConfigOptions(options);
42
58
  const provider = readProvider(mergedOptions.aiProvider);
43
59
  const apiKey = readOption(mergedOptions.openaiApiKey, "OPENAI_API_KEY", provider === "local" ? "local-key" : "");
60
+ const includeGlobs = readListOption(mergedOptions.includeGlobs, "DIFFDOC_INCLUDE_GLOBS");
61
+ const excludeGlobs = readListOption(mergedOptions.excludeGlobs, "DIFFDOC_EXCLUDE_GLOBS");
62
+ const ignoreFile = readOption(mergedOptions.ignoreFile, "DIFFDOC_IGNORE_FILE", ".diffdocignore");
44
63
  const chatBaseURL = provider === "cloud"
45
64
  ? readOption(mergedOptions.cloudLlmEndpoint, "CLOUD_LLM_ENDPOINT", "https://api.openai.com/v1")
46
65
  : readOption(mergedOptions.localLlmEndpoint, "LOCAL_LLM_ENDPOINT");
@@ -80,6 +99,11 @@ function buildRuntimeConfig(options, needs = { chat: true, embeddings: true }) {
80
99
  apiKey,
81
100
  baseURL: embedBaseURL,
82
101
  model: embedModel
102
+ },
103
+ summarize: {
104
+ includeGlobs,
105
+ excludeGlobs,
106
+ ignoreFile
83
107
  }
84
108
  };
85
109
  }