preflight-mcp 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -142
- package/README.zh-CN.md +141 -124
- package/dist/ast/treeSitter.js +588 -0
- package/dist/bundle/analysis.js +47 -0
- package/dist/bundle/context7.js +65 -36
- package/dist/bundle/facts.js +829 -0
- package/dist/bundle/githubArchive.js +49 -28
- package/dist/bundle/overview.js +226 -48
- package/dist/bundle/service.js +27 -126
- package/dist/config.js +29 -3
- package/dist/context7/client.js +5 -2
- package/dist/evidence/dependencyGraph.js +826 -0
- package/dist/http/server.js +109 -0
- package/dist/search/sqliteFts.js +150 -10
- package/dist/server.js +84 -295
- package/dist/trace/service.js +108 -0
- package/dist/trace/store.js +170 -0
- package/package.json +4 -2
- package/dist/bundle/deepwiki.js +0 -206
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import fs from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import AdmZip from 'adm-zip';
|
|
4
|
+
import { logger } from '../logging/logger.js';
|
|
4
5
|
function nowIso() {
|
|
5
6
|
return new Date().toISOString();
|
|
6
7
|
}
|
|
7
8
|
function githubHeaders(cfg) {
|
|
8
9
|
const headers = {
|
|
9
|
-
'User-Agent': 'preflight-mcp/0.1.
|
|
10
|
+
'User-Agent': 'preflight-mcp/0.1.3',
|
|
10
11
|
Accept: 'application/vnd.github+json',
|
|
11
12
|
};
|
|
12
13
|
if (cfg.githubToken) {
|
|
@@ -17,36 +18,54 @@ function githubHeaders(cfg) {
|
|
|
17
18
|
async function ensureDir(p) {
|
|
18
19
|
await fs.mkdir(p, { recursive: true });
|
|
19
20
|
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
/** Default timeout for GitHub API requests (30 seconds). */
|
|
22
|
+
const DEFAULT_API_TIMEOUT_MS = 30_000;
|
|
23
|
+
/** Default timeout for file downloads (5 minutes). */
|
|
24
|
+
const DEFAULT_DOWNLOAD_TIMEOUT_MS = 5 * 60_000;
|
|
25
|
+
async function fetchJson(url, headers, timeoutMs = DEFAULT_API_TIMEOUT_MS) {
|
|
26
|
+
const controller = new AbortController();
|
|
27
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
28
|
+
try {
|
|
29
|
+
const res = await fetch(url, { headers, signal: controller.signal });
|
|
30
|
+
if (!res.ok) {
|
|
31
|
+
throw new Error(`GitHub API error ${res.status}: ${res.statusText}`);
|
|
32
|
+
}
|
|
33
|
+
return (await res.json());
|
|
34
|
+
}
|
|
35
|
+
finally {
|
|
36
|
+
clearTimeout(timeoutId);
|
|
24
37
|
}
|
|
25
|
-
return (await res.json());
|
|
26
38
|
}
|
|
27
|
-
async function downloadToFile(url, headers, destPath) {
|
|
28
|
-
const
|
|
29
|
-
|
|
30
|
-
|
|
39
|
+
async function downloadToFile(url, headers, destPath, timeoutMs = DEFAULT_DOWNLOAD_TIMEOUT_MS) {
|
|
40
|
+
const controller = new AbortController();
|
|
41
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
42
|
+
try {
|
|
43
|
+
const res = await fetch(url, { headers, redirect: 'follow', signal: controller.signal });
|
|
44
|
+
if (!res.ok) {
|
|
45
|
+
throw new Error(`Download error ${res.status}: ${res.statusText}`);
|
|
46
|
+
}
|
|
47
|
+
// Use streaming if possible; otherwise fallback to arrayBuffer.
|
|
48
|
+
const anyRes = res;
|
|
49
|
+
const body = anyRes.body;
|
|
50
|
+
await ensureDir(path.dirname(destPath));
|
|
51
|
+
if (body && typeof body.pipe === 'function') {
|
|
52
|
+
// Node.js stream
|
|
53
|
+
const ws = (await import('node:fs')).createWriteStream(destPath);
|
|
54
|
+
await new Promise((resolve, reject) => {
|
|
55
|
+
body.pipe(ws);
|
|
56
|
+
body.on('error', reject);
|
|
57
|
+
ws.on('error', reject);
|
|
58
|
+
ws.on('finish', () => resolve());
|
|
59
|
+
});
|
|
60
|
+
return;
|
|
61
|
+
}
|
|
62
|
+
// Web stream or no stream support.
|
|
63
|
+
const buf = Buffer.from(await res.arrayBuffer());
|
|
64
|
+
await fs.writeFile(destPath, buf);
|
|
31
65
|
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
const body = anyRes.body;
|
|
35
|
-
await ensureDir(path.dirname(destPath));
|
|
36
|
-
if (body && typeof body.pipe === 'function') {
|
|
37
|
-
// Node.js stream
|
|
38
|
-
const ws = (await import('node:fs')).createWriteStream(destPath);
|
|
39
|
-
await new Promise((resolve, reject) => {
|
|
40
|
-
body.pipe(ws);
|
|
41
|
-
body.on('error', reject);
|
|
42
|
-
ws.on('error', reject);
|
|
43
|
-
ws.on('finish', () => resolve());
|
|
44
|
-
});
|
|
45
|
-
return;
|
|
66
|
+
finally {
|
|
67
|
+
clearTimeout(timeoutId);
|
|
46
68
|
}
|
|
47
|
-
// Web stream or no stream support.
|
|
48
|
-
const buf = Buffer.from(await res.arrayBuffer());
|
|
49
|
-
await fs.writeFile(destPath, buf);
|
|
50
69
|
}
|
|
51
70
|
async function extractZip(zipPath, destDir) {
|
|
52
71
|
await ensureDir(destDir);
|
|
@@ -77,6 +96,8 @@ export async function downloadAndExtractGitHubArchive(params) {
|
|
|
77
96
|
await extractZip(zipPath, extractDir);
|
|
78
97
|
const repoRoot = await findSingleTopLevelDir(extractDir);
|
|
79
98
|
// Best-effort cleanup: remove zip file (keep extracted for caller to consume).
|
|
80
|
-
await fs.rm(zipPath, { force: true }).catch(() =>
|
|
99
|
+
await fs.rm(zipPath, { force: true }).catch((err) => {
|
|
100
|
+
logger.debug(`Failed to cleanup zip file ${zipPath} (non-critical)`, err instanceof Error ? err : undefined);
|
|
101
|
+
});
|
|
81
102
|
return { repoRoot, refUsed, fetchedAt: nowIso() };
|
|
82
103
|
}
|
package/dist/bundle/overview.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import fs from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
|
+
import { readFacts } from './facts.js';
|
|
3
4
|
function evidence(p, start, end) {
|
|
4
5
|
return `(evidence: ${p}:${start}-${end})`;
|
|
5
6
|
}
|
|
@@ -156,64 +157,241 @@ async function renderContext7LibraryFacts(bundleRootDir, lib) {
|
|
|
156
157
|
}
|
|
157
158
|
return out;
|
|
158
159
|
}
|
|
160
|
+
/**
|
|
161
|
+
* Phase 3: Extract project purpose from README.md
|
|
162
|
+
*/
|
|
163
|
+
async function extractProjectPurpose(files) {
|
|
164
|
+
const readme = files.find(f => f.repoRelativePath.toLowerCase() === 'readme.md');
|
|
165
|
+
if (!readme)
|
|
166
|
+
return null;
|
|
167
|
+
try {
|
|
168
|
+
const content = await fs.readFile(readme.bundleNormAbsPath, 'utf8');
|
|
169
|
+
const lines = content.split('\n');
|
|
170
|
+
// Skip title (first h1)
|
|
171
|
+
let startIdx = 0;
|
|
172
|
+
for (let i = 0; i < lines.length; i++) {
|
|
173
|
+
if (lines[i]?.startsWith('# ')) {
|
|
174
|
+
startIdx = i + 1;
|
|
175
|
+
break;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
// Extract first paragraph (non-empty lines until empty line or next heading)
|
|
179
|
+
const paragraph = [];
|
|
180
|
+
for (let i = startIdx; i < Math.min(lines.length, startIdx + 20); i++) {
|
|
181
|
+
const line = lines[i]?.trim() || '';
|
|
182
|
+
if (!line || line.startsWith('#'))
|
|
183
|
+
break;
|
|
184
|
+
paragraph.push(line);
|
|
185
|
+
}
|
|
186
|
+
return paragraph.join(' ').trim() || null;
|
|
187
|
+
}
|
|
188
|
+
catch {
|
|
189
|
+
return null;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
/**
|
|
193
|
+
* Phase 3: Format module list for display
|
|
194
|
+
*/
|
|
195
|
+
function formatCoreModules(facts) {
|
|
196
|
+
if (!facts.modules || facts.modules.length === 0)
|
|
197
|
+
return [];
|
|
198
|
+
const coreModules = facts.modules
|
|
199
|
+
.filter(m => m.role === 'core')
|
|
200
|
+
.sort((a, b) => b.exports.length - a.exports.length)
|
|
201
|
+
.slice(0, 10);
|
|
202
|
+
if (coreModules.length === 0)
|
|
203
|
+
return [];
|
|
204
|
+
const lines = [];
|
|
205
|
+
for (const mod of coreModules) {
|
|
206
|
+
const shortPath = mod.path.replace(/^repos\/[^\/]+\/[^\/]+\/norm\//, '');
|
|
207
|
+
lines.push(`- **${shortPath}**`);
|
|
208
|
+
lines.push(` - Exports: ${mod.exports.slice(0, 5).join(', ')}${mod.exports.length > 5 ? ` (+${mod.exports.length - 5} more)` : ''}`);
|
|
209
|
+
lines.push(` - Complexity: ${mod.complexity}, LOC: ${mod.loc}`);
|
|
210
|
+
lines.push(` - Evidence: ${mod.path}:1`);
|
|
211
|
+
}
|
|
212
|
+
return lines;
|
|
213
|
+
}
|
|
214
|
+
/**
|
|
215
|
+
* Phase 3: Format standalone modules for reuse guidance
|
|
216
|
+
*/
|
|
217
|
+
function formatStandaloneModules(facts) {
|
|
218
|
+
if (!facts.modules || facts.modules.length === 0)
|
|
219
|
+
return [];
|
|
220
|
+
const standalone = facts.modules
|
|
221
|
+
.filter(m => m.standalone && (m.role === 'core' || m.role === 'utility'))
|
|
222
|
+
.filter(m => m.exports.length > 0)
|
|
223
|
+
.slice(0, 5);
|
|
224
|
+
if (standalone.length === 0)
|
|
225
|
+
return [];
|
|
226
|
+
const lines = [];
|
|
227
|
+
for (const mod of standalone) {
|
|
228
|
+
const shortPath = mod.path.replace(/^repos\/[^\/]+\/[^\/]+\/norm\//, '');
|
|
229
|
+
lines.push(`- **${shortPath}**`);
|
|
230
|
+
lines.push(` - Can be used independently`);
|
|
231
|
+
lines.push(` - Exports: ${mod.exports.slice(0, 3).join(', ')}`);
|
|
232
|
+
lines.push(` - External deps: ${mod.imports.filter(i => !i.startsWith('.')).slice(0, 3).join(', ') || 'None'}`);
|
|
233
|
+
}
|
|
234
|
+
return lines;
|
|
235
|
+
}
|
|
159
236
|
export async function generateOverviewMarkdown(params) {
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
//
|
|
186
|
-
|
|
187
|
-
.
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
.
|
|
191
|
-
|
|
192
|
-
sections.push('### Code paths spotted (sample)');
|
|
193
|
-
for (const p of codeSamples) {
|
|
194
|
-
const file = r.files.find((f) => f.repoRelativePath === p);
|
|
195
|
-
if (!file)
|
|
196
|
-
continue;
|
|
197
|
-
sections.push(`- ${file.bundleNormRelativePath}. ${evidence(file.bundleNormRelativePath, 1, 1)}`);
|
|
237
|
+
// Load FACTS.json if available
|
|
238
|
+
const factsPath = path.join(params.bundleRootDir, 'analysis', 'FACTS.json');
|
|
239
|
+
const facts = await readFacts(factsPath);
|
|
240
|
+
const sections = [];
|
|
241
|
+
// Header
|
|
242
|
+
sections.push(`# ${params.repos[0]?.repoId || 'Project'} - Overview\r\n`);
|
|
243
|
+
// Phase 3: What is this?
|
|
244
|
+
if (facts) {
|
|
245
|
+
sections.push('## What is this?\r\n');
|
|
246
|
+
// Try to get project purpose from README
|
|
247
|
+
const allFiles = params.repos.flatMap(r => r.files);
|
|
248
|
+
const purpose = await extractProjectPurpose(allFiles);
|
|
249
|
+
if (purpose) {
|
|
250
|
+
sections.push(`**Purpose**: ${purpose}\r\n`);
|
|
251
|
+
}
|
|
252
|
+
// Primary language and frameworks
|
|
253
|
+
if (facts.languages && facts.languages.length > 0) {
|
|
254
|
+
const primaryLang = facts.languages[0];
|
|
255
|
+
if (primaryLang) {
|
|
256
|
+
sections.push(`**Language**: ${primaryLang.language} (${primaryLang.fileCount} files)\r\n`);
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
if (facts.frameworks && facts.frameworks.length > 0) {
|
|
260
|
+
sections.push(`**Frameworks**: ${facts.frameworks.join(', ')}\r\n`);
|
|
261
|
+
}
|
|
262
|
+
// Tech stack (Phase 2)
|
|
263
|
+
if (facts.techStack) {
|
|
264
|
+
if (facts.techStack.runtime) {
|
|
265
|
+
sections.push(`**Runtime**: ${facts.techStack.runtime}\r\n`);
|
|
266
|
+
}
|
|
267
|
+
if (facts.techStack.packageManager) {
|
|
268
|
+
sections.push(`**Package Manager**: ${facts.techStack.packageManager}\r\n`);
|
|
198
269
|
}
|
|
199
270
|
}
|
|
200
271
|
sections.push('');
|
|
201
272
|
}
|
|
202
|
-
|
|
203
|
-
if (
|
|
204
|
-
sections.push('##
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
sections.push(
|
|
208
|
-
|
|
209
|
-
|
|
273
|
+
// Phase 3: Architecture
|
|
274
|
+
if (facts) {
|
|
275
|
+
sections.push('## Architecture\r\n');
|
|
276
|
+
// Entry points
|
|
277
|
+
if (facts.entryPoints && facts.entryPoints.length > 0) {
|
|
278
|
+
sections.push('### Entry Points\r\n');
|
|
279
|
+
for (const ep of facts.entryPoints.slice(0, 5)) {
|
|
280
|
+
const shortPath = ep.file.replace(/^repos\/[^\/]+\/[^\/]+\/norm\//, '');
|
|
281
|
+
sections.push(`- \`${shortPath}\` (${ep.type}). ${evidence(ep.evidence, 1, 1)}\r\n`);
|
|
210
282
|
}
|
|
211
|
-
|
|
212
|
-
|
|
283
|
+
sections.push('');
|
|
284
|
+
}
|
|
285
|
+
// Phase 2: Architecture patterns
|
|
286
|
+
if (facts.patterns && facts.patterns.length > 0) {
|
|
287
|
+
sections.push('### Design Patterns\r\n');
|
|
288
|
+
for (const pattern of facts.patterns) {
|
|
289
|
+
sections.push(`- ${pattern}\r\n`);
|
|
290
|
+
}
|
|
291
|
+
sections.push('');
|
|
292
|
+
}
|
|
293
|
+
// Phase 2: Core modules
|
|
294
|
+
const coreModuleLines = formatCoreModules(facts);
|
|
295
|
+
if (coreModuleLines.length > 0) {
|
|
296
|
+
sections.push('### Core Modules\r\n');
|
|
297
|
+
sections.push(...coreModuleLines.map(l => l + '\r\n'));
|
|
298
|
+
sections.push('');
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
// Dependencies
|
|
302
|
+
if (facts && (facts.dependencies.runtime.length > 0 || facts.dependencies.dev.length > 0)) {
|
|
303
|
+
sections.push('## Dependencies\r\n');
|
|
304
|
+
if (facts.dependencies.runtime.length > 0) {
|
|
305
|
+
sections.push(`### Production (${facts.dependencies.runtime.length})\r\n`);
|
|
306
|
+
for (const dep of facts.dependencies.runtime.slice(0, 15)) {
|
|
307
|
+
sections.push(`- ${dep.name}${dep.version ? ` ${dep.version}` : ''}\r\n`);
|
|
308
|
+
}
|
|
309
|
+
if (facts.dependencies.runtime.length > 15) {
|
|
310
|
+
sections.push(`- ... and ${facts.dependencies.runtime.length - 15} more\r\n`);
|
|
311
|
+
}
|
|
312
|
+
sections.push('');
|
|
313
|
+
}
|
|
314
|
+
if (facts.dependencies.dev.length > 0) {
|
|
315
|
+
sections.push(`### Development (${facts.dependencies.dev.length})\r\n`);
|
|
316
|
+
for (const dep of facts.dependencies.dev.slice(0, 10)) {
|
|
317
|
+
sections.push(`- ${dep.name}${dep.version ? ` ${dep.version}` : ''}\r\n`);
|
|
318
|
+
}
|
|
319
|
+
if (facts.dependencies.dev.length > 10) {
|
|
320
|
+
sections.push(`- ... and ${facts.dependencies.dev.length - 10} more\r\n`);
|
|
213
321
|
}
|
|
214
322
|
sections.push('');
|
|
215
323
|
}
|
|
216
324
|
}
|
|
325
|
+
// Phase 3: How to Reuse
|
|
326
|
+
if (facts) {
|
|
327
|
+
const standaloneLines = formatStandaloneModules(facts);
|
|
328
|
+
if (standaloneLines.length > 0) {
|
|
329
|
+
sections.push('## How to Reuse\r\n');
|
|
330
|
+
sections.push('### Standalone Modules\r\n');
|
|
331
|
+
sections.push('These modules can be extracted and used independently:\r\n\r\n');
|
|
332
|
+
sections.push(...standaloneLines.map(l => l + '\r\n'));
|
|
333
|
+
sections.push('');
|
|
334
|
+
}
|
|
335
|
+
// Return Phase 3 format directly
|
|
336
|
+
return sections.join('\n') + '\n';
|
|
337
|
+
}
|
|
338
|
+
// Fallback to legacy format if no FACTS
|
|
339
|
+
{
|
|
340
|
+
const header = `# OVERVIEW.md - Preflight Bundle ${params.bundleId}\r\n\r\nThis file is generated. It contains **only factual statements** with evidence pointers into bundle files.\r\n\r\n`;
|
|
341
|
+
sections.splice(0, sections.length); // Clear Phase 3 sections
|
|
342
|
+
sections.push(header);
|
|
343
|
+
for (const r of params.repos) {
|
|
344
|
+
sections.push(`## Repo: ${r.repoId}`);
|
|
345
|
+
const metaFacts = await renderRepoMetaFacts(params.bundleRootDir, r.repoId);
|
|
346
|
+
if (metaFacts.length) {
|
|
347
|
+
sections.push('### Snapshot facts');
|
|
348
|
+
sections.push(...metaFacts);
|
|
349
|
+
}
|
|
350
|
+
const nodeFacts = await renderNodePackageFacts(r.files);
|
|
351
|
+
if (nodeFacts.length) {
|
|
352
|
+
sections.push('### Node/JS facts');
|
|
353
|
+
sections.push(...nodeFacts);
|
|
354
|
+
}
|
|
355
|
+
const docs = getRepoDocFiles(r.files).slice(0, 50);
|
|
356
|
+
if (docs.length) {
|
|
357
|
+
sections.push('### Documentation files (first 50)');
|
|
358
|
+
for (const d of docs) {
|
|
359
|
+
sections.push(`- ${d.bundleNormRelativePath}. ${evidence(d.bundleNormRelativePath, 1, 1)}`);
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
// Give a small hint about where code lives, without guessing entry points.
|
|
363
|
+
const codeSamples = r.files
|
|
364
|
+
.filter((f) => f.kind === 'code')
|
|
365
|
+
.map((f) => f.repoRelativePath)
|
|
366
|
+
.filter((p) => p.startsWith('src/') || p.startsWith('lib/'))
|
|
367
|
+
.slice(0, 10);
|
|
368
|
+
if (codeSamples.length) {
|
|
369
|
+
sections.push('### Code paths spotted (sample)');
|
|
370
|
+
for (const p of codeSamples) {
|
|
371
|
+
const file = r.files.find((f) => f.repoRelativePath === p);
|
|
372
|
+
if (!file)
|
|
373
|
+
continue;
|
|
374
|
+
sections.push(`- ${file.bundleNormRelativePath}. ${evidence(file.bundleNormRelativePath, 1, 1)}`);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
sections.push('');
|
|
378
|
+
}
|
|
379
|
+
const libs = params.libraries ?? [];
|
|
380
|
+
if (libs.length) {
|
|
381
|
+
sections.push('## Context7 libraries');
|
|
382
|
+
for (const lib of libs) {
|
|
383
|
+
const facts = await renderContext7LibraryFacts(params.bundleRootDir, lib);
|
|
384
|
+
sections.push(`### ${lib.input}`);
|
|
385
|
+
if (facts.length) {
|
|
386
|
+
sections.push(...facts);
|
|
387
|
+
}
|
|
388
|
+
else {
|
|
389
|
+
sections.push('- No library facts available.');
|
|
390
|
+
}
|
|
391
|
+
sections.push('');
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
}
|
|
217
395
|
return sections.join('\n') + '\n';
|
|
218
396
|
}
|
|
219
397
|
export async function writeOverviewFile(targetPath, markdown) {
|
package/dist/bundle/service.js
CHANGED
|
@@ -11,7 +11,6 @@ import { writeAgentsMd, writeStartHereMd } from './guides.js';
|
|
|
11
11
|
import { generateOverviewMarkdown, writeOverviewFile } from './overview.js';
|
|
12
12
|
import { rebuildIndex } from '../search/sqliteFts.js';
|
|
13
13
|
import { ingestContext7Libraries } from './context7.js';
|
|
14
|
-
import { ingestDeepWikiRepo } from './deepwiki.js';
|
|
15
14
|
import { analyzeBundleStatic } from './analysis.js';
|
|
16
15
|
import { autoDetectTags, generateDisplayName, generateDescription } from './tagging.js';
|
|
17
16
|
import { bundleCreationLimiter } from '../core/concurrency-limiter.js';
|
|
@@ -26,48 +25,20 @@ function normalizeList(values) {
|
|
|
26
25
|
.map((s) => s.toLowerCase())
|
|
27
26
|
.sort();
|
|
28
27
|
}
|
|
29
|
-
function normalizeDeepWikiUrl(raw) {
|
|
30
|
-
const trimmed = raw.trim();
|
|
31
|
-
try {
|
|
32
|
-
const u = new URL(trimmed);
|
|
33
|
-
u.hash = '';
|
|
34
|
-
// Normalize host and strip trailing slash.
|
|
35
|
-
u.host = u.host.toLowerCase();
|
|
36
|
-
u.pathname = u.pathname.replace(/\/+$/g, '');
|
|
37
|
-
return u.toString();
|
|
38
|
-
}
|
|
39
|
-
catch {
|
|
40
|
-
return trimmed;
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
28
|
function canonicalizeCreateInput(input) {
|
|
44
29
|
const repos = input.repos
|
|
45
30
|
.map((r) => {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
return {
|
|
49
|
-
kind: 'github',
|
|
50
|
-
repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
|
|
51
|
-
ref: (r.ref ?? '').trim() || undefined,
|
|
52
|
-
};
|
|
53
|
-
}
|
|
54
|
-
if (r.kind === 'local') {
|
|
55
|
-
// For de-duplication, treat local imports as equivalent to github imports of the same logical repo/ref.
|
|
56
|
-
const { owner, repo } = parseOwnerRepo(r.repo);
|
|
57
|
-
return {
|
|
58
|
-
kind: 'github',
|
|
59
|
-
repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
|
|
60
|
-
ref: (r.ref ?? '').trim() || undefined,
|
|
61
|
-
};
|
|
62
|
-
}
|
|
31
|
+
// For de-duplication, treat local imports as equivalent to github imports of the same logical repo/ref.
|
|
32
|
+
const { owner, repo } = parseOwnerRepo(r.repo);
|
|
63
33
|
return {
|
|
64
|
-
kind: '
|
|
65
|
-
|
|
34
|
+
kind: 'github',
|
|
35
|
+
repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
|
|
36
|
+
ref: (r.ref ?? '').trim() || undefined,
|
|
66
37
|
};
|
|
67
38
|
})
|
|
68
39
|
.sort((a, b) => {
|
|
69
|
-
const ka =
|
|
70
|
-
const kb =
|
|
40
|
+
const ka = `github:${a.repo}:${a.ref ?? ''}`;
|
|
41
|
+
const kb = `github:${b.repo}:${b.ref ?? ''}`;
|
|
71
42
|
return ka.localeCompare(kb);
|
|
72
43
|
});
|
|
73
44
|
return {
|
|
@@ -113,8 +84,8 @@ async function writeDedupIndex(storageDir, idx) {
|
|
|
113
84
|
try {
|
|
114
85
|
await fs.unlink(tmpPath);
|
|
115
86
|
}
|
|
116
|
-
catch {
|
|
117
|
-
|
|
87
|
+
catch (cleanupErr) {
|
|
88
|
+
logger.debug('Failed to cleanup temp dedup index file (non-critical)', cleanupErr instanceof Error ? cleanupErr : undefined);
|
|
118
89
|
}
|
|
119
90
|
throw err;
|
|
120
91
|
}
|
|
@@ -131,8 +102,8 @@ async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpda
|
|
|
131
102
|
idx.updatedAt = nowIso();
|
|
132
103
|
await writeDedupIndex(storageDir, idx);
|
|
133
104
|
}
|
|
134
|
-
catch {
|
|
135
|
-
|
|
105
|
+
catch (err) {
|
|
106
|
+
logger.debug(`Failed to update dedup index in ${storageDir} (best-effort)`, err instanceof Error ? err : undefined);
|
|
136
107
|
}
|
|
137
108
|
}
|
|
138
109
|
}
|
|
@@ -798,7 +769,8 @@ async function createBundleInternal(cfg, input, options) {
|
|
|
798
769
|
notes: [...notes, ...skipped].slice(0, 50),
|
|
799
770
|
});
|
|
800
771
|
}
|
|
801
|
-
else
|
|
772
|
+
else {
|
|
773
|
+
// Local repository
|
|
802
774
|
const { owner, repo } = parseOwnerRepo(repoInput.repo);
|
|
803
775
|
const { files, skipped } = await ingestLocalRepo({
|
|
804
776
|
cfg,
|
|
@@ -812,21 +784,6 @@ async function createBundleInternal(cfg, input, options) {
|
|
|
812
784
|
allIngestedFiles.push(...files);
|
|
813
785
|
reposSummary.push({ kind: 'local', id: `${owner}/${repo}`, source: 'local', notes: skipped.slice(0, 50) });
|
|
814
786
|
}
|
|
815
|
-
else {
|
|
816
|
-
// DeepWiki integration: fetch and convert to Markdown.
|
|
817
|
-
const deepwikiResult = await ingestDeepWikiRepo({
|
|
818
|
-
cfg,
|
|
819
|
-
bundlePaths: tmpPaths,
|
|
820
|
-
url: repoInput.url,
|
|
821
|
-
});
|
|
822
|
-
allIngestedFiles.push(...deepwikiResult.files);
|
|
823
|
-
reposSummary.push({
|
|
824
|
-
kind: 'deepwiki',
|
|
825
|
-
id: deepwikiResult.summary.repoId,
|
|
826
|
-
source: 'deepwiki',
|
|
827
|
-
notes: deepwikiResult.summary.notes,
|
|
828
|
-
});
|
|
829
|
-
}
|
|
830
787
|
}
|
|
831
788
|
// Context7 libraries (best-effort).
|
|
832
789
|
let librariesSummary;
|
|
@@ -901,7 +858,14 @@ async function createBundleInternal(cfg, input, options) {
|
|
|
901
858
|
repos: reposSummary.map((r) => ({ id: r.id, headSha: r.headSha })),
|
|
902
859
|
libraries: librariesSummary,
|
|
903
860
|
});
|
|
904
|
-
//
|
|
861
|
+
// Generate static facts (FACTS.json) FIRST. This is intentionally non-LLM and safe to keep inside bundles.
|
|
862
|
+
await generateFactsBestEffort({
|
|
863
|
+
bundleId,
|
|
864
|
+
bundleRoot: tmpPaths.rootDir,
|
|
865
|
+
files: allIngestedFiles,
|
|
866
|
+
mode: cfg.analysisMode,
|
|
867
|
+
});
|
|
868
|
+
// Overview (S2: factual-only with evidence pointers) - generated AFTER FACTS.json
|
|
905
869
|
const perRepoOverviews = reposSummary
|
|
906
870
|
.filter((r) => r.kind === 'github' || r.kind === 'local')
|
|
907
871
|
.map((r) => {
|
|
@@ -916,13 +880,6 @@ async function createBundleInternal(cfg, input, options) {
|
|
|
916
880
|
libraries: librariesSummary,
|
|
917
881
|
});
|
|
918
882
|
await writeOverviewFile(tmpPaths.overviewPath, overviewMd);
|
|
919
|
-
// Generate static facts (FACTS.json). This is intentionally non-LLM and safe to keep inside bundles.
|
|
920
|
-
await generateFactsBestEffort({
|
|
921
|
-
bundleId,
|
|
922
|
-
bundleRoot: tmpPaths.rootDir,
|
|
923
|
-
files: allIngestedFiles,
|
|
924
|
-
mode: cfg.analysisMode,
|
|
925
|
-
});
|
|
926
883
|
// CRITICAL: Validate bundle completeness BEFORE atomic move
|
|
927
884
|
const validation = await validateBundleCompleteness(tmpPaths.rootDir);
|
|
928
885
|
if (!validation.isValid) {
|
|
@@ -978,8 +935,8 @@ async function createBundleInternal(cfg, input, options) {
|
|
|
978
935
|
}
|
|
979
936
|
finally {
|
|
980
937
|
// Ensure temp directory is cleaned up (double safety)
|
|
981
|
-
await rmIfExists(tmpPaths.rootDir).catch(() => {
|
|
982
|
-
|
|
938
|
+
await rmIfExists(tmpPaths.rootDir).catch((err) => {
|
|
939
|
+
logger.debug('Failed to cleanup temp bundle directory in finally block (non-critical)', err instanceof Error ? err : undefined);
|
|
983
940
|
});
|
|
984
941
|
}
|
|
985
942
|
}
|
|
@@ -1008,19 +965,14 @@ export async function checkForUpdates(cfg, bundleId) {
|
|
|
1008
965
|
hasUpdates = true;
|
|
1009
966
|
details.push({ repoId, currentSha: prev?.headSha, remoteSha, changed });
|
|
1010
967
|
}
|
|
1011
|
-
else
|
|
968
|
+
else {
|
|
969
|
+
// Local: can't reliably detect whether local files changed without scanning; assume possible update.
|
|
1012
970
|
const { owner, repo } = parseOwnerRepo(repoInput.repo);
|
|
1013
971
|
const repoId = `${owner}/${repo}`;
|
|
1014
|
-
// We can't reliably detect whether local files changed without scanning; assume possible update.
|
|
1015
972
|
const prev = manifest.repos.find((r) => r.id === repoId);
|
|
1016
973
|
details.push({ repoId, currentSha: prev?.headSha, changed: true });
|
|
1017
974
|
hasUpdates = true;
|
|
1018
975
|
}
|
|
1019
|
-
else {
|
|
1020
|
-
// DeepWiki: can't easily detect changes, assume possible update
|
|
1021
|
-
details.push({ repoId: repoInput.url, changed: true });
|
|
1022
|
-
hasUpdates = true;
|
|
1023
|
-
}
|
|
1024
976
|
}
|
|
1025
977
|
return { hasUpdates, details };
|
|
1026
978
|
}
|
|
@@ -1122,41 +1074,6 @@ async function scanBundleIndexableFiles(params) {
|
|
|
1122
1074
|
});
|
|
1123
1075
|
}
|
|
1124
1076
|
}
|
|
1125
|
-
// 3) deepwiki/<owner>/<repo>/norm/** (docs-only)
|
|
1126
|
-
const deepwikiDir = path.join(params.bundleRootDir, 'deepwiki');
|
|
1127
|
-
const dwSt = await statOrNull(deepwikiDir);
|
|
1128
|
-
if (dwSt?.isDirectory()) {
|
|
1129
|
-
// Only walk the norm subtrees.
|
|
1130
|
-
const owners = await fs.readdir(deepwikiDir, { withFileTypes: true });
|
|
1131
|
-
for (const ownerEnt of owners) {
|
|
1132
|
-
if (!ownerEnt.isDirectory())
|
|
1133
|
-
continue;
|
|
1134
|
-
const owner = ownerEnt.name;
|
|
1135
|
-
const ownerDir = path.join(deepwikiDir, owner);
|
|
1136
|
-
const repos = await fs.readdir(ownerDir, { withFileTypes: true });
|
|
1137
|
-
for (const repoEnt of repos) {
|
|
1138
|
-
if (!repoEnt.isDirectory())
|
|
1139
|
-
continue;
|
|
1140
|
-
const repo = repoEnt.name;
|
|
1141
|
-
const normDir = path.join(ownerDir, repo, 'norm');
|
|
1142
|
-
const normSt = await statOrNull(normDir);
|
|
1143
|
-
if (!normSt?.isDirectory())
|
|
1144
|
-
continue;
|
|
1145
|
-
for await (const wf of walkFilesNoIgnore(normDir)) {
|
|
1146
|
-
if (!wf.relPosix.toLowerCase().endsWith('.md'))
|
|
1147
|
-
continue;
|
|
1148
|
-
const bundleRel = `deepwiki/${owner}/${repo}/norm/${wf.relPosix}`;
|
|
1149
|
-
await pushFile({
|
|
1150
|
-
repoId: `deepwiki:${owner}/${repo}`,
|
|
1151
|
-
kind: 'doc',
|
|
1152
|
-
repoRelativePath: wf.relPosix,
|
|
1153
|
-
bundleRelPosix: bundleRel,
|
|
1154
|
-
absPath: wf.absPath,
|
|
1155
|
-
});
|
|
1156
|
-
}
|
|
1157
|
-
}
|
|
1158
|
-
}
|
|
1159
|
-
}
|
|
1160
1077
|
return { files, totalBytes, skipped };
|
|
1161
1078
|
}
|
|
1162
1079
|
export async function repairBundle(cfg, bundleId, options) {
|
|
@@ -1319,7 +1236,8 @@ export async function updateBundle(cfg, bundleId, options) {
|
|
|
1319
1236
|
allIngestedFiles.push(...files);
|
|
1320
1237
|
reposSummary.push({ kind: 'github', id: repoId, source, headSha, notes: [...notes, ...skipped].slice(0, 50) });
|
|
1321
1238
|
}
|
|
1322
|
-
else
|
|
1239
|
+
else {
|
|
1240
|
+
// Local repository
|
|
1323
1241
|
const { owner, repo } = parseOwnerRepo(repoInput.repo);
|
|
1324
1242
|
const repoId = `${owner}/${repo}`;
|
|
1325
1243
|
const { files, skipped } = await ingestLocalRepo({
|
|
@@ -1335,23 +1253,6 @@ export async function updateBundle(cfg, bundleId, options) {
|
|
|
1335
1253
|
reposSummary.push({ kind: 'local', id: repoId, source: 'local', notes: skipped.slice(0, 50) });
|
|
1336
1254
|
changed = true;
|
|
1337
1255
|
}
|
|
1338
|
-
else {
|
|
1339
|
-
// DeepWiki integration: fetch and convert to Markdown.
|
|
1340
|
-
const deepwikiResult = await ingestDeepWikiRepo({
|
|
1341
|
-
cfg,
|
|
1342
|
-
bundlePaths: paths,
|
|
1343
|
-
url: repoInput.url,
|
|
1344
|
-
});
|
|
1345
|
-
allIngestedFiles.push(...deepwikiResult.files);
|
|
1346
|
-
reposSummary.push({
|
|
1347
|
-
kind: 'deepwiki',
|
|
1348
|
-
id: deepwikiResult.summary.repoId,
|
|
1349
|
-
source: 'deepwiki',
|
|
1350
|
-
notes: deepwikiResult.summary.notes,
|
|
1351
|
-
});
|
|
1352
|
-
// Always mark as changed for DeepWiki since we can't easily detect content changes.
|
|
1353
|
-
changed = true;
|
|
1354
|
-
}
|
|
1355
1256
|
}
|
|
1356
1257
|
// Context7 libraries (best-effort).
|
|
1357
1258
|
let librariesSummary;
|