preflight-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +208 -0
- package/README.zh-CN.md +406 -0
- package/dist/bundle/analysis.js +91 -0
- package/dist/bundle/context7.js +301 -0
- package/dist/bundle/deepwiki.js +206 -0
- package/dist/bundle/facts.js +296 -0
- package/dist/bundle/github.js +55 -0
- package/dist/bundle/guides.js +65 -0
- package/dist/bundle/ingest.js +152 -0
- package/dist/bundle/manifest.js +14 -0
- package/dist/bundle/overview.js +222 -0
- package/dist/bundle/paths.js +29 -0
- package/dist/bundle/service.js +803 -0
- package/dist/bundle/tagging.js +206 -0
- package/dist/config.js +65 -0
- package/dist/context7/client.js +30 -0
- package/dist/context7/tools.js +58 -0
- package/dist/core/scheduler.js +166 -0
- package/dist/errors.js +150 -0
- package/dist/index.js +7 -0
- package/dist/jobs/bundle-auto-update-job.js +71 -0
- package/dist/jobs/health-check-job.js +172 -0
- package/dist/jobs/storage-cleanup-job.js +148 -0
- package/dist/logging/logger.js +311 -0
- package/dist/mcp/uris.js +45 -0
- package/dist/search/sqliteFts.js +481 -0
- package/dist/server/optimized-server.js +255 -0
- package/dist/server.js +778 -0
- package/dist/storage/compression.js +249 -0
- package/dist/storage/storage-adapter.js +316 -0
- package/dist/utils/index.js +100 -0
- package/package.json +44 -0
|
@@ -0,0 +1,803 @@
|
|
|
1
|
+
import crypto from 'node:crypto';
|
|
2
|
+
import fs from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { logger } from '../logging/logger.js';
|
|
5
|
+
import { getLocalHeadSha, getRemoteHeadSha, parseOwnerRepo, shallowClone, toCloneUrl, } from './github.js';
|
|
6
|
+
import { ingestRepoToBundle } from './ingest.js';
|
|
7
|
+
import { writeManifest, readManifest } from './manifest.js';
|
|
8
|
+
import { getBundlePaths, repoMetaPath, repoNormDir, repoRawDir, repoRootDir } from './paths.js';
|
|
9
|
+
import { writeAgentsMd, writeStartHereMd } from './guides.js';
|
|
10
|
+
import { generateOverviewMarkdown, writeOverviewFile } from './overview.js';
|
|
11
|
+
import { rebuildIndex } from '../search/sqliteFts.js';
|
|
12
|
+
import { ingestContext7Libraries } from './context7.js';
|
|
13
|
+
import { ingestDeepWikiRepo } from './deepwiki.js';
|
|
14
|
+
import { analyzeBundleStatic } from './analysis.js';
|
|
15
|
+
import { autoDetectTags, generateDisplayName, generateDescription } from './tagging.js';
|
|
16
|
+
/** Create directory `p`, including missing parents; no-op if it already exists. */
async function ensureDir(p) {
    // recursive:true makes mkdir idempotent — it succeeds even when p exists.
    await fs.mkdir(p, { recursive: true });
}
|
|
19
|
+
/** Current timestamp formatted as an ISO-8601 UTC string. */
function nowIso() {
    const now = new Date();
    return now.toISOString();
}
|
|
22
|
+
/**
 * Persist per-repo ingestion metadata as pretty-printed JSON at `params.metaPath`.
 * Creates the parent directory first so the write cannot fail on a missing dir.
 *
 * Fields written: repoId, cloneUrl, headSha, fetchedAt, ingestedFiles, skipped.
 */
async function writeRepoMeta(params) {
    const { metaPath, repoId, cloneUrl, headSha, fetchedAt, ingestedFiles, skipped } = params;
    await ensureDir(path.dirname(metaPath));
    const meta = { repoId, cloneUrl, headSha, fetchedAt, ingestedFiles, skipped };
    // Trailing newline keeps the file friendly to line-based tooling.
    await fs.writeFile(metaPath, `${JSON.stringify(meta, null, 2)}\n`, 'utf8');
}
|
|
34
|
+
/** Remove a file or directory tree at `p`; silently succeeds when it does not exist. */
async function rmIfExists(p) {
    // force:true suppresses ENOENT; recursive:true handles directory trees.
    const opts = { recursive: true, force: true };
    await fs.rm(p, opts);
}
|
|
37
|
+
/**
 * Validate bundle completeness after creation.
 * Ensures all critical files exist and have meaningful content.
 *
 * Checks, in order: the four required top-level files (non-empty, manifest at
 * least 50 bytes), the SQLite search index, and that at least one repo
 * directory under repos/ has content.
 *
 * @param {string} bundleRoot - Absolute path to the bundle's root directory.
 * @returns {Promise<{isValid: boolean, missingComponents: string[]}>}
 */
async function validateBundleCompleteness(bundleRoot) {
    const missingComponents = [];
    // Required top-level files: each must exist and be non-empty.
    const requiredFiles = [
        'manifest.json',
        'START_HERE.md',
        'AGENTS.md',
        'OVERVIEW.md',
    ];
    for (const file of requiredFiles) {
        try {
            const { size } = await fs.stat(path.join(bundleRoot, file));
            if (size === 0) {
                missingComponents.push(`${file} (empty)`);
            }
            else if (file === 'manifest.json' && size < 50) {
                // A real manifest is always larger than 50 bytes.
                missingComponents.push(`${file} (too small, likely incomplete)`);
            }
        }
        catch {
            missingComponents.push(`${file} (missing)`);
        }
    }
    // The full-text search index must exist and be non-empty.
    try {
        const { size } = await fs.stat(path.join(bundleRoot, 'indexes', 'search.sqlite3'));
        if (size === 0) {
            missingComponents.push('indexes/search.sqlite3 (empty)');
        }
    }
    catch {
        missingComponents.push('indexes/search.sqlite3 (missing)');
    }
    // At least one repo directory with actual content must be present.
    const reposDir = path.join(bundleRoot, 'repos');
    try {
        const entries = await fs.readdir(reposDir);
        if (entries.length === 0) {
            missingComponents.push('repos/ (empty - no repositories ingested)');
        }
        else {
            let found = false;
            for (const name of entries) {
                const full = path.join(reposDir, name);
                const st = await fs.stat(full);
                if (st.isDirectory() && (await fs.readdir(full)).length > 0) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                missingComponents.push('repos/ (no actual content)');
            }
        }
    }
    catch {
        missingComponents.push('repos/ (missing)');
    }
    return {
        isValid: missingComponents.length === 0,
        missingComponents,
    };
}
|
|
113
|
+
/**
 * Detect primary language from ingested files.
 * Counts files with kind 'code' by extension and returns the most common
 * mapped language name, or undefined when no recognized code files exist.
 * Ties keep the first language encountered.
 */
function detectPrimaryLanguage(files) {
    const extToLang = {
        '.ts': 'TypeScript',
        '.tsx': 'TypeScript',
        '.js': 'JavaScript',
        '.jsx': 'JavaScript',
        '.py': 'Python',
        '.go': 'Go',
        '.rs': 'Rust',
        '.java': 'Java',
        '.rb': 'Ruby',
        '.php': 'PHP',
    };
    const counts = new Map();
    for (const file of files) {
        if (file.kind !== 'code') {
            continue;
        }
        const lang = extToLang[path.extname(file.repoRelativePath).toLowerCase()];
        if (lang) {
            counts.set(lang, (counts.get(lang) ?? 0) + 1);
        }
    }
    if (counts.size === 0) {
        return undefined;
    }
    // Reduce keeps the earlier entry on ties (strictly-greater comparison),
    // matching Map insertion order.
    const [best] = [...counts.entries()].reduce((acc, cur) => (cur[1] > acc[1] ? cur : acc));
    return best;
}
|
|
152
|
+
/**
 * Clean up failed bundle creation from all storage directories.
 * Best-effort: per-directory failures are logged and do not stop the loop.
 * Also removes the bundle's temp checkout directory, ignoring errors there.
 */
async function cleanupFailedBundle(cfg, bundleId) {
    logger.warn(`Cleaning up failed bundle: ${bundleId}`);
    for (const storageDir of cfg.storageDirs) {
        const bundlePath = path.join(storageDir, bundleId);
        try {
            if (await isPathAvailable(bundlePath)) {
                await rmIfExists(bundlePath);
                logger.info(`Removed failed bundle from: ${storageDir}`);
            }
        }
        catch (err) {
            logger.error(`Failed to cleanup bundle from ${storageDir}`, err instanceof Error ? err : undefined);
        }
    }
    // Temp checkout dir for this bundle is removed best-effort as well.
    const tmpCheckout = path.join(cfg.tmpDir, 'checkouts', bundleId);
    try {
        await rmIfExists(tmpCheckout);
    }
    catch {
        // Ignore cleanup errors
    }
}
|
|
180
|
+
/** Check if a path is accessible (mount exists). Never throws. */
async function isPathAvailable(p) {
    // fs.access resolves when the path is reachable and rejects otherwise;
    // map both outcomes to a boolean.
    return fs.access(p).then(() => true, () => false);
}
|
|
190
|
+
/** Check if a path's parent directory is accessible. */
async function isParentAvailable(p) {
    return await isPathAvailable(path.dirname(p));
}
|
|
195
|
+
/** Copy directory recursively, overwriting any existing files at the destination. */
async function copyDir(src, dest) {
    const opts = { recursive: true, force: true };
    await fs.cp(src, dest, opts);
}
|
|
199
|
+
/**
 * Mirror a bundle to all backup storage directories.
 * Skips unavailable paths (mount disappeared) without blocking.
 * Returns list of successful/failed mirror targets.
 *
 * @param {string} primaryDir - Storage dir holding the source copy (skipped as a target).
 * @param {string[]} backupDirs - All candidate storage dirs, primary included.
 * @param {string} bundleId - Bundle directory name under each storage dir.
 */
async function mirrorBundleToBackups(primaryDir, backupDirs, bundleId) {
    const srcPath = path.join(primaryDir, bundleId);
    const mirrored = [];
    const failed = [];
    for (const backupDir of backupDirs) {
        if (backupDir === primaryDir) {
            continue; // Skip primary
        }
        const destPath = path.join(backupDir, bundleId);
        try {
            // A missing parent means the mount is gone — record and move on.
            if (!(await isParentAvailable(destPath))) {
                failed.push({ path: backupDir, error: 'Mount not available' });
                continue;
            }
            await ensureDir(backupDir);
            // Replace any stale copy wholesale rather than merging.
            await rmIfExists(destPath);
            await copyDir(srcPath, destPath);
            mirrored.push(backupDir);
        }
        catch (err) {
            failed.push({ path: backupDir, error: err instanceof Error ? err.message : String(err) });
        }
    }
    return { mirrored, failed };
}
|
|
232
|
+
/**
 * Find the first available storage directory from the list.
 * A dir counts as available when it exists, or when its parent exists
 * (mount point present but the dir not created yet).
 * Returns null if none are available.
 */
async function findFirstAvailableStorageDir(storageDirs) {
    for (const dir of storageDirs) {
        const usable = (await isPathAvailable(dir)) || (await isPathAvailable(path.dirname(dir)));
        if (usable) {
            return dir;
        }
    }
    return null;
}
|
|
249
|
+
/**
 * Get the effective storage directory for reading.
 * Falls back to first available if primary is unavailable; when nothing is
 * reachable, returns the primary so the caller surfaces the error naturally.
 */
export async function getEffectiveStorageDir(cfg) {
    if (await isPathAvailable(cfg.storageDir)) {
        return cfg.storageDir;
    }
    const fallback = await findFirstAvailableStorageDir(cfg.storageDirs);
    return fallback || cfg.storageDir;
}
|
|
266
|
+
/**
 * Get the effective storage directory for writing.
 * Falls back to first available if primary is unavailable.
 * Also ensures the directory exists.
 *
 * @throws {Error} when no storage directory's mount point is reachable.
 */
export async function getEffectiveStorageDirForWrite(cfg) {
    // Candidate order: primary first, then the configured fallback list
    // (which may repeat the primary — harmless, same check).
    const candidates = [cfg.storageDir, ...cfg.storageDirs];
    for (const dir of candidates) {
        // Parent reachable means the mount is up; create the dir if needed.
        if (await isPathAvailable(path.dirname(dir))) {
            await ensureDir(dir);
            return dir;
        }
    }
    throw new Error('No storage directory available. All mount points are inaccessible.');
}
|
|
289
|
+
/**
 * Sync stale backups: copy from source to any backup that has older data.
 * Called after reading from a backup (means primary was unavailable).
 *
 * Staleness is decided by comparing manifest `updatedAt` timestamps; a backup
 * whose manifest is missing or unreadable is treated as stale. All failures
 * are swallowed — this is strictly best-effort.
 *
 * @param {string} sourceDir - Storage dir holding the authoritative copy.
 * @param {string[]} allDirs - All storage dirs; `sourceDir` itself is skipped.
 * @param {string} bundleId - Bundle directory name under each storage dir.
 */
async function syncStaleBackups(sourceDir, allDirs, bundleId) {
    const srcManifestPath = path.join(sourceDir, bundleId, 'manifest.json');
    let srcUpdatedAt;
    try {
        const srcManifest = await readManifest(srcManifestPath);
        srcUpdatedAt = srcManifest.updatedAt;
    }
    catch {
        return; // Can't read source, skip sync
    }
    for (const dir of allDirs) {
        if (dir === sourceDir)
            continue;
        try {
            // Skip unreachable mounts entirely.
            if (!(await isPathAvailable(dir)))
                continue;
            const destManifestPath = path.join(dir, bundleId, 'manifest.json');
            let needsSync = false;
            try {
                const destManifest = await readManifest(destManifestPath);
                // Sync if destination is older
                needsSync = new Date(destManifest.updatedAt) < new Date(srcUpdatedAt);
            }
            catch {
                // Destination doesn't exist or can't read - needs sync
                needsSync = true;
            }
            if (needsSync) {
                await ensureDir(dir);
                const srcPath = path.join(sourceDir, bundleId);
                const destPath = path.join(dir, bundleId);
                // Full replace (remove then copy) rather than an incremental merge.
                await rmIfExists(destPath);
                await copyDir(srcPath, destPath);
            }
        }
        catch {
            // Skip failed syncs silently
        }
    }
}
|
|
333
|
+
/**
 * Shallow-clone a GitHub repo into a temp checkout, ingest it into the
 * bundle's raw/ and norm/ directories, persist per-repo metadata, and remove
 * the temp checkout.
 *
 * @param params.cfg - Config providing tmpDir, maxFileBytes, maxTotalBytes.
 * @param params.storageDir - Storage dir the bundle lives in.
 * @param params.bundleId / params.owner / params.repo / params.ref
 * @returns {Promise<{headSha: string, files: Array, skipped: Array}>}
 */
async function cloneAndIngestGitHubRepo(params) {
    const repoId = `${params.owner}/${params.repo}`;
    const cloneUrl = toCloneUrl({ owner: params.owner, repo: params.repo });
    // Checkout path is namespaced by bundle and owner__repo to avoid collisions.
    const tmpCheckout = path.join(params.cfg.tmpDir, 'checkouts', params.bundleId, `${params.owner}__${params.repo}`);
    // Remove any leftovers from a previous (failed) run before cloning.
    await rmIfExists(tmpCheckout);
    await shallowClone(cloneUrl, tmpCheckout, { ref: params.ref });
    const headSha = await getLocalHeadSha(tmpCheckout);
    const bundlePaths = getBundlePaths(params.storageDir, params.bundleId);
    const rawDest = repoRawDir(bundlePaths, params.owner, params.repo);
    const normDest = repoNormDir(bundlePaths, params.owner, params.repo);
    // Re-ingest from scratch: wipe both destinations, then recreate them.
    await rmIfExists(rawDest);
    await rmIfExists(normDest);
    await ensureDir(rawDest);
    await ensureDir(normDest);
    // POSIX-style prefix used for bundle-relative paths in the index/manifest.
    const bundleNormPrefixPosix = `repos/${params.owner}/${params.repo}/norm`;
    const ingested = await ingestRepoToBundle({
        repoId,
        repoRoot: tmpCheckout,
        rawDestRoot: rawDest,
        normDestRoot: normDest,
        bundleNormPrefixPosix,
        options: {
            maxFileBytes: params.cfg.maxFileBytes,
            maxTotalBytes: params.cfg.maxTotalBytes,
        },
    });
    const fetchedAt = nowIso();
    await writeRepoMeta({
        metaPath: repoMetaPath(bundlePaths, params.owner, params.repo),
        repoId,
        cloneUrl,
        headSha,
        fetchedAt,
        ingestedFiles: ingested.files.length,
        skipped: ingested.skipped,
    });
    // Checkout is no longer needed once ingestion + metadata are written.
    await rmIfExists(tmpCheckout);
    return { headSha, files: ingested.files, skipped: ingested.skipped };
}
|
|
372
|
+
/**
 * Group ingested files by their repoId.
 * Returns one { repoId, files } entry per repo, in first-seen order.
 */
function groupFilesByRepoId(files) {
    const byRepo = new Map();
    for (const f of files) {
        if (!byRepo.has(f.repoId)) {
            byRepo.set(f.repoId, []);
        }
        byRepo.get(f.repoId).push(f);
    }
    return [...byRepo].map(([repoId, repoFiles]) => ({ repoId, files: repoFiles }));
}
|
|
385
|
+
/**
 * Run static analysis over the bundle's files and write FACTS output,
 * best-effort: a 'none' mode is a no-op, and all errors are logged rather
 * than propagated.
 */
async function generateFactsBestEffort(params) {
    const { mode, bundleId, bundleRoot, files } = params;
    if (mode === 'none') {
        return;
    }
    try {
        const result = await analyzeBundleStatic({
            bundleId,
            bundleRoot,
            repos: groupFilesByRepoId(files),
            mode,
        });
        // analyzeBundleStatic reports soft failures via result.error.
        if (result.error) {
            logger.warn('Static analysis error', { error: result.error });
        }
    }
    catch (err) {
        logger.error('Static analysis exception', err instanceof Error ? err : undefined);
    }
}
|
|
404
|
+
/**
 * Create a new bundle end-to-end: ingest all configured repos (GitHub clone or
 * DeepWiki fetch) and Context7 libraries, build the search index, write the
 * manifest/guides/overview/facts, mirror to backup storage, and validate the
 * result. On any failure the partially-created bundle is removed from all
 * storage dirs and the error is rethrown with context.
 *
 * @param cfg - Service config (storage dirs, tmpDir, size limits, analysisMode).
 * @param input - { repos, libraries?, topics? } describing what to ingest.
 * @returns Summary { bundleId, createdAt, updatedAt, repos, libraries }.
 * @throws {Error} when ingestion fails, no storage is available, or the
 *   finished bundle fails the completeness validation.
 */
export async function createBundle(cfg, input) {
    const bundleId = crypto.randomUUID();
    const createdAt = nowIso();
    // Use effective storage dir (falls back if primary unavailable)
    const effectiveStorageDir = await getEffectiveStorageDirForWrite(cfg);
    await ensureDir(cfg.tmpDir);
    const paths = getBundlePaths(effectiveStorageDir, bundleId);
    await ensureDir(paths.rootDir);
    let bundleCreated = false;
    const allIngestedFiles = [];
    const reposSummary = [];
    try {
        bundleCreated = true; // Mark that bundle directory was created
        for (const repoInput of input.repos) {
            if (repoInput.kind === 'github') {
                const { owner, repo } = parseOwnerRepo(repoInput.repo);
                const { headSha, files, skipped } = await cloneAndIngestGitHubRepo({
                    cfg,
                    bundleId,
                    storageDir: effectiveStorageDir,
                    owner,
                    repo,
                    ref: repoInput.ref,
                });
                allIngestedFiles.push(...files);
                // Only the first 50 skip notes are kept to bound manifest size.
                reposSummary.push({ kind: 'github', id: `${owner}/${repo}`, headSha, notes: skipped.slice(0, 50) });
            }
            else {
                // DeepWiki integration: fetch and convert to Markdown.
                const deepwikiResult = await ingestDeepWikiRepo({
                    cfg,
                    bundlePaths: paths,
                    url: repoInput.url,
                });
                allIngestedFiles.push(...deepwikiResult.files);
                reposSummary.push({
                    kind: 'deepwiki',
                    id: deepwikiResult.summary.repoId,
                    notes: deepwikiResult.summary.notes,
                });
            }
        }
        // Context7 libraries (best-effort).
        let librariesSummary;
        if (input.libraries?.length) {
            // Clean libraries dir in case something wrote here earlier.
            await rmIfExists(paths.librariesDir);
            await ensureDir(paths.librariesDir);
            const libIngest = await ingestContext7Libraries({
                cfg,
                bundlePaths: paths,
                libraries: input.libraries,
                topics: input.topics,
            });
            allIngestedFiles.push(...libIngest.files);
            librariesSummary = libIngest.libraries;
        }
        // Build index.
        await rebuildIndex(paths.searchDbPath, allIngestedFiles, {
            includeDocs: true,
            includeCode: true,
        });
        // Auto-generate metadata (displayName, tags, description)
        const repoIds = reposSummary.map((r) => r.id);
        const displayName = generateDisplayName(repoIds);
        const tags = autoDetectTags({
            repoIds,
            files: allIngestedFiles,
            facts: undefined, // Will be populated later if analysis runs
        });
        const description = generateDescription({
            repoIds,
            tags,
            facts: undefined,
        });
        const primaryLanguage = allIngestedFiles.length > 0 ? detectPrimaryLanguage(allIngestedFiles) : undefined;
        const manifest = {
            schemaVersion: 1,
            bundleId,
            createdAt,
            updatedAt: createdAt,
            displayName,
            description,
            tags,
            primaryLanguage,
            // Original inputs are preserved so updateBundle can re-run them.
            inputs: {
                repos: input.repos,
                libraries: input.libraries,
                topics: input.topics,
            },
            repos: reposSummary.map((r) => ({
                kind: r.kind,
                id: r.id,
                headSha: r.headSha,
                fetchedAt: createdAt,
                notes: r.notes,
            })),
            libraries: librariesSummary,
            index: {
                backend: 'sqlite-fts5-lines',
                includeDocs: true,
                includeCode: true,
            },
        };
        await writeManifest(paths.manifestPath, manifest);
        // Guides.
        await writeAgentsMd(paths.agentsPath);
        await writeStartHereMd({
            targetPath: paths.startHerePath,
            bundleId,
            repos: reposSummary.map((r) => ({ id: r.id, headSha: r.headSha })),
            libraries: librariesSummary,
        });
        // Overview (S2: factual-only with evidence pointers).
        const perRepoOverviews = reposSummary
            .filter((r) => r.kind === 'github')
            .map((r) => {
            const repoId = r.id;
            const repoFiles = allIngestedFiles.filter((f) => f.repoId === repoId);
            return { repoId, headSha: r.headSha, files: repoFiles };
        });
        const overviewMd = await generateOverviewMarkdown({
            bundleId,
            bundleRootDir: paths.rootDir,
            repos: perRepoOverviews,
            libraries: librariesSummary,
        });
        await writeOverviewFile(paths.overviewPath, overviewMd);
        // Generate static facts (FACTS.json). This is intentionally non-LLM and safe to keep inside bundles.
        await generateFactsBestEffort({
            bundleId,
            bundleRoot: paths.rootDir,
            files: allIngestedFiles,
            mode: cfg.analysisMode,
        });
        // Mirror to backup storage directories (non-blocking on failures)
        if (cfg.storageDirs.length > 1) {
            await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
        }
        // CRITICAL: Validate bundle completeness before finalizing
        const validation = await validateBundleCompleteness(paths.rootDir);
        if (!validation.isValid) {
            const errorMsg = `Bundle creation incomplete. Missing: ${validation.missingComponents.join(', ')}`;
            logger.error(errorMsg);
            throw new Error(errorMsg);
        }
        const summary = {
            bundleId,
            createdAt,
            updatedAt: createdAt,
            repos: reposSummary,
            libraries: librariesSummary,
        };
        return summary;
    }
    catch (err) {
        // If bundle directory was created, clean it up
        if (bundleCreated) {
            logger.error(`Bundle creation failed, cleaning up: ${bundleId}`, err instanceof Error ? err : undefined);
            await cleanupFailedBundle(cfg, bundleId);
        }
        // Enhance error message
        const errorMsg = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to create bundle: ${errorMsg}`);
    }
}
|
|
570
|
+
/**
 * Check if a bundle has upstream changes without applying updates.
 *
 * For GitHub repos the remote HEAD SHA is compared against the SHA recorded
 * in the manifest (remote lookup failures are ignored and count as
 * "unchanged"). DeepWiki sources are always reported as changed because their
 * content cannot be cheaply diffed.
 *
 * @returns {Promise<{hasUpdates: boolean, details: Array}>} per-repo change info.
 */
export async function checkForUpdates(cfg, bundleId) {
    const effectiveStorageDir = await getEffectiveStorageDir(cfg);
    const paths = getBundlePaths(effectiveStorageDir, bundleId);
    const manifest = await readManifest(paths.manifestPath);
    const details = [];
    let hasUpdates = false;
    for (const repoInput of manifest.inputs.repos) {
        if (repoInput.kind === 'github') {
            const { owner, repo } = parseOwnerRepo(repoInput.repo);
            const repoId = `${owner}/${repo}`;
            const cloneUrl = toCloneUrl({ owner, repo });
            // The manifest entry for this repo holds the last-ingested SHA.
            const prev = manifest.repos.find((r) => r.kind === 'github' && r.id === repoId);
            let remoteSha;
            try {
                remoteSha = await getRemoteHeadSha(cloneUrl);
            }
            catch {
                // ignore
            }
            // Changed only when both SHAs are known and differ.
            const changed = !!(remoteSha && prev?.headSha && remoteSha !== prev.headSha);
            if (changed)
                hasUpdates = true;
            details.push({ repoId, currentSha: prev?.headSha, remoteSha, changed });
        }
        else {
            // DeepWiki: can't easily detect changes, assume possible update
            details.push({ repoId: repoInput.url, changed: true });
            hasUpdates = true;
        }
    }
    return { hasUpdates, details };
}
|
|
603
|
+
/**
 * Re-ingest an existing bundle from its manifest inputs and rebuild all
 * derived artifacts (index, manifest, guides, overview, facts), then mirror
 * to backup storage. Everything is rebuilt unconditionally; the returned
 * `changed` flag only reports whether any source content differed.
 *
 * @param cfg - Service config.
 * @param bundleId - Bundle to update (must already have a manifest).
 * @param options - Currently unused by the visible body; kept for interface
 *   compatibility — TODO confirm against callers.
 * @returns {Promise<{summary: object, changed: boolean}>}
 */
export async function updateBundle(cfg, bundleId, options) {
    // Use effective storage dir (falls back if primary unavailable)
    const effectiveStorageDir = await getEffectiveStorageDirForWrite(cfg);
    const paths = getBundlePaths(effectiveStorageDir, bundleId);
    const manifest = await readManifest(paths.manifestPath);
    const updatedAt = nowIso();
    let changed = false;
    const allIngestedFiles = [];
    const reposSummary = [];
    // Rebuild everything obvious for now (simple + deterministic).
    for (const repoInput of manifest.inputs.repos) {
        if (repoInput.kind === 'github') {
            const { owner, repo } = parseOwnerRepo(repoInput.repo);
            const repoId = `${owner}/${repo}`;
            const cloneUrl = toCloneUrl({ owner, repo });
            let remoteSha;
            try {
                remoteSha = await getRemoteHeadSha(cloneUrl);
            }
            catch {
                // ignore remote check errors; proceed to clone anyway.
            }
            const prev = manifest.repos.find((r) => r.kind === 'github' && r.id === repoId);
            // Pre-clone change detection via the remote SHA (when available).
            if (remoteSha && prev?.headSha && remoteSha !== prev.headSha) {
                changed = true;
            }
            const { headSha, files, skipped } = await cloneAndIngestGitHubRepo({
                cfg,
                bundleId,
                storageDir: effectiveStorageDir,
                owner,
                repo,
                ref: repoInput.ref,
            });
            // Post-clone confirmation: the actually-checked-out SHA differs.
            if (prev?.headSha && headSha !== prev.headSha) {
                changed = true;
            }
            allIngestedFiles.push(...files);
            reposSummary.push({ kind: 'github', id: repoId, headSha, notes: skipped.slice(0, 50) });
        }
        else {
            // DeepWiki integration: fetch and convert to Markdown.
            const deepwikiResult = await ingestDeepWikiRepo({
                cfg,
                bundlePaths: paths,
                url: repoInput.url,
            });
            allIngestedFiles.push(...deepwikiResult.files);
            reposSummary.push({
                kind: 'deepwiki',
                id: deepwikiResult.summary.repoId,
                notes: deepwikiResult.summary.notes,
            });
            // Always mark as changed for DeepWiki since we can't easily detect content changes.
            changed = true;
        }
    }
    // Context7 libraries (best-effort).
    let librariesSummary;
    if (manifest.inputs.libraries?.length) {
        await rmIfExists(paths.librariesDir);
        await ensureDir(paths.librariesDir);
        const libIngest = await ingestContext7Libraries({
            cfg,
            bundlePaths: paths,
            libraries: manifest.inputs.libraries,
            topics: manifest.inputs.topics,
        });
        allIngestedFiles.push(...libIngest.files);
        librariesSummary = libIngest.libraries;
    }
    // Rebuild index.
    await rebuildIndex(paths.searchDbPath, allIngestedFiles, {
        includeDocs: manifest.index.includeDocs,
        includeCode: manifest.index.includeCode,
    });
    // Carry forward all manifest fields, replacing only what this update touched.
    const newManifest = {
        ...manifest,
        updatedAt,
        repos: reposSummary.map((r) => ({
            kind: r.kind,
            id: r.id,
            headSha: r.headSha,
            fetchedAt: updatedAt,
            notes: r.notes,
        })),
        libraries: librariesSummary,
    };
    await writeManifest(paths.manifestPath, newManifest);
    // Regenerate guides + overview.
    await writeAgentsMd(paths.agentsPath);
    await writeStartHereMd({
        targetPath: paths.startHerePath,
        bundleId,
        repos: reposSummary.map((r) => ({ id: r.id, headSha: r.headSha })),
        libraries: librariesSummary,
    });
    const perRepoOverviews = reposSummary
        .filter((r) => r.kind === 'github')
        .map((r) => {
        const repoId = r.id;
        const repoFiles = allIngestedFiles.filter((f) => f.repoId === repoId);
        return { repoId, headSha: r.headSha, files: repoFiles };
    });
    const overviewMd = await generateOverviewMarkdown({
        bundleId,
        bundleRootDir: paths.rootDir,
        repos: perRepoOverviews,
        libraries: librariesSummary,
    });
    await writeOverviewFile(paths.overviewPath, overviewMd);
    // Refresh static facts (FACTS.json) after update.
    await generateFactsBestEffort({
        bundleId,
        bundleRoot: paths.rootDir,
        files: allIngestedFiles,
        mode: cfg.analysisMode,
    });
    // Mirror to backup storage directories (non-blocking on failures)
    if (cfg.storageDirs.length > 1) {
        await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
    }
    const summary = {
        bundleId,
        createdAt: manifest.createdAt,
        updatedAt,
        repos: reposSummary,
        libraries: librariesSummary,
    };
    return { summary, changed };
}
|
|
734
|
+
/** List bundles from a single storage directory (directory names only; [] on error). */
export async function listBundles(storageDir) {
    try {
        const entries = await fs.readdir(storageDir, { withFileTypes: true });
        const names = [];
        for (const entry of entries) {
            if (entry.isDirectory()) {
                names.push(entry.name);
            }
        }
        return names;
    }
    catch {
        // Missing or unreadable storage dir simply yields no bundles.
        return [];
    }
}
|
|
744
|
+
/** List bundles from multiple storage directories (deduped, first-seen order). */
export async function listBundlesMulti(storageDirs) {
    const perDir = await Promise.all(storageDirs.map((d) => listBundles(d)));
    const unique = new Set();
    for (const list of perDir) {
        for (const id of list) {
            unique.add(id);
        }
    }
    return Array.from(unique);
}
|
|
749
|
+
/** Check if bundle exists in a single storage directory (by presence of its manifest). */
export async function bundleExists(storageDir, bundleId) {
    const { manifestPath } = getBundlePaths(storageDir, bundleId);
    try {
        await fs.stat(manifestPath);
        return true;
    }
    catch {
        return false;
    }
}
|
|
760
|
+
/** Find which storage directory contains the bundle (returns null if not found). */
export async function findBundleStorageDir(storageDirs, bundleId) {
    for (const dir of storageDirs) {
        const found = await bundleExists(dir, bundleId);
        if (found) {
            return dir;
        }
    }
    return null;
}
|
|
769
|
+
/** Check if bundle exists in any of the storage directories. */
export async function bundleExistsMulti(storageDirs, bundleId) {
    const dir = await findBundleStorageDir(storageDirs, bundleId);
    return dir !== null;
}
|
|
773
|
+
/** Resolve the bundle's root directory path within the given storage dir. */
export async function getBundleRoot(storageDir, bundleId) {
    return getBundlePaths(storageDir, bundleId).rootDir;
}
|
|
777
|
+
/** Public wrapper exposing the bundle path set for a storage dir + bundle id. */
export function getBundlePathsForId(storageDir, bundleId) {
    const bundlePaths = getBundlePaths(storageDir, bundleId);
    return bundlePaths;
}
|
|
780
|
+
/** Delete a bundle's entire directory tree from one storage dir (no-op if absent). */
export async function clearBundle(storageDir, bundleId) {
    const { rootDir } = getBundlePaths(storageDir, bundleId);
    await rmIfExists(rootDir);
}
|
|
784
|
+
/**
 * Clear bundle from ALL storage directories (mirror delete).
 * Returns true when the bundle was found and removed from at least one dir.
 */
export async function clearBundleMulti(storageDirs, bundleId) {
    let deleted = false;
    for (const dir of storageDirs) {
        try {
            const present = await bundleExists(dir, bundleId);
            if (present) {
                await clearBundle(dir, bundleId);
                deleted = true;
            }
        }
        catch {
            // Skip unavailable paths
        }
    }
    return deleted;
}
|
|
800
|
+
/** Remove a single repo's directory (raw + norm) from a bundle, if present. */
export async function ensureRepoDirRemoved(storageDir, bundleId, owner, repo) {
    const bundlePaths = getBundlePaths(storageDir, bundleId);
    await rmIfExists(repoRootDir(bundlePaths, owner, repo));
}
|