preflight-mcp 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +282 -27
- package/README.zh-CN.md +277 -308
- package/dist/bundle/cleanup.js +155 -0
- package/dist/bundle/deepwiki.js +1 -1
- package/dist/bundle/github.js +100 -15
- package/dist/bundle/githubArchive.js +82 -0
- package/dist/bundle/ingest.js +2 -2
- package/dist/bundle/paths.js +23 -0
- package/dist/bundle/service.js +800 -57
- package/dist/config.js +1 -0
- package/dist/context7/client.js +1 -1
- package/dist/core/concurrency-limiter.js +100 -0
- package/dist/core/scheduler.js +4 -1
- package/dist/jobs/tmp-cleanup-job.js +71 -0
- package/dist/mcp/errorKinds.js +54 -0
- package/dist/mcp/uris.js +28 -8
- package/dist/search/sqliteFts.js +68 -36
- package/dist/server/optimized-server.js +4 -0
- package/dist/server.js +498 -279
- package/dist/tools/searchByTags.js +80 -0
- package/package.json +26 -1
package/dist/bundle/service.js
CHANGED
@@ -3,7 +3,8 @@ import fs from 'node:fs/promises';
 import path from 'node:path';
 import { logger } from '../logging/logger.js';
 import { getLocalHeadSha, getRemoteHeadSha, parseOwnerRepo, shallowClone, toCloneUrl, } from './github.js';
-import {
+import { downloadAndExtractGitHubArchive } from './githubArchive.js';
+import { classifyIngestedFileKind, ingestRepoToBundle } from './ingest.js';
 import { writeManifest, readManifest } from './manifest.js';
 import { getBundlePaths, repoMetaPath, repoNormDir, repoRawDir, repoRootDir } from './paths.js';
 import { writeAgentsMd, writeStartHereMd } from './guides.js';
@@ -13,22 +14,261 @@ import { ingestContext7Libraries } from './context7.js';
 import { ingestDeepWikiRepo } from './deepwiki.js';
 import { analyzeBundleStatic } from './analysis.js';
 import { autoDetectTags, generateDisplayName, generateDescription } from './tagging.js';
+import { bundleCreationLimiter } from '../core/concurrency-limiter.js';
+const DEDUP_INDEX_FILE = '.preflight-dedup-index.json';
+function sha256Hex(text) {
+    return crypto.createHash('sha256').update(text, 'utf8').digest('hex');
+}
+function normalizeList(values) {
+    return (values ?? [])
+        .map((s) => s.trim())
+        .filter(Boolean)
+        .map((s) => s.toLowerCase())
+        .sort();
+}
+function normalizeDeepWikiUrl(raw) {
+    const trimmed = raw.trim();
+    try {
+        const u = new URL(trimmed);
+        u.hash = '';
+        // Normalize host and strip trailing slash.
+        u.host = u.host.toLowerCase();
+        u.pathname = u.pathname.replace(/\/+$/g, '');
+        return u.toString();
+    }
+    catch {
+        return trimmed;
+    }
+}
+function canonicalizeCreateInput(input) {
+    const repos = input.repos
+        .map((r) => {
+        if (r.kind === 'github') {
+            const { owner, repo } = parseOwnerRepo(r.repo);
+            return {
+                kind: 'github',
+                repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
+                ref: (r.ref ?? '').trim() || undefined,
+            };
+        }
+        if (r.kind === 'local') {
+            // For de-duplication, treat local imports as equivalent to github imports of the same logical repo/ref.
+            const { owner, repo } = parseOwnerRepo(r.repo);
+            return {
+                kind: 'github',
+                repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
+                ref: (r.ref ?? '').trim() || undefined,
+            };
+        }
+        return {
+            kind: 'deepwiki',
+            url: normalizeDeepWikiUrl(r.url),
+        };
+    })
+        .sort((a, b) => {
+        const ka = a.kind === 'github' ? `github:${a.repo}:${a.ref ?? ''}` : `deepwiki:${a.url}`;
+        const kb = b.kind === 'github' ? `github:${b.repo}:${b.ref ?? ''}` : `deepwiki:${b.url}`;
+        return ka.localeCompare(kb);
+    });
+    return {
+        schemaVersion: 1,
+        repos,
+        libraries: normalizeList(input.libraries),
+        topics: normalizeList(input.topics),
+    };
+}
+export function computeCreateInputFingerprint(input) {
+    const canonical = canonicalizeCreateInput(input);
+    return sha256Hex(JSON.stringify(canonical));
+}
+function dedupIndexPath(storageDir) {
+    return path.join(storageDir, DEDUP_INDEX_FILE);
+}
+async function readDedupIndex(storageDir) {
+    const p = dedupIndexPath(storageDir);
+    try {
+        const raw = await fs.readFile(p, 'utf8');
+        const parsed = JSON.parse(raw);
+        if (parsed.schemaVersion !== 1 || typeof parsed.byFingerprint !== 'object' || !parsed.byFingerprint) {
+            return { schemaVersion: 1, updatedAt: nowIso(), byFingerprint: {} };
+        }
+        return parsed;
+    }
+    catch {
+        return { schemaVersion: 1, updatedAt: nowIso(), byFingerprint: {} };
+    }
+}
+async function writeDedupIndex(storageDir, idx) {
+    const p = dedupIndexPath(storageDir);
+    await ensureDir(path.dirname(p));
+    // Use atomic write (write to temp file, then rename) to prevent corruption
+    const tmpPath = `${p}.tmp.${Date.now()}.${Math.random().toString(36).slice(2)}`;
+    try {
+        await fs.writeFile(tmpPath, JSON.stringify(idx, null, 2) + '\n', 'utf8');
+        // Atomic rename on POSIX; near-atomic on Windows
+        await fs.rename(tmpPath, p);
+    }
+    catch (err) {
+        // Clean up temp file on error
+        try {
+            await fs.unlink(tmpPath);
+        }
+        catch {
+            // Ignore cleanup errors
+        }
+        throw err;
+    }
+}
+async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt) {
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            const parentAvailable = await isParentAvailable(storageDir);
+            if (!parentAvailable)
+                continue;
+            await ensureDir(storageDir);
+            const idx = await readDedupIndex(storageDir);
+            idx.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt };
+            idx.updatedAt = nowIso();
+            await writeDedupIndex(storageDir, idx);
+        }
+        catch {
+            // best-effort
+        }
+    }
+}
+async function readBundleSummary(cfg, bundleId) {
+    const storageDir = (await findBundleStorageDir(cfg.storageDirs, bundleId)) ?? (await getEffectiveStorageDir(cfg));
+    const paths = getBundlePaths(storageDir, bundleId);
+    const manifest = await readManifest(paths.manifestPath);
+    return {
+        bundleId: manifest.bundleId,
+        createdAt: manifest.createdAt,
+        updatedAt: manifest.updatedAt,
+        repos: manifest.repos.map((r) => ({
+            kind: r.kind,
+            id: r.id,
+            source: r.source,
+            headSha: r.headSha,
+            notes: r.notes,
+        })),
+        libraries: manifest.libraries,
+    };
+}
+export async function findBundleByInputs(cfg, input) {
+    const fingerprint = computeCreateInputFingerprint(input);
+    return findExistingBundleByFingerprint(cfg, fingerprint);
+}
+async function findExistingBundleByFingerprint(cfg, fingerprint) {
+    // Fast path: consult any available dedup index.
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            if (!(await isPathAvailable(storageDir)))
+                continue;
+            const idx = await readDedupIndex(storageDir);
+            const hit = idx.byFingerprint[fingerprint];
+            if (hit?.bundleId && (await bundleExistsMulti(cfg.storageDirs, hit.bundleId))) {
+                return hit.bundleId;
+            }
+        }
+        catch {
+            // ignore
+        }
+    }
+    // Slow path: scan manifests (works even for bundles created before fingerprints existed).
+    let best = null;
+    for (const storageDir of cfg.storageDirs) {
+        if (!(await isPathAvailable(storageDir)))
+            continue;
+        const ids = await listBundles(storageDir);
+        for (const id of ids) {
+            try {
+                const paths = getBundlePaths(storageDir, id);
+                const manifest = await readManifest(paths.manifestPath);
+                const fp = computeCreateInputFingerprint({
+                    repos: manifest.inputs.repos,
+                    libraries: manifest.inputs.libraries,
+                    topics: manifest.inputs.topics,
+                });
+                if (fp === fingerprint) {
+                    const updatedAt = manifest.updatedAt;
+                    if (!best || new Date(updatedAt) > new Date(best.updatedAt)) {
+                        best = { bundleId: manifest.bundleId, updatedAt };
+                    }
+                }
+            }
+            catch {
+                // ignore corrupt bundles
+            }
+        }
+    }
+    if (best) {
+        // Seed index for next time (best-effort).
+        await updateDedupIndexBestEffort(cfg, fingerprint, best.bundleId, best.updatedAt);
+        return best.bundleId;
+    }
+    return null;
+}
 async function ensureDir(p) {
     await fs.mkdir(p, { recursive: true });
 }
 function nowIso() {
     return new Date().toISOString();
 }
+function toPosix(p) {
+    return p.replaceAll('\\', '/');
+}
+function sha256Text(text) {
+    return crypto.createHash('sha256').update(text, 'utf8').digest('hex');
+}
+async function statOrNull(p) {
+    try {
+        return await fs.stat(p);
+    }
+    catch {
+        return null;
+    }
+}
+async function readUtf8OrNull(p) {
+    try {
+        return await fs.readFile(p, 'utf8');
+    }
+    catch {
+        return null;
+    }
+}
+async function* walkFilesNoIgnore(rootDir) {
+    const stack = [rootDir];
+    while (stack.length) {
+        const dir = stack.pop();
+        const entries = await fs.readdir(dir, { withFileTypes: true });
+        for (const ent of entries) {
+            const abs = path.join(dir, ent.name);
+            const rel = toPosix(path.relative(rootDir, abs));
+            if (ent.isDirectory()) {
+                stack.push(abs);
+                continue;
+            }
+            if (!ent.isFile())
+                continue;
+            yield { absPath: abs, relPosix: rel };
+        }
+    }
+}
 async function writeRepoMeta(params) {
     await ensureDir(path.dirname(params.metaPath));
     const obj = {
         repoId: params.repoId,
         cloneUrl: params.cloneUrl,
-        headSha: params.headSha,
         fetchedAt: params.fetchedAt,
         ingestedFiles: params.ingestedFiles,
         skipped: params.skipped,
     };
+    if (params.headSha)
+        obj.headSha = params.headSha;
+    if (params.source)
+        obj.source = params.source;
+    if (params.ref)
+        obj.ref = params.ref;
     await fs.writeFile(params.metaPath, JSON.stringify(obj, null, 2) + '\n', 'utf8');
 }
 async function rmIfExists(p) {
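The de-duplication design above keys bundles by a content fingerprint: create-inputs are canonicalized (trimmed, lower-cased, sorted) and hashed with SHA-256, so two logically identical requests resolve to the same bundle even when field order, casing, or a local-vs-github kind differs. A minimal standalone sketch of the same idea, using a simplified input shape rather than the package's real types:

import crypto from 'node:crypto';

// Canonicalize a simplified create-input so that ordering and casing
// cannot change the hash, then fingerprint the canonical JSON.
function fingerprint(input) {
    const canonical = {
        repos: input.repos
            .map((r) => ({ kind: r.kind, repo: r.repo.toLowerCase(), ref: r.ref ?? undefined }))
            .sort((a, b) => a.repo.localeCompare(b.repo)),
        libraries: (input.libraries ?? []).map((s) => s.trim().toLowerCase()).sort(),
    };
    return crypto.createHash('sha256').update(JSON.stringify(canonical), 'utf8').digest('hex');
}

// Differently cased / padded inputs hash identically:
const a = fingerprint({ repos: [{ kind: 'github', repo: 'Vercel/Next.js' }], libraries: ['React'] });
const b = fingerprint({ repos: [{ kind: 'github', repo: 'vercel/next.js' }], libraries: [' react '] });
console.log(a === b); // true

Note also that the index file itself is written with the write-temp-then-rename trick, so a crash can never leave a half-written .preflight-dedup-index.json behind.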
@@ -205,26 +445,47 @@ async function mirrorBundleToBackups(primaryDir, backupDirs, bundleId) {
     const srcPath = path.join(primaryDir, bundleId);
     const mirrored = [];
     const failed = [];
-
-
-
+    // Mirror to all backup dirs in parallel for better performance
+    const mirrorPromises = backupDirs
+        .filter(dir => dir !== primaryDir) // Skip primary
+        .map(async (backupDir) => {
         const destPath = path.join(backupDir, bundleId);
         try {
             // Check if backup location is available
             const parentAvailable = await isParentAvailable(destPath);
             if (!parentAvailable) {
-
-                continue;
+                return { success: false, path: backupDir, error: 'Mount not available' };
             }
             // Ensure backup dir exists
             await ensureDir(backupDir);
             // Remove old and copy new
             await rmIfExists(destPath);
             await copyDir(srcPath, destPath);
-
+            return { success: true, path: backupDir };
         }
         catch (err) {
-
+            return {
+                success: false,
+                path: backupDir,
+                error: err instanceof Error ? err.message : String(err)
+            };
+        }
+    });
+    // Wait for all mirror operations to complete
+    const results = await Promise.allSettled(mirrorPromises);
+    for (const result of results) {
+        if (result.status === 'fulfilled') {
+            const { success, path: backupPath, error } = result.value;
+            if (success) {
+                mirrored.push(backupPath);
+            }
+            else {
+                failed.push({ path: backupPath, error: error ?? 'Unknown error' });
+            }
+        }
+        else {
+            // Promise rejection (shouldn't happen with try-catch, but handle it)
+            failed.push({ path: 'unknown', error: result.reason?.message ?? String(result.reason) });
         }
     }
     return { mirrored, failed };
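mirrorBundleToBackups now fans out to every backup directory concurrently instead of looping sequentially; each task converts its own failure into a { success, path, error } record, and Promise.allSettled yields exactly one result per directory even if a task still manages to throw. The pattern reduced to its essentials (copyTo is a stand-in callback, not a package API):

// One async task per target; a bad mount fails its own task only.
async function mirrorAll(targets, copyTo) {
    const tasks = targets.map(async (target) => {
        try {
            await copyTo(target);
            return { success: true, path: target };
        }
        catch (err) {
            return { success: false, path: target, error: err instanceof Error ? err.message : String(err) };
        }
    });
    const results = await Promise.allSettled(tasks);
    const mirrored = [];
    const failed = [];
    for (const r of results) {
        if (r.status === 'fulfilled' && r.value.success)
            mirrored.push(r.value.path);
        else if (r.status === 'fulfilled')
            failed.push({ path: r.value.path, error: r.value.error });
        else
            failed.push({ path: 'unknown', error: String(r.reason) });
    }
    return { mirrored, failed };
}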
@@ -330,13 +591,91 @@ async function syncStaleBackups(sourceDir, allDirs, bundleId) {
         }
     }
 }
+async function writeLocalRepoMeta(params) {
+    await ensureDir(path.dirname(params.metaPath));
+    const obj = {
+        repoId: params.repoId,
+        source: 'local',
+        localPath: params.localPath,
+        ref: params.ref,
+        fetchedAt: params.fetchedAt,
+        ingestedFiles: params.ingestedFiles,
+        skipped: params.skipped,
+    };
+    await fs.writeFile(params.metaPath, JSON.stringify(obj, null, 2) + '\n', 'utf8');
+}
+async function ingestLocalRepo(params) {
+    const repoId = `${params.owner}/${params.repo}`;
+    const repoRoot = path.resolve(params.localPath);
+    const st = await fs.stat(repoRoot);
+    if (!st.isDirectory()) {
+        throw new Error(`Local repo path is not a directory: ${repoRoot}`);
+    }
+    const bundlePaths = getBundlePaths(params.storageDir, params.bundleId);
+    const rawDest = repoRawDir(bundlePaths, params.owner, params.repo);
+    const normDest = repoNormDir(bundlePaths, params.owner, params.repo);
+    await rmIfExists(rawDest);
+    await rmIfExists(normDest);
+    await ensureDir(rawDest);
+    await ensureDir(normDest);
+    const bundleNormPrefixPosix = `repos/${params.owner}/${params.repo}/norm`;
+    const ingested = await ingestRepoToBundle({
+        repoId,
+        repoRoot,
+        rawDestRoot: rawDest,
+        normDestRoot: normDest,
+        bundleNormPrefixPosix,
+        options: {
+            maxFileBytes: params.cfg.maxFileBytes,
+            maxTotalBytes: params.cfg.maxTotalBytes,
+        },
+    });
+    const fetchedAt = nowIso();
+    await writeLocalRepoMeta({
+        metaPath: repoMetaPath(bundlePaths, params.owner, params.repo),
+        repoId,
+        localPath: repoRoot,
+        ref: params.ref,
+        fetchedAt,
+        ingestedFiles: ingested.files.length,
+        skipped: ingested.skipped,
+    });
+    return { files: ingested.files, skipped: ingested.skipped };
+}
 async function cloneAndIngestGitHubRepo(params) {
     const repoId = `${params.owner}/${params.repo}`;
     const cloneUrl = toCloneUrl({ owner: params.owner, repo: params.repo });
-    const
-
-
-
+    const tmpBase = path.join(params.cfg.tmpDir, 'checkouts', params.bundleId, `${params.owner}__${params.repo}`);
+    const tmpCheckoutGit = tmpBase;
+    const tmpArchiveDir = `${tmpBase}__archive`;
+    await rmIfExists(tmpCheckoutGit);
+    await rmIfExists(tmpArchiveDir);
+    let repoRootForIngest = tmpCheckoutGit;
+    let headSha;
+    const notes = [];
+    let source = 'git';
+    let fetchedAt = nowIso();
+    let refUsed = params.ref;
+    try {
+        await shallowClone(cloneUrl, tmpCheckoutGit, { ref: params.ref, timeoutMs: params.cfg.gitCloneTimeoutMs });
+        headSha = await getLocalHeadSha(tmpCheckoutGit);
+    }
+    catch (err) {
+        // Fallback: GitHub archive download (zipball) + extract.
+        source = 'archive';
+        const msg = err instanceof Error ? err.message : String(err);
+        notes.push(`git clone failed; used GitHub archive fallback: ${msg}`);
+        const archive = await downloadAndExtractGitHubArchive({
+            cfg: params.cfg,
+            owner: params.owner,
+            repo: params.repo,
+            ref: params.ref,
+            destDir: tmpArchiveDir,
+        });
+        repoRootForIngest = archive.repoRoot;
+        fetchedAt = archive.fetchedAt;
+        refUsed = archive.refUsed;
+    }
     const bundlePaths = getBundlePaths(params.storageDir, params.bundleId);
     const rawDest = repoRawDir(bundlePaths, params.owner, params.repo);
     const normDest = repoNormDir(bundlePaths, params.owner, params.repo);
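cloneAndIngestGitHubRepo now degrades gracefully: it tries a shallow git clone first and, when that fails (no git binary, blocked egress, timeout), falls back to downloading a GitHub archive, recording the downgrade in notes and reporting source: 'archive' with no headSha. A hedged sketch of the shape, where downloadArchive is an injected stand-in for the package's downloadAndExtractGitHubArchive:

import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
const run = promisify(execFile);

// Shallow clone (depth 1): small checkout, but still a resolvable HEAD SHA.
async function shallowCloneSketch(cloneUrl, destDir, ref) {
    const args = ['clone', '--depth', '1', ...(ref ? ['--branch', ref] : []), cloneUrl, destDir];
    await run('git', args);
    const { stdout } = await run('git', ['-C', destDir, 'rev-parse', 'HEAD']);
    return stdout.trim();
}

async function fetchRepo(cloneUrl, owner, repo, ref, destDir, downloadArchive) {
    try {
        const headSha = await shallowCloneSketch(cloneUrl, destDir, ref);
        return { source: 'git', repoRoot: destDir, headSha, notes: [] };
    }
    catch (err) {
        // Archive fallback: no .git metadata, so headSha stays undefined and
        // downstream update checks must treat the content as always-changed.
        const repoRoot = await downloadArchive(owner, repo, ref, `${destDir}__archive`);
        const msg = err instanceof Error ? err.message : String(err);
        return { source: 'archive', repoRoot, headSha: undefined, notes: [`git clone failed; used archive fallback: ${msg}`] };
    }
}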
@@ -347,7 +686,7 @@ async function cloneAndIngestGitHubRepo(params) {
     const bundleNormPrefixPosix = `repos/${params.owner}/${params.repo}/norm`;
     const ingested = await ingestRepoToBundle({
         repoId,
-        repoRoot:
+        repoRoot: repoRootForIngest,
         rawDestRoot: rawDest,
         normDestRoot: normDest,
         bundleNormPrefixPosix,
@@ -356,7 +695,6 @@ async function cloneAndIngestGitHubRepo(params) {
             maxTotalBytes: params.cfg.maxTotalBytes,
         },
     });
-    const fetchedAt = nowIso();
     await writeRepoMeta({
         metaPath: repoMetaPath(bundlePaths, params.owner, params.repo),
         repoId,
@@ -365,9 +703,12 @@ async function cloneAndIngestGitHubRepo(params) {
         fetchedAt,
         ingestedFiles: ingested.files.length,
         skipped: ingested.skipped,
+        source,
+        ref: refUsed,
     });
-    await rmIfExists(
-
+    await rmIfExists(tmpCheckoutGit);
+    await rmIfExists(tmpArchiveDir);
+    return { headSha, files: ingested.files, skipped: ingested.skipped, notes, source };
 }
 function groupFilesByRepoId(files) {
     const byRepo = new Map();
@@ -401,44 +742,88 @@ async function generateFactsBestEffort(params) {
         logger.error('Static analysis exception', err instanceof Error ? err : undefined);
     }
 }
-export async function createBundle(cfg, input) {
+export async function createBundle(cfg, input, options) {
+    // Apply concurrency limiting to prevent DoS attacks
+    return await bundleCreationLimiter.run(async () => {
+        return await createBundleInternal(cfg, input, options);
+    });
+}
+async function createBundleInternal(cfg, input, options) {
+    const fingerprint = computeCreateInputFingerprint(input);
+    const ifExists = options?.ifExists ?? 'error';
+    if (ifExists !== 'createNew') {
+        const existing = await findExistingBundleByFingerprint(cfg, fingerprint);
+        if (existing) {
+            if (ifExists === 'returnExisting') {
+                return await readBundleSummary(cfg, existing);
+            }
+            if (ifExists === 'updateExisting') {
+                const { summary } = await updateBundle(cfg, existing);
+                return summary;
+            }
+            throw new Error(`Bundle already exists for these inputs: ${existing}`);
+        }
+    }
     const bundleId = crypto.randomUUID();
     const createdAt = nowIso();
     // Use effective storage dir (falls back if primary unavailable)
     const effectiveStorageDir = await getEffectiveStorageDirForWrite(cfg);
-
-    const
-    await ensureDir(
-
+    // Create bundle in temporary directory for atomic creation
+    const tmpBundlesDir = path.join(cfg.tmpDir, 'bundles-wip');
+    await ensureDir(tmpBundlesDir);
+    const tmpPaths = getBundlePaths(tmpBundlesDir, bundleId);
+    await ensureDir(tmpPaths.rootDir);
+    const finalPaths = getBundlePaths(effectiveStorageDir, bundleId);
     const allIngestedFiles = [];
     const reposSummary = [];
     try {
-
+        // All operations happen in tmpPaths (temporary directory)
         for (const repoInput of input.repos) {
             if (repoInput.kind === 'github') {
                 const { owner, repo } = parseOwnerRepo(repoInput.repo);
-                const { headSha, files, skipped } = await cloneAndIngestGitHubRepo({
+                const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
                     cfg,
                     bundleId,
-                    storageDir:
+                    storageDir: tmpBundlesDir,
                     owner,
                     repo,
                     ref: repoInput.ref,
                 });
                 allIngestedFiles.push(...files);
-                reposSummary.push({
+                reposSummary.push({
+                    kind: 'github',
+                    id: `${owner}/${repo}`,
+                    source,
+                    headSha,
+                    notes: [...notes, ...skipped].slice(0, 50),
+                });
+            }
+            else if (repoInput.kind === 'local') {
+                const { owner, repo } = parseOwnerRepo(repoInput.repo);
+                const { files, skipped } = await ingestLocalRepo({
+                    cfg,
+                    bundleId,
+                    storageDir: tmpBundlesDir,
+                    owner,
+                    repo,
+                    localPath: repoInput.path,
+                    ref: repoInput.ref,
+                });
+                allIngestedFiles.push(...files);
+                reposSummary.push({ kind: 'local', id: `${owner}/${repo}`, source: 'local', notes: skipped.slice(0, 50) });
             }
             else {
                 // DeepWiki integration: fetch and convert to Markdown.
                 const deepwikiResult = await ingestDeepWikiRepo({
                     cfg,
-                    bundlePaths:
+                    bundlePaths: tmpPaths,
                     url: repoInput.url,
                 });
                 allIngestedFiles.push(...deepwikiResult.files);
                 reposSummary.push({
                     kind: 'deepwiki',
                     id: deepwikiResult.summary.repoId,
+                    source: 'deepwiki',
                     notes: deepwikiResult.summary.notes,
                 });
             }
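Two policy changes meet at the top of createBundle. First, every creation now runs inside bundleCreationLimiter.run(...), bounding how many clone-and-ingest pipelines execute at once. Second, the new ifExists option ('error' | 'returnExisting' | 'updateExisting' | 'createNew') decides what happens when the fingerprint already maps to a bundle. The limiter's implementation lives in dist/core/concurrency-limiter.js and is not shown in this hunk; a minimal counting semaphore that would satisfy the run(fn) contract could look like this (an assumption, not the package's actual code):

// At most `limit` fn() calls in flight; excess callers wait FIFO.
class ConcurrencyLimiter {
    constructor(limit) {
        this.limit = limit;
        this.active = 0;
        this.queue = [];
    }
    async run(fn) {
        if (this.active >= this.limit) {
            await new Promise((resolve) => this.queue.push(resolve));
        }
        this.active += 1;
        try {
            return await fn();
        }
        finally {
            this.active -= 1;
            const next = this.queue.shift();
            if (next)
                next();
        }
    }
}

export const bundleCreationLimiter = new ConcurrencyLimiter(2); // hypothetical limit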
@@ -447,11 +832,11 @@ export async function createBundle(cfg, input) {
         let librariesSummary;
         if (input.libraries?.length) {
             // Clean libraries dir in case something wrote here earlier.
-            await rmIfExists(
-            await ensureDir(
+            await rmIfExists(tmpPaths.librariesDir);
+            await ensureDir(tmpPaths.librariesDir);
             const libIngest = await ingestContext7Libraries({
                 cfg,
-                bundlePaths:
+                bundlePaths: tmpPaths,
                 libraries: input.libraries,
                 topics: input.topics,
             });
@@ -459,7 +844,7 @@ export async function createBundle(cfg, input) {
             librariesSummary = libIngest.libraries;
         }
         // Build index.
-        await rebuildIndex(
+        await rebuildIndex(tmpPaths.searchDbPath, allIngestedFiles, {
             includeDocs: true,
             includeCode: true,
         });
@@ -482,6 +867,7 @@ export async function createBundle(cfg, input) {
             bundleId,
             createdAt,
             updatedAt: createdAt,
+            fingerprint,
             displayName,
             description,
             tags,
@@ -494,6 +880,7 @@ export async function createBundle(cfg, input) {
             repos: reposSummary.map((r) => ({
                 kind: r.kind,
                 id: r.id,
+                source: r.source,
                 headSha: r.headSha,
                 fetchedAt: createdAt,
                 notes: r.notes,
@@ -505,18 +892,18 @@ export async function createBundle(cfg, input) {
                 includeCode: true,
             },
         };
-        await writeManifest(
+        await writeManifest(tmpPaths.manifestPath, manifest);
         // Guides.
-        await writeAgentsMd(
+        await writeAgentsMd(tmpPaths.agentsPath);
         await writeStartHereMd({
-            targetPath:
+            targetPath: tmpPaths.startHerePath,
             bundleId,
             repos: reposSummary.map((r) => ({ id: r.id, headSha: r.headSha })),
             libraries: librariesSummary,
         });
         // Overview (S2: factual-only with evidence pointers).
         const perRepoOverviews = reposSummary
-            .filter((r) => r.kind === 'github')
+            .filter((r) => r.kind === 'github' || r.kind === 'local')
             .map((r) => {
             const repoId = r.id;
             const repoFiles = allIngestedFiles.filter((f) => f.repoId === repoId);
@@ -524,29 +911,54 @@ export async function createBundle(cfg, input) {
         });
         const overviewMd = await generateOverviewMarkdown({
             bundleId,
-            bundleRootDir:
+            bundleRootDir: tmpPaths.rootDir,
             repos: perRepoOverviews,
             libraries: librariesSummary,
         });
-        await writeOverviewFile(
+        await writeOverviewFile(tmpPaths.overviewPath, overviewMd);
         // Generate static facts (FACTS.json). This is intentionally non-LLM and safe to keep inside bundles.
         await generateFactsBestEffort({
             bundleId,
-            bundleRoot:
+            bundleRoot: tmpPaths.rootDir,
             files: allIngestedFiles,
             mode: cfg.analysisMode,
         });
-        //
-
-            await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
-        }
-        // CRITICAL: Validate bundle completeness before finalizing
-        const validation = await validateBundleCompleteness(paths.rootDir);
+        // CRITICAL: Validate bundle completeness BEFORE atomic move
+        const validation = await validateBundleCompleteness(tmpPaths.rootDir);
         if (!validation.isValid) {
             const errorMsg = `Bundle creation incomplete. Missing: ${validation.missingComponents.join(', ')}`;
             logger.error(errorMsg);
             throw new Error(errorMsg);
         }
+        // ATOMIC OPERATION: Move from temp to final location
+        // This is atomic on most filesystems - bundle becomes visible only when complete
+        logger.info(`Moving bundle ${bundleId} from temp to final location (atomic)`);
+        await ensureDir(effectiveStorageDir);
+        try {
+            // Try rename first (atomic, but only works on same filesystem)
+            await fs.rename(tmpPaths.rootDir, finalPaths.rootDir);
+            logger.info(`Bundle ${bundleId} moved atomically to ${finalPaths.rootDir}`);
+        }
+        catch (renameErr) {
+            // Rename failed - likely cross-filesystem. Fall back to copy+delete
+            const errCode = renameErr.code;
+            if (errCode === 'EXDEV') {
+                logger.warn(`Cross-filesystem move detected for ${bundleId}, falling back to copy`);
+                await copyDir(tmpPaths.rootDir, finalPaths.rootDir);
+                await rmIfExists(tmpPaths.rootDir);
+                logger.info(`Bundle ${bundleId} copied to ${finalPaths.rootDir}`);
+            }
+            else {
+                // Some other error, rethrow
+                throw renameErr;
+            }
+        }
+        // Mirror to backup storage directories (non-blocking on failures)
+        if (cfg.storageDirs.length > 1) {
+            await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
+        }
+        // Update de-duplication index (best-effort). This is intentionally after atomic move.
+        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt);
         const summary = {
             bundleId,
             createdAt,
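This hunk is the heart of the atomicity fix: the bundle is assembled and validated under tmpDir/bundles-wip and only then published with fs.rename, so readers can never observe a half-built bundle. Rename is atomic within one filesystem but fails with EXDEV across mount points, hence the copy-then-delete fallback. The bare pattern:

import fs from 'node:fs/promises';

// Publish a fully built directory; readers never see a partial one.
async function publishDir(tmpDir, finalDir) {
    try {
        await fs.rename(tmpDir, finalDir); // atomic on the same filesystem
    }
    catch (err) {
        if (err.code !== 'EXDEV')
            throw err;
        // Different filesystems: copy, then remove. Not atomic (finalDir is
        // visible mid-copy), which is exactly why rename is tried first.
        await fs.cp(tmpDir, finalDir, { recursive: true });
        await fs.rm(tmpDir, { recursive: true, force: true });
    }
}

When cfg.tmpDir sits on the same mount as the storage directory the EXDEV branch never runs; the fallback covers setups where it does not.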
@@ -557,15 +969,19 @@ export async function createBundle(cfg, input) {
         return summary;
     }
     catch (err) {
-        //
-
-
-        await cleanupFailedBundle(cfg, bundleId);
-        }
+        // Clean up temp directory on failure
+        logger.error(`Bundle creation failed, cleaning up temp: ${bundleId}`, err instanceof Error ? err : undefined);
+        await rmIfExists(tmpPaths.rootDir);
         // Enhance error message
         const errorMsg = err instanceof Error ? err.message : String(err);
         throw new Error(`Failed to create bundle: ${errorMsg}`);
     }
+    finally {
+        // Ensure temp directory is cleaned up (double safety)
+        await rmIfExists(tmpPaths.rootDir).catch(() => {
+            // Ignore cleanup errors
+        });
+    }
 }
 /** Check if a bundle has upstream changes without applying updates. */
 export async function checkForUpdates(cfg, bundleId) {
@@ -592,6 +1008,14 @@ export async function checkForUpdates(cfg, bundleId) {
             hasUpdates = true;
             details.push({ repoId, currentSha: prev?.headSha, remoteSha, changed });
         }
+        else if (repoInput.kind === 'local') {
+            const { owner, repo } = parseOwnerRepo(repoInput.repo);
+            const repoId = `${owner}/${repo}`;
+            // We can't reliably detect whether local files changed without scanning; assume possible update.
+            const prev = manifest.repos.find((r) => r.id === repoId);
+            details.push({ repoId, currentSha: prev?.headSha, changed: true });
+            hasUpdates = true;
+        }
         else {
             // DeepWiki: can't easily detect changes, assume possible update
             details.push({ repoId: repoInput.url, changed: true });
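checkForUpdates now treats local repos like DeepWiki sources: with no recorded hashes to compare against, it conservatively reports changed: true and leaves the decision to the caller. A plausible caller, using the { hasUpdates, details } shape returned above:

const { hasUpdates, details } = await checkForUpdates(cfg, bundleId);
if (hasUpdates) {
    for (const d of details.filter((d) => d.changed)) {
        console.log(`stale: ${d.repoId} (${d.currentSha ?? 'no sha'} -> ${d.remoteSha ?? 'unknown'})`);
    }
    await updateBundle(cfg, bundleId); // re-clone / re-ingest everything
}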
@@ -600,6 +1024,257 @@ export async function checkForUpdates(cfg, bundleId) {
     }
     return { hasUpdates, details };
 }
+async function scanBundleIndexableFiles(params) {
+    const files = [];
+    const skipped = [];
+    let totalBytes = 0;
+    const pushFile = async (f) => {
+        const st = await statOrNull(f.absPath);
+        if (!st?.isFile())
+            return;
+        if (st.size > params.cfg.maxFileBytes) {
+            skipped.push(`${f.bundleRelPosix} (too large: ${st.size} bytes)`);
+            return;
+        }
+        if (totalBytes + st.size > params.cfg.maxTotalBytes) {
+            skipped.push(`(bundle maxTotalBytes reached) stopped before: ${f.bundleRelPosix}`);
+            return;
+        }
+        const text = await readUtf8OrNull(f.absPath);
+        if (text === null) {
+            skipped.push(`${f.bundleRelPosix} (unreadable utf8)`);
+            return;
+        }
+        const normalized = text.replace(/\r\n/g, '\n');
+        const sha256 = sha256Text(normalized);
+        totalBytes += st.size;
+        files.push({
+            repoId: f.repoId,
+            kind: f.kind,
+            repoRelativePath: f.repoRelativePath,
+            bundleNormRelativePath: f.bundleRelPosix,
+            bundleNormAbsPath: f.absPath,
+            sha256,
+            bytes: st.size,
+        });
+    };
+    // 1) repos/<owner>/<repo>/norm/** (github/local)
+    try {
+        const owners = await fs.readdir(params.reposDir, { withFileTypes: true });
+        for (const ownerEnt of owners) {
+            if (!ownerEnt.isDirectory())
+                continue;
+            const owner = ownerEnt.name;
+            const ownerDir = path.join(params.reposDir, owner);
+            const repos = await fs.readdir(ownerDir, { withFileTypes: true });
+            for (const repoEnt of repos) {
+                if (!repoEnt.isDirectory())
+                    continue;
+                const repo = repoEnt.name;
+                const normDir = path.join(ownerDir, repo, 'norm');
+                const normSt = await statOrNull(normDir);
+                if (!normSt?.isDirectory())
+                    continue;
+                for await (const wf of walkFilesNoIgnore(normDir)) {
+                    const repoRel = wf.relPosix;
+                    const kind = classifyIngestedFileKind(repoRel);
+                    const bundleRel = `repos/${owner}/${repo}/norm/${repoRel}`;
+                    await pushFile({
+                        repoId: `${owner}/${repo}`,
+                        kind,
+                        repoRelativePath: repoRel,
+                        bundleRelPosix: bundleRel,
+                        absPath: wf.absPath,
+                    });
+                }
+            }
+        }
+    }
+    catch {
+        // ignore missing repos dir
+    }
+    // 2) libraries/context7/** (docs-only)
+    const context7Dir = path.join(params.librariesDir, 'context7');
+    const ctxSt = await statOrNull(context7Dir);
+    if (ctxSt?.isDirectory()) {
+        for await (const wf of walkFilesNoIgnore(context7Dir)) {
+            // Match original ingestion: only .md docs are indexed from Context7.
+            if (!wf.relPosix.toLowerCase().endsWith('.md'))
+                continue;
+            const relFromLibRoot = wf.relPosix; // relative to libraries/context7
+            const parts = relFromLibRoot.split('/').filter(Boolean);
+            const fileName = parts[parts.length - 1] ?? '';
+            const dirParts = parts.slice(0, -1);
+            let repoId = 'context7:unknown';
+            if (dirParts[0] === '_unresolved' && dirParts[1]) {
+                repoId = `context7:unresolved/${dirParts[1]}`;
+            }
+            else if (dirParts.length > 0) {
+                repoId = `context7:/${dirParts.join('/')}`;
+            }
+            const bundleRel = `libraries/context7/${relFromLibRoot}`;
+            await pushFile({
+                repoId,
+                kind: 'doc',
+                repoRelativePath: fileName,
+                bundleRelPosix: bundleRel,
+                absPath: wf.absPath,
+            });
+        }
+    }
+    // 3) deepwiki/<owner>/<repo>/norm/** (docs-only)
+    const deepwikiDir = path.join(params.bundleRootDir, 'deepwiki');
+    const dwSt = await statOrNull(deepwikiDir);
+    if (dwSt?.isDirectory()) {
+        // Only walk the norm subtrees.
+        const owners = await fs.readdir(deepwikiDir, { withFileTypes: true });
+        for (const ownerEnt of owners) {
+            if (!ownerEnt.isDirectory())
+                continue;
+            const owner = ownerEnt.name;
+            const ownerDir = path.join(deepwikiDir, owner);
+            const repos = await fs.readdir(ownerDir, { withFileTypes: true });
+            for (const repoEnt of repos) {
+                if (!repoEnt.isDirectory())
+                    continue;
+                const repo = repoEnt.name;
+                const normDir = path.join(ownerDir, repo, 'norm');
+                const normSt = await statOrNull(normDir);
+                if (!normSt?.isDirectory())
+                    continue;
+                for await (const wf of walkFilesNoIgnore(normDir)) {
+                    if (!wf.relPosix.toLowerCase().endsWith('.md'))
+                        continue;
+                    const bundleRel = `deepwiki/${owner}/${repo}/norm/${wf.relPosix}`;
+                    await pushFile({
+                        repoId: `deepwiki:${owner}/${repo}`,
+                        kind: 'doc',
+                        repoRelativePath: wf.relPosix,
+                        bundleRelPosix: bundleRel,
+                        absPath: wf.absPath,
+                    });
+                }
+            }
+        }
+    }
+    return { files, totalBytes, skipped };
+}
+export async function repairBundle(cfg, bundleId, options) {
+    const mode = options?.mode ?? 'repair';
+    const rebuildIndexOpt = options?.rebuildIndex ?? true;
+    const rebuildGuidesOpt = options?.rebuildGuides ?? true;
+    const rebuildOverviewOpt = options?.rebuildOverview ?? true;
+    const storageDir = await findBundleStorageDir(cfg.storageDirs, bundleId);
+    if (!storageDir) {
+        throw new Error(`Bundle not found: ${bundleId}`);
+    }
+    const paths = getBundlePaths(storageDir, bundleId);
+    const before = await validateBundleCompleteness(paths.rootDir);
+    if (mode === 'validate') {
+        return {
+            bundleId,
+            mode,
+            repaired: false,
+            actionsTaken: [],
+            before,
+            after: before,
+        };
+    }
+    // Manifest is required for safe repairs (no fetching/re-ingest).
+    const manifest = await readManifest(paths.manifestPath);
+    const actionsTaken = [];
+    // Determine what needs repair.
+    const stAgents = await statOrNull(paths.agentsPath);
+    const stStartHere = await statOrNull(paths.startHerePath);
+    const stOverview = await statOrNull(paths.overviewPath);
+    const stIndex = await statOrNull(paths.searchDbPath);
+    const needsAgents = !stAgents || stAgents.size === 0;
+    const needsStartHere = !stStartHere || stStartHere.size === 0;
+    const needsOverview = !stOverview || stOverview.size === 0;
+    const needsIndex = !stIndex || stIndex.size === 0;
+    // Scan bundle files once if needed for index/overview.
+    let scanned = null;
+    const needScan = (rebuildIndexOpt && needsIndex) || (rebuildOverviewOpt && needsOverview);
+    if (needScan) {
+        scanned = await scanBundleIndexableFiles({
+            cfg,
+            bundleRootDir: paths.rootDir,
+            reposDir: paths.reposDir,
+            librariesDir: paths.librariesDir,
+        });
+        if (scanned.skipped.length) {
+            actionsTaken.push(`scan: skipped ${scanned.skipped.length} file(s)`);
+        }
+    }
+    if (rebuildIndexOpt && needsIndex) {
+        const files = scanned?.files ?? [];
+        await rebuildIndex(paths.searchDbPath, files, { includeDocs: true, includeCode: true });
+        actionsTaken.push(`rebuildIndex: indexed ${files.length} file(s)`);
+    }
+    if (rebuildGuidesOpt && needsAgents) {
+        await writeAgentsMd(paths.agentsPath);
+        actionsTaken.push('writeAgentsMd');
+    }
+    if (rebuildGuidesOpt && needsStartHere) {
+        await writeStartHereMd({
+            targetPath: paths.startHerePath,
+            bundleId,
+            repos: (manifest.repos ?? []).map((r) => ({ id: r.id, headSha: r.headSha })),
+            libraries: manifest.libraries,
+        });
+        actionsTaken.push('writeStartHereMd');
+    }
+    if (rebuildOverviewOpt && needsOverview) {
+        const allFiles = scanned?.files ?? [];
+        const perRepoOverviews = (manifest.repos ?? [])
+            .filter((r) => r.kind === 'github' || r.kind === 'local')
+            .map((r) => {
+            const repoId = r.id;
+            const repoFiles = allFiles.filter((f) => f.repoId === repoId);
+            return { repoId, headSha: r.headSha, files: repoFiles };
+        });
+        const md = await generateOverviewMarkdown({
+            bundleId,
+            bundleRootDir: paths.rootDir,
+            repos: perRepoOverviews,
+            libraries: manifest.libraries,
+        });
+        await writeOverviewFile(paths.overviewPath, md);
+        actionsTaken.push('writeOverviewFile');
+    }
+    let updatedAt;
+    if (actionsTaken.length > 0) {
+        updatedAt = nowIso();
+        const fingerprint = manifest.fingerprint ??
+            computeCreateInputFingerprint({
+                repos: manifest.inputs.repos,
+                libraries: manifest.inputs.libraries,
+                topics: manifest.inputs.topics,
+            });
+        const newManifest = {
+            ...manifest,
+            updatedAt,
+            fingerprint,
+        };
+        await writeManifest(paths.manifestPath, newManifest);
+        // Keep the de-duplication index fresh (best-effort).
+        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
+        // Mirror to backup storage directories (non-blocking on failures)
+        if (cfg.storageDirs.length > 1) {
+            await mirrorBundleToBackups(storageDir, cfg.storageDirs, bundleId);
+        }
+    }
+    const after = await validateBundleCompleteness(paths.rootDir);
+    return {
+        bundleId,
+        mode,
+        repaired: actionsTaken.length > 0,
+        actionsTaken,
+        before,
+        after,
+        updatedAt,
+    };
+}
 export async function updateBundle(cfg, bundleId, options) {
     // Use effective storage dir (falls back if primary unavailable)
     const effectiveStorageDir = await getEffectiveStorageDirForWrite(cfg);
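The new repairBundle is deliberately conservative: it rebuilds only derived artifacts (the SQLite search index, AGENTS.md, START_HERE.md, the overview) from files already on disk, never re-fetching sources, and scanBundleIndexableFiles re-derives the file list by walking the three ingested trees (repos/*/norm, libraries/context7, deepwiki/*/norm). With mode: 'validate' it becomes a pure dry run. Typical use:

// Dry run first: report, don't touch.
const report = await repairBundle(cfg, bundleId, { mode: 'validate' });
if (!report.before.isValid) {
    // Rebuild whatever derived files are missing or empty.
    const result = await repairBundle(cfg, bundleId, { mode: 'repair' });
    console.log(result.actionsTaken); // e.g. ['rebuildIndex: indexed 123 file(s)', 'writeAgentsMd']
}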
@@ -626,7 +1301,7 @@ export async function updateBundle(cfg, bundleId, options) {
             if (remoteSha && prev?.headSha && remoteSha !== prev.headSha) {
                 changed = true;
             }
-            const { headSha, files, skipped } = await cloneAndIngestGitHubRepo({
+            const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
                 cfg,
                 bundleId,
                 storageDir: effectiveStorageDir,
@@ -634,11 +1309,31 @@ export async function updateBundle(cfg, bundleId, options) {
                 repo,
                 ref: repoInput.ref,
             });
-            if (prev?.headSha && headSha !== prev.headSha) {
+            if (prev?.headSha && headSha && headSha !== prev.headSha) {
                 changed = true;
             }
+            // If we had to fall back to an archive, treat as changed (we don't have git metadata).
+            if (source === 'archive') {
+                changed = true;
+            }
+            allIngestedFiles.push(...files);
+            reposSummary.push({ kind: 'github', id: repoId, source, headSha, notes: [...notes, ...skipped].slice(0, 50) });
+        }
+        else if (repoInput.kind === 'local') {
+            const { owner, repo } = parseOwnerRepo(repoInput.repo);
+            const repoId = `${owner}/${repo}`;
+            const { files, skipped } = await ingestLocalRepo({
+                cfg,
+                bundleId,
+                storageDir: effectiveStorageDir,
+                owner,
+                repo,
+                localPath: repoInput.path,
+                ref: repoInput.ref,
+            });
             allIngestedFiles.push(...files);
-            reposSummary.push({ kind: '
+            reposSummary.push({ kind: 'local', id: repoId, source: 'local', notes: skipped.slice(0, 50) });
+            changed = true;
         }
         else {
             // DeepWiki integration: fetch and convert to Markdown.
@@ -651,6 +1346,7 @@ export async function updateBundle(cfg, bundleId, options) {
             reposSummary.push({
                 kind: 'deepwiki',
                 id: deepwikiResult.summary.repoId,
+                source: 'deepwiki',
                 notes: deepwikiResult.summary.notes,
             });
             // Always mark as changed for DeepWiki since we can't easily detect content changes.
@@ -676,12 +1372,19 @@ export async function updateBundle(cfg, bundleId, options) {
         includeDocs: manifest.index.includeDocs,
         includeCode: manifest.index.includeCode,
     });
+    const fingerprint = computeCreateInputFingerprint({
+        repos: manifest.inputs.repos,
+        libraries: manifest.inputs.libraries,
+        topics: manifest.inputs.topics,
+    });
     const newManifest = {
         ...manifest,
         updatedAt,
+        fingerprint,
         repos: reposSummary.map((r) => ({
             kind: r.kind,
             id: r.id,
+            source: r.source,
             headSha: r.headSha,
             fetchedAt: updatedAt,
             notes: r.notes,
@@ -698,7 +1401,7 @@ export async function updateBundle(cfg, bundleId, options) {
         libraries: librariesSummary,
     });
     const perRepoOverviews = reposSummary
-        .filter((r) => r.kind === 'github')
+        .filter((r) => r.kind === 'github' || r.kind === 'local')
         .map((r) => {
         const repoId = r.id;
         const repoFiles = allIngestedFiles.filter((f) => f.repoId === repoId);
@@ -722,6 +1425,8 @@ export async function updateBundle(cfg, bundleId, options) {
     if (cfg.storageDirs.length > 1) {
         await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
     }
+    // Keep the de-duplication index fresh (best-effort).
+    await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
     const summary = {
         bundleId,
         createdAt: manifest.createdAt,
@@ -731,11 +1436,22 @@ export async function updateBundle(cfg, bundleId, options) {
     };
     return { summary, changed };
 }
+/**
+ * Check if a string is a valid UUID (v4 format).
+ * Bundle IDs should be UUIDs with dashes.
+ */
+function isValidBundleId(id) {
+    // UUID v4 format: xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx
+    const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+    return uuidRegex.test(id);
+}
 /** List bundles from a single storage directory. */
 export async function listBundles(storageDir) {
     try {
         const entries = await fs.readdir(storageDir, { withFileTypes: true });
-        return entries
+        return entries
+            .filter((e) => e.isDirectory() && isValidBundleId(e.name))
+            .map((e) => e.name);
     }
     catch {
         return [];
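Restricting listBundles to directory names that parse as UUIDs does double duty: it hides stray files in the storage root, and it makes the rename-based deletion in the next hunk invisible, since <id>.deleting.<timestamp> no longer matches. For example:

const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
console.log(uuidRegex.test('7f3c2a10-9b4e-4c1d-8a2f-0e5d6c7b8a90')); // true
console.log(uuidRegex.test('7f3c2a10-9b4e-4c1d-8a2f-0e5d6c7b8a90.deleting.1700000000000')); // false

One nit: the doc comment says "v4 format", but the regex accepts any UUID variant. That looseness is harmless here, since bundle IDs come from crypto.randomUUID().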
@@ -781,18 +1497,45 @@ export async function clearBundle(storageDir, bundleId) {
     const p = getBundlePaths(storageDir, bundleId);
     await rmIfExists(p.rootDir);
 }
-/**
+/**
+ * Clear bundle from ALL storage directories (mirror delete).
+ * Uses fast rename + background deletion to avoid blocking.
+ */
 export async function clearBundleMulti(storageDirs, bundleId) {
     let deleted = false;
     for (const dir of storageDirs) {
         try {
-
+            const paths = getBundlePaths(dir, bundleId);
+            // Check if the bundle directory exists
+            try {
+                await fs.stat(paths.rootDir);
+            }
+            catch {
+                // Directory doesn't exist, skip
+                continue;
+            }
+            // Fast deletion strategy: rename first (instant), then delete in background
+            const deletingPath = `${paths.rootDir}.deleting.${Date.now()}`;
+            try {
+                // Rename is atomic and instant on most filesystems
+                await fs.rename(paths.rootDir, deletingPath);
+                deleted = true;
+                // Background deletion (fire-and-forget)
+                // The renamed directory is invisible to listBundles (not a valid UUID)
+                rmIfExists(deletingPath).catch((err) => {
+                    logger.warn(`Background deletion failed for ${bundleId}: ${err instanceof Error ? err.message : String(err)}`);
+                });
+            }
+            catch (err) {
+                // Rename failed (maybe concurrent deletion), try direct delete as fallback
+                logger.warn(`Rename failed for ${bundleId}, falling back to direct delete`);
                 await clearBundle(dir, bundleId);
                 deleted = true;
             }
         }
-        catch {
+        catch (err) {
             // Skip unavailable paths
+            logger.debug(`Failed to delete bundle from ${dir}: ${err instanceof Error ? err.message : String(err)}`);
         }
     }
     return deleted;