preflight-mcp 0.1.0 → 0.1.1
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
- package/LICENSE +21 -0
- package/README.md +195 -27
- package/README.zh-CN.md +277 -308
- package/dist/bundle/deepwiki.js +1 -1
- package/dist/bundle/github.js +100 -15
- package/dist/bundle/githubArchive.js +82 -0
- package/dist/bundle/ingest.js +2 -2
- package/dist/bundle/paths.js +23 -0
- package/dist/bundle/service.js +701 -25
- package/dist/config.js +1 -0
- package/dist/context7/client.js +1 -1
- package/dist/core/concurrency-limiter.js +100 -0
- package/dist/core/scheduler.js +4 -1
- package/dist/jobs/tmp-cleanup-job.js +71 -0
- package/dist/mcp/errorKinds.js +54 -0
- package/dist/mcp/uris.js +28 -8
- package/dist/search/sqliteFts.js +68 -36
- package/dist/server/optimized-server.js +4 -0
- package/dist/server.js +455 -279
- package/dist/tools/searchByTags.js +80 -0
- package/package.json +26 -1
package/dist/bundle/service.js
CHANGED
@@ -3,7 +3,8 @@ import fs from 'node:fs/promises';
 import path from 'node:path';
 import { logger } from '../logging/logger.js';
 import { getLocalHeadSha, getRemoteHeadSha, parseOwnerRepo, shallowClone, toCloneUrl, } from './github.js';
-import {
+import { downloadAndExtractGitHubArchive } from './githubArchive.js';
+import { classifyIngestedFileKind, ingestRepoToBundle } from './ingest.js';
 import { writeManifest, readManifest } from './manifest.js';
 import { getBundlePaths, repoMetaPath, repoNormDir, repoRawDir, repoRootDir } from './paths.js';
 import { writeAgentsMd, writeStartHereMd } from './guides.js';
@@ -13,22 +14,261 @@ import { ingestContext7Libraries } from './context7.js';
 import { ingestDeepWikiRepo } from './deepwiki.js';
 import { analyzeBundleStatic } from './analysis.js';
 import { autoDetectTags, generateDisplayName, generateDescription } from './tagging.js';
+import { bundleCreationLimiter } from '../core/concurrency-limiter.js';
+const DEDUP_INDEX_FILE = '.preflight-dedup-index.json';
+function sha256Hex(text) {
+    return crypto.createHash('sha256').update(text, 'utf8').digest('hex');
+}
+function normalizeList(values) {
+    return (values ?? [])
+        .map((s) => s.trim())
+        .filter(Boolean)
+        .map((s) => s.toLowerCase())
+        .sort();
+}
+function normalizeDeepWikiUrl(raw) {
+    const trimmed = raw.trim();
+    try {
+        const u = new URL(trimmed);
+        u.hash = '';
+        // Normalize host and strip trailing slash.
+        u.host = u.host.toLowerCase();
+        u.pathname = u.pathname.replace(/\/+$/g, '');
+        return u.toString();
+    }
+    catch {
+        return trimmed;
+    }
+}
+function canonicalizeCreateInput(input) {
+    const repos = input.repos
+        .map((r) => {
+        if (r.kind === 'github') {
+            const { owner, repo } = parseOwnerRepo(r.repo);
+            return {
+                kind: 'github',
+                repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
+                ref: (r.ref ?? '').trim() || undefined,
+            };
+        }
+        if (r.kind === 'local') {
+            // For de-duplication, treat local imports as equivalent to github imports of the same logical repo/ref.
+            const { owner, repo } = parseOwnerRepo(r.repo);
+            return {
+                kind: 'github',
+                repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
+                ref: (r.ref ?? '').trim() || undefined,
+            };
+        }
+        return {
+            kind: 'deepwiki',
+            url: normalizeDeepWikiUrl(r.url),
+        };
+    })
+        .sort((a, b) => {
+        const ka = a.kind === 'github' ? `github:${a.repo}:${a.ref ?? ''}` : `deepwiki:${a.url}`;
+        const kb = b.kind === 'github' ? `github:${b.repo}:${b.ref ?? ''}` : `deepwiki:${b.url}`;
+        return ka.localeCompare(kb);
+    });
+    return {
+        schemaVersion: 1,
+        repos,
+        libraries: normalizeList(input.libraries),
+        topics: normalizeList(input.topics),
+    };
+}
+export function computeCreateInputFingerprint(input) {
+    const canonical = canonicalizeCreateInput(input);
+    return sha256Hex(JSON.stringify(canonical));
+}
+function dedupIndexPath(storageDir) {
+    return path.join(storageDir, DEDUP_INDEX_FILE);
+}
+async function readDedupIndex(storageDir) {
+    const p = dedupIndexPath(storageDir);
+    try {
+        const raw = await fs.readFile(p, 'utf8');
+        const parsed = JSON.parse(raw);
+        if (parsed.schemaVersion !== 1 || typeof parsed.byFingerprint !== 'object' || !parsed.byFingerprint) {
+            return { schemaVersion: 1, updatedAt: nowIso(), byFingerprint: {} };
+        }
+        return parsed;
+    }
+    catch {
+        return { schemaVersion: 1, updatedAt: nowIso(), byFingerprint: {} };
+    }
+}
+async function writeDedupIndex(storageDir, idx) {
+    const p = dedupIndexPath(storageDir);
+    await ensureDir(path.dirname(p));
+    // Use atomic write (write to temp file, then rename) to prevent corruption
+    const tmpPath = `${p}.tmp.${Date.now()}.${Math.random().toString(36).slice(2)}`;
+    try {
+        await fs.writeFile(tmpPath, JSON.stringify(idx, null, 2) + '\n', 'utf8');
+        // Atomic rename on POSIX; near-atomic on Windows
+        await fs.rename(tmpPath, p);
+    }
+    catch (err) {
+        // Clean up temp file on error
+        try {
+            await fs.unlink(tmpPath);
+        }
+        catch {
+            // Ignore cleanup errors
+        }
+        throw err;
+    }
+}
+async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt) {
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            const parentAvailable = await isParentAvailable(storageDir);
+            if (!parentAvailable)
+                continue;
+            await ensureDir(storageDir);
+            const idx = await readDedupIndex(storageDir);
+            idx.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt };
+            idx.updatedAt = nowIso();
+            await writeDedupIndex(storageDir, idx);
+        }
+        catch {
+            // best-effort
+        }
+    }
+}
+async function readBundleSummary(cfg, bundleId) {
+    const storageDir = (await findBundleStorageDir(cfg.storageDirs, bundleId)) ?? (await getEffectiveStorageDir(cfg));
+    const paths = getBundlePaths(storageDir, bundleId);
+    const manifest = await readManifest(paths.manifestPath);
+    return {
+        bundleId: manifest.bundleId,
+        createdAt: manifest.createdAt,
+        updatedAt: manifest.updatedAt,
+        repos: manifest.repos.map((r) => ({
+            kind: r.kind,
+            id: r.id,
+            source: r.source,
+            headSha: r.headSha,
+            notes: r.notes,
+        })),
+        libraries: manifest.libraries,
+    };
+}
+export async function findBundleByInputs(cfg, input) {
+    const fingerprint = computeCreateInputFingerprint(input);
+    return findExistingBundleByFingerprint(cfg, fingerprint);
+}
+async function findExistingBundleByFingerprint(cfg, fingerprint) {
+    // Fast path: consult any available dedup index.
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            if (!(await isPathAvailable(storageDir)))
+                continue;
+            const idx = await readDedupIndex(storageDir);
+            const hit = idx.byFingerprint[fingerprint];
+            if (hit?.bundleId && (await bundleExistsMulti(cfg.storageDirs, hit.bundleId))) {
+                return hit.bundleId;
+            }
+        }
+        catch {
+            // ignore
+        }
+    }
+    // Slow path: scan manifests (works even for bundles created before fingerprints existed).
+    let best = null;
+    for (const storageDir of cfg.storageDirs) {
+        if (!(await isPathAvailable(storageDir)))
+            continue;
+        const ids = await listBundles(storageDir);
+        for (const id of ids) {
+            try {
+                const paths = getBundlePaths(storageDir, id);
+                const manifest = await readManifest(paths.manifestPath);
+                const fp = computeCreateInputFingerprint({
+                    repos: manifest.inputs.repos,
+                    libraries: manifest.inputs.libraries,
+                    topics: manifest.inputs.topics,
+                });
+                if (fp === fingerprint) {
+                    const updatedAt = manifest.updatedAt;
+                    if (!best || new Date(updatedAt) > new Date(best.updatedAt)) {
+                        best = { bundleId: manifest.bundleId, updatedAt };
+                    }
+                }
+            }
+            catch {
+                // ignore corrupt bundles
+            }
+        }
+    }
+    if (best) {
+        // Seed index for next time (best-effort).
+        await updateDedupIndexBestEffort(cfg, fingerprint, best.bundleId, best.updatedAt);
+        return best.bundleId;
+    }
+    return null;
+}
 async function ensureDir(p) {
     await fs.mkdir(p, { recursive: true });
 }
 function nowIso() {
     return new Date().toISOString();
 }
+function toPosix(p) {
+    return p.replaceAll('\\', '/');
+}
+function sha256Text(text) {
+    return crypto.createHash('sha256').update(text, 'utf8').digest('hex');
+}
+async function statOrNull(p) {
+    try {
+        return await fs.stat(p);
+    }
+    catch {
+        return null;
+    }
+}
+async function readUtf8OrNull(p) {
+    try {
+        return await fs.readFile(p, 'utf8');
+    }
+    catch {
+        return null;
+    }
+}
+async function* walkFilesNoIgnore(rootDir) {
+    const stack = [rootDir];
+    while (stack.length) {
+        const dir = stack.pop();
+        const entries = await fs.readdir(dir, { withFileTypes: true });
+        for (const ent of entries) {
+            const abs = path.join(dir, ent.name);
+            const rel = toPosix(path.relative(rootDir, abs));
+            if (ent.isDirectory()) {
+                stack.push(abs);
+                continue;
+            }
+            if (!ent.isFile())
+                continue;
+            yield { absPath: abs, relPosix: rel };
+        }
+    }
+}
 async function writeRepoMeta(params) {
     await ensureDir(path.dirname(params.metaPath));
     const obj = {
         repoId: params.repoId,
         cloneUrl: params.cloneUrl,
-        headSha: params.headSha,
         fetchedAt: params.fetchedAt,
         ingestedFiles: params.ingestedFiles,
         skipped: params.skipped,
     };
+    if (params.headSha)
+        obj.headSha = params.headSha;
+    if (params.source)
+        obj.source = params.source;
+    if (params.ref)
+        obj.ref = params.ref;
     await fs.writeFile(params.metaPath, JSON.stringify(obj, null, 2) + '\n', 'utf8');
 }
 async function rmIfExists(p) {
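The de-duplication above keys on a canonical form of the create inputs: GitHub owner/repo are lower-cased (local imports fold into the same github shape), DeepWiki URLs are normalized, library/topic lists are trimmed, lower-cased, and sorted, and the repos array itself is sorted before SHA-256 hashing. A minimal sketch of the equivalence this buys, using hypothetical inputs (the import path is assumed; computeCreateInputFingerprint is the export above):

// Hypothetical consumer-side import path.
import { computeCreateInputFingerprint } from 'preflight-mcp/dist/bundle/service.js';

const a = computeCreateInputFingerprint({
    repos: [{ kind: 'github', repo: 'OWNER/Repo', ref: 'main' }],
    libraries: [' React '],
    topics: [],
});
const b = computeCreateInputFingerprint({
    repos: [{ kind: 'local', repo: 'owner/repo', path: '/tmp/repo', ref: 'main' }],
    libraries: ['react'],
    topics: [],
});
// a === b: both canonicalize to { kind: 'github', repo: 'owner/repo', ref: 'main' }
// with the same normalized library list, so they hash to the same fingerprint.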
@@ -205,26 +445,47 @@ async function mirrorBundleToBackups(primaryDir, backupDirs, bundleId) {
     const srcPath = path.join(primaryDir, bundleId);
     const mirrored = [];
     const failed = [];
-
-
-
+    // Mirror to all backup dirs in parallel for better performance
+    const mirrorPromises = backupDirs
+        .filter(dir => dir !== primaryDir) // Skip primary
+        .map(async (backupDir) => {
         const destPath = path.join(backupDir, bundleId);
         try {
             // Check if backup location is available
             const parentAvailable = await isParentAvailable(destPath);
             if (!parentAvailable) {
-
-                continue;
+                return { success: false, path: backupDir, error: 'Mount not available' };
             }
             // Ensure backup dir exists
             await ensureDir(backupDir);
             // Remove old and copy new
             await rmIfExists(destPath);
             await copyDir(srcPath, destPath);
-
+            return { success: true, path: backupDir };
         }
         catch (err) {
-
+            return {
+                success: false,
+                path: backupDir,
+                error: err instanceof Error ? err.message : String(err)
+            };
+        }
+    });
+    // Wait for all mirror operations to complete
+    const results = await Promise.allSettled(mirrorPromises);
+    for (const result of results) {
+        if (result.status === 'fulfilled') {
+            const { success, path: backupPath, error } = result.value;
+            if (success) {
+                mirrored.push(backupPath);
+            }
+            else {
+                failed.push({ path: backupPath, error: error ?? 'Unknown error' });
+            }
+        }
+        else {
+            // Promise rejection (shouldn't happen with try-catch, but handle it)
+            failed.push({ path: 'unknown', error: result.reason?.message ?? String(result.reason) });
         }
     }
     return { mirrored, failed };
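Mirroring previously visited backup directories one at a time; it now fans out one task per backup and partitions the results. Because each task catches its own errors and resolves to a { success, path, error } record, Promise.allSettled should only ever see fulfilled promises here; the rejected branch is defensive. The same pattern in isolation (a generic sketch, not part of the package):

// Fan out independent, self-reporting tasks, then partition the outcomes.
async function fanOut(items, task) {
    const results = await Promise.allSettled(items.map((item) => task(item)));
    const ok = [];
    const bad = [];
    for (const r of results) {
        if (r.status === 'fulfilled' && r.value.success)
            ok.push(r.value.path);
        else if (r.status === 'fulfilled')
            bad.push({ path: r.value.path, error: r.value.error ?? 'Unknown error' });
        else
            bad.push({ path: 'unknown', error: String(r.reason) });
    }
    return { ok, bad };
}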
@@ -330,13 +591,91 @@ async function syncStaleBackups(sourceDir, allDirs, bundleId) {
     }
     }
 }
+async function writeLocalRepoMeta(params) {
+    await ensureDir(path.dirname(params.metaPath));
+    const obj = {
+        repoId: params.repoId,
+        source: 'local',
+        localPath: params.localPath,
+        ref: params.ref,
+        fetchedAt: params.fetchedAt,
+        ingestedFiles: params.ingestedFiles,
+        skipped: params.skipped,
+    };
+    await fs.writeFile(params.metaPath, JSON.stringify(obj, null, 2) + '\n', 'utf8');
+}
+async function ingestLocalRepo(params) {
+    const repoId = `${params.owner}/${params.repo}`;
+    const repoRoot = path.resolve(params.localPath);
+    const st = await fs.stat(repoRoot);
+    if (!st.isDirectory()) {
+        throw new Error(`Local repo path is not a directory: ${repoRoot}`);
+    }
+    const bundlePaths = getBundlePaths(params.storageDir, params.bundleId);
+    const rawDest = repoRawDir(bundlePaths, params.owner, params.repo);
+    const normDest = repoNormDir(bundlePaths, params.owner, params.repo);
+    await rmIfExists(rawDest);
+    await rmIfExists(normDest);
+    await ensureDir(rawDest);
+    await ensureDir(normDest);
+    const bundleNormPrefixPosix = `repos/${params.owner}/${params.repo}/norm`;
+    const ingested = await ingestRepoToBundle({
+        repoId,
+        repoRoot,
+        rawDestRoot: rawDest,
+        normDestRoot: normDest,
+        bundleNormPrefixPosix,
+        options: {
+            maxFileBytes: params.cfg.maxFileBytes,
+            maxTotalBytes: params.cfg.maxTotalBytes,
+        },
+    });
+    const fetchedAt = nowIso();
+    await writeLocalRepoMeta({
+        metaPath: repoMetaPath(bundlePaths, params.owner, params.repo),
+        repoId,
+        localPath: repoRoot,
+        ref: params.ref,
+        fetchedAt,
+        ingestedFiles: ingested.files.length,
+        skipped: ingested.skipped,
+    });
+    return { files: ingested.files, skipped: ingested.skipped };
+}
 async function cloneAndIngestGitHubRepo(params) {
     const repoId = `${params.owner}/${params.repo}`;
     const cloneUrl = toCloneUrl({ owner: params.owner, repo: params.repo });
-    const
-
-
-
+    const tmpBase = path.join(params.cfg.tmpDir, 'checkouts', params.bundleId, `${params.owner}__${params.repo}`);
+    const tmpCheckoutGit = tmpBase;
+    const tmpArchiveDir = `${tmpBase}__archive`;
+    await rmIfExists(tmpCheckoutGit);
+    await rmIfExists(tmpArchiveDir);
+    let repoRootForIngest = tmpCheckoutGit;
+    let headSha;
+    const notes = [];
+    let source = 'git';
+    let fetchedAt = nowIso();
+    let refUsed = params.ref;
+    try {
+        await shallowClone(cloneUrl, tmpCheckoutGit, { ref: params.ref, timeoutMs: params.cfg.gitCloneTimeoutMs });
+        headSha = await getLocalHeadSha(tmpCheckoutGit);
+    }
+    catch (err) {
+        // Fallback: GitHub archive download (zipball) + extract.
+        source = 'archive';
+        const msg = err instanceof Error ? err.message : String(err);
+        notes.push(`git clone failed; used GitHub archive fallback: ${msg}`);
+        const archive = await downloadAndExtractGitHubArchive({
+            cfg: params.cfg,
+            owner: params.owner,
+            repo: params.repo,
+            ref: params.ref,
+            destDir: tmpArchiveDir,
+        });
+        repoRootForIngest = archive.repoRoot;
+        fetchedAt = archive.fetchedAt;
+        refUsed = archive.refUsed;
+    }
     const bundlePaths = getBundlePaths(params.storageDir, params.bundleId);
     const rawDest = repoRawDir(bundlePaths, params.owner, params.repo);
     const normDest = repoNormDir(bundlePaths, params.owner, params.repo);
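One observable effect of the archive fallback: the per-repo meta JSON (see writeRepoMeta above) now carries optional source and ref fields and may omit headSha entirely, since a zipball download has no git metadata. A sketch of the meta written after a failed clone, with hypothetical values throughout:

// Hypothetical values; the shape follows writeRepoMeta above.
const exampleMeta = {
    repoId: 'owner/repo',
    cloneUrl: 'https://github.com/owner/repo.git',
    fetchedAt: '2024-01-01T00:00:00.000Z',
    ingestedFiles: 123,
    skipped: [],
    source: 'archive', // 'git' when the shallow clone succeeded
    ref: 'main',       // the ref the archive was resolved against
    // headSha omitted: unknown without a git checkout
};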
@@ -347,7 +686,7 @@ async function cloneAndIngestGitHubRepo(params) {
     const bundleNormPrefixPosix = `repos/${params.owner}/${params.repo}/norm`;
     const ingested = await ingestRepoToBundle({
         repoId,
-        repoRoot:
+        repoRoot: repoRootForIngest,
         rawDestRoot: rawDest,
         normDestRoot: normDest,
         bundleNormPrefixPosix,
@@ -356,7 +695,6 @@ async function cloneAndIngestGitHubRepo(params) {
         maxTotalBytes: params.cfg.maxTotalBytes,
         },
     });
-    const fetchedAt = nowIso();
     await writeRepoMeta({
         metaPath: repoMetaPath(bundlePaths, params.owner, params.repo),
         repoId,
@@ -365,9 +703,12 @@ async function cloneAndIngestGitHubRepo(params) {
         fetchedAt,
         ingestedFiles: ingested.files.length,
         skipped: ingested.skipped,
+        source,
+        ref: refUsed,
     });
-    await rmIfExists(
-
+    await rmIfExists(tmpCheckoutGit);
+    await rmIfExists(tmpArchiveDir);
+    return { headSha, files: ingested.files, skipped: ingested.skipped, notes, source };
 }
 function groupFilesByRepoId(files) {
     const byRepo = new Map();
@@ -401,7 +742,28 @@ async function generateFactsBestEffort(params) {
         logger.error('Static analysis exception', err instanceof Error ? err : undefined);
     }
 }
-export async function createBundle(cfg, input) {
+export async function createBundle(cfg, input, options) {
+    // Apply concurrency limiting to prevent DoS attacks
+    return await bundleCreationLimiter.run(async () => {
+        return await createBundleInternal(cfg, input, options);
+    });
+}
+async function createBundleInternal(cfg, input, options) {
+    const fingerprint = computeCreateInputFingerprint(input);
+    const ifExists = options?.ifExists ?? 'error';
+    if (ifExists !== 'createNew') {
+        const existing = await findExistingBundleByFingerprint(cfg, fingerprint);
+        if (existing) {
+            if (ifExists === 'returnExisting') {
+                return await readBundleSummary(cfg, existing);
+            }
+            if (ifExists === 'updateExisting') {
+                const { summary } = await updateBundle(cfg, existing);
+                return summary;
+            }
+            throw new Error(`Bundle already exists for these inputs: ${existing}`);
+        }
+    }
     const bundleId = crypto.randomUUID();
     const createdAt = nowIso();
     // Use effective storage dir (falls back if primary unavailable)
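createBundle now takes an options bag and runs inside bundleCreationLimiter, so concurrent callers queue rather than overwhelm the host. options.ifExists decides what happens when the input fingerprint already maps to a bundle: 'error' (the default) throws, 'returnExisting' returns the existing summary, 'updateExisting' refreshes it via updateBundle, and 'createNew' skips the lookup. A usage sketch, assuming cfg and the inputs come from the caller:

// Reuse a previously built bundle when the same inputs were already ingested.
const summary = await createBundle(cfg, {
    repos: [{ kind: 'github', repo: 'owner/repo', ref: 'main' }],
    libraries: ['react'],
    topics: [],
}, { ifExists: 'returnExisting' });
console.log(summary.bundleId);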
@@ -417,7 +779,7 @@ export async function createBundle(cfg, input) {
     for (const repoInput of input.repos) {
         if (repoInput.kind === 'github') {
             const { owner, repo } = parseOwnerRepo(repoInput.repo);
-            const { headSha, files, skipped } = await cloneAndIngestGitHubRepo({
+            const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
                 cfg,
                 bundleId,
                 storageDir: effectiveStorageDir,
@@ -426,7 +788,27 @@ export async function createBundle(cfg, input) {
                 ref: repoInput.ref,
             });
             allIngestedFiles.push(...files);
-            reposSummary.push({
+            reposSummary.push({
+                kind: 'github',
+                id: `${owner}/${repo}`,
+                source,
+                headSha,
+                notes: [...notes, ...skipped].slice(0, 50),
+            });
+        }
+        else if (repoInput.kind === 'local') {
+            const { owner, repo } = parseOwnerRepo(repoInput.repo);
+            const { files, skipped } = await ingestLocalRepo({
+                cfg,
+                bundleId,
+                storageDir: effectiveStorageDir,
+                owner,
+                repo,
+                localPath: repoInput.path,
+                ref: repoInput.ref,
+            });
+            allIngestedFiles.push(...files);
+            reposSummary.push({ kind: 'local', id: `${owner}/${repo}`, source: 'local', notes: skipped.slice(0, 50) });
         }
         else {
             // DeepWiki integration: fetch and convert to Markdown.
@@ -439,6 +821,7 @@ export async function createBundle(cfg, input) {
             reposSummary.push({
                 kind: 'deepwiki',
                 id: deepwikiResult.summary.repoId,
+                source: 'deepwiki',
                 notes: deepwikiResult.summary.notes,
             });
         }
@@ -482,6 +865,7 @@ export async function createBundle(cfg, input) {
         bundleId,
         createdAt,
         updatedAt: createdAt,
+        fingerprint,
         displayName,
         description,
         tags,
@@ -494,6 +878,7 @@ export async function createBundle(cfg, input) {
         repos: reposSummary.map((r) => ({
             kind: r.kind,
             id: r.id,
+            source: r.source,
             headSha: r.headSha,
             fetchedAt: createdAt,
             notes: r.notes,
@@ -516,7 +901,7 @@ export async function createBundle(cfg, input) {
     });
     // Overview (S2: factual-only with evidence pointers).
     const perRepoOverviews = reposSummary
-        .filter((r) => r.kind === 'github')
+        .filter((r) => r.kind === 'github' || r.kind === 'local')
         .map((r) => {
         const repoId = r.id;
         const repoFiles = allIngestedFiles.filter((f) => f.repoId === repoId);
@@ -547,6 +932,8 @@ export async function createBundle(cfg, input) {
         logger.error(errorMsg);
         throw new Error(errorMsg);
     }
+    // Update de-duplication index (best-effort). This is intentionally after validation.
+    await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt);
     const summary = {
         bundleId,
         createdAt,
@@ -592,6 +979,14 @@ export async function checkForUpdates(cfg, bundleId) {
             hasUpdates = true;
             details.push({ repoId, currentSha: prev?.headSha, remoteSha, changed });
         }
+        else if (repoInput.kind === 'local') {
+            const { owner, repo } = parseOwnerRepo(repoInput.repo);
+            const repoId = `${owner}/${repo}`;
+            // We can't reliably detect whether local files changed without scanning; assume possible update.
+            const prev = manifest.repos.find((r) => r.id === repoId);
+            details.push({ repoId, currentSha: prev?.headSha, changed: true });
+            hasUpdates = true;
+        }
         else {
             // DeepWiki: can't easily detect changes, assume possible update
             details.push({ repoId: repoInput.url, changed: true });
@@ -600,6 +995,257 @@ export async function checkForUpdates(cfg, bundleId) {
     }
     return { hasUpdates, details };
 }
+async function scanBundleIndexableFiles(params) {
+    const files = [];
+    const skipped = [];
+    let totalBytes = 0;
+    const pushFile = async (f) => {
+        const st = await statOrNull(f.absPath);
+        if (!st?.isFile())
+            return;
+        if (st.size > params.cfg.maxFileBytes) {
+            skipped.push(`${f.bundleRelPosix} (too large: ${st.size} bytes)`);
+            return;
+        }
+        if (totalBytes + st.size > params.cfg.maxTotalBytes) {
+            skipped.push(`(bundle maxTotalBytes reached) stopped before: ${f.bundleRelPosix}`);
+            return;
+        }
+        const text = await readUtf8OrNull(f.absPath);
+        if (text === null) {
+            skipped.push(`${f.bundleRelPosix} (unreadable utf8)`);
+            return;
+        }
+        const normalized = text.replace(/\r\n/g, '\n');
+        const sha256 = sha256Text(normalized);
+        totalBytes += st.size;
+        files.push({
+            repoId: f.repoId,
+            kind: f.kind,
+            repoRelativePath: f.repoRelativePath,
+            bundleNormRelativePath: f.bundleRelPosix,
+            bundleNormAbsPath: f.absPath,
+            sha256,
+            bytes: st.size,
+        });
+    };
+    // 1) repos/<owner>/<repo>/norm/** (github/local)
+    try {
+        const owners = await fs.readdir(params.reposDir, { withFileTypes: true });
+        for (const ownerEnt of owners) {
+            if (!ownerEnt.isDirectory())
+                continue;
+            const owner = ownerEnt.name;
+            const ownerDir = path.join(params.reposDir, owner);
+            const repos = await fs.readdir(ownerDir, { withFileTypes: true });
+            for (const repoEnt of repos) {
+                if (!repoEnt.isDirectory())
+                    continue;
+                const repo = repoEnt.name;
+                const normDir = path.join(ownerDir, repo, 'norm');
+                const normSt = await statOrNull(normDir);
+                if (!normSt?.isDirectory())
+                    continue;
+                for await (const wf of walkFilesNoIgnore(normDir)) {
+                    const repoRel = wf.relPosix;
+                    const kind = classifyIngestedFileKind(repoRel);
+                    const bundleRel = `repos/${owner}/${repo}/norm/${repoRel}`;
+                    await pushFile({
+                        repoId: `${owner}/${repo}`,
+                        kind,
+                        repoRelativePath: repoRel,
+                        bundleRelPosix: bundleRel,
+                        absPath: wf.absPath,
+                    });
+                }
+            }
+        }
+    }
+    catch {
+        // ignore missing repos dir
+    }
+    // 2) libraries/context7/** (docs-only)
+    const context7Dir = path.join(params.librariesDir, 'context7');
+    const ctxSt = await statOrNull(context7Dir);
+    if (ctxSt?.isDirectory()) {
+        for await (const wf of walkFilesNoIgnore(context7Dir)) {
+            // Match original ingestion: only .md docs are indexed from Context7.
+            if (!wf.relPosix.toLowerCase().endsWith('.md'))
+                continue;
+            const relFromLibRoot = wf.relPosix; // relative to libraries/context7
+            const parts = relFromLibRoot.split('/').filter(Boolean);
+            const fileName = parts[parts.length - 1] ?? '';
+            const dirParts = parts.slice(0, -1);
+            let repoId = 'context7:unknown';
+            if (dirParts[0] === '_unresolved' && dirParts[1]) {
+                repoId = `context7:unresolved/${dirParts[1]}`;
+            }
+            else if (dirParts.length > 0) {
+                repoId = `context7:/${dirParts.join('/')}`;
+            }
+            const bundleRel = `libraries/context7/${relFromLibRoot}`;
+            await pushFile({
+                repoId,
+                kind: 'doc',
+                repoRelativePath: fileName,
+                bundleRelPosix: bundleRel,
+                absPath: wf.absPath,
+            });
+        }
+    }
+    // 3) deepwiki/<owner>/<repo>/norm/** (docs-only)
+    const deepwikiDir = path.join(params.bundleRootDir, 'deepwiki');
+    const dwSt = await statOrNull(deepwikiDir);
+    if (dwSt?.isDirectory()) {
+        // Only walk the norm subtrees.
+        const owners = await fs.readdir(deepwikiDir, { withFileTypes: true });
+        for (const ownerEnt of owners) {
+            if (!ownerEnt.isDirectory())
+                continue;
+            const owner = ownerEnt.name;
+            const ownerDir = path.join(deepwikiDir, owner);
+            const repos = await fs.readdir(ownerDir, { withFileTypes: true });
+            for (const repoEnt of repos) {
+                if (!repoEnt.isDirectory())
+                    continue;
+                const repo = repoEnt.name;
+                const normDir = path.join(ownerDir, repo, 'norm');
+                const normSt = await statOrNull(normDir);
+                if (!normSt?.isDirectory())
+                    continue;
+                for await (const wf of walkFilesNoIgnore(normDir)) {
+                    if (!wf.relPosix.toLowerCase().endsWith('.md'))
+                        continue;
+                    const bundleRel = `deepwiki/${owner}/${repo}/norm/${wf.relPosix}`;
+                    await pushFile({
+                        repoId: `deepwiki:${owner}/${repo}`,
+                        kind: 'doc',
+                        repoRelativePath: wf.relPosix,
+                        bundleRelPosix: bundleRel,
+                        absPath: wf.absPath,
+                    });
+                }
+            }
+        }
+    }
+    return { files, totalBytes, skipped };
+}
+export async function repairBundle(cfg, bundleId, options) {
+    const mode = options?.mode ?? 'repair';
+    const rebuildIndexOpt = options?.rebuildIndex ?? true;
+    const rebuildGuidesOpt = options?.rebuildGuides ?? true;
+    const rebuildOverviewOpt = options?.rebuildOverview ?? true;
+    const storageDir = await findBundleStorageDir(cfg.storageDirs, bundleId);
+    if (!storageDir) {
+        throw new Error(`Bundle not found: ${bundleId}`);
+    }
+    const paths = getBundlePaths(storageDir, bundleId);
+    const before = await validateBundleCompleteness(paths.rootDir);
+    if (mode === 'validate') {
+        return {
+            bundleId,
+            mode,
+            repaired: false,
+            actionsTaken: [],
+            before,
+            after: before,
+        };
+    }
+    // Manifest is required for safe repairs (no fetching/re-ingest).
+    const manifest = await readManifest(paths.manifestPath);
+    const actionsTaken = [];
+    // Determine what needs repair.
+    const stAgents = await statOrNull(paths.agentsPath);
+    const stStartHere = await statOrNull(paths.startHerePath);
+    const stOverview = await statOrNull(paths.overviewPath);
+    const stIndex = await statOrNull(paths.searchDbPath);
+    const needsAgents = !stAgents || stAgents.size === 0;
+    const needsStartHere = !stStartHere || stStartHere.size === 0;
+    const needsOverview = !stOverview || stOverview.size === 0;
+    const needsIndex = !stIndex || stIndex.size === 0;
+    // Scan bundle files once if needed for index/overview.
+    let scanned = null;
+    const needScan = (rebuildIndexOpt && needsIndex) || (rebuildOverviewOpt && needsOverview);
+    if (needScan) {
+        scanned = await scanBundleIndexableFiles({
+            cfg,
+            bundleRootDir: paths.rootDir,
+            reposDir: paths.reposDir,
+            librariesDir: paths.librariesDir,
+        });
+        if (scanned.skipped.length) {
+            actionsTaken.push(`scan: skipped ${scanned.skipped.length} file(s)`);
+        }
+    }
+    if (rebuildIndexOpt && needsIndex) {
+        const files = scanned?.files ?? [];
+        await rebuildIndex(paths.searchDbPath, files, { includeDocs: true, includeCode: true });
+        actionsTaken.push(`rebuildIndex: indexed ${files.length} file(s)`);
+    }
+    if (rebuildGuidesOpt && needsAgents) {
+        await writeAgentsMd(paths.agentsPath);
+        actionsTaken.push('writeAgentsMd');
+    }
+    if (rebuildGuidesOpt && needsStartHere) {
+        await writeStartHereMd({
+            targetPath: paths.startHerePath,
+            bundleId,
+            repos: (manifest.repos ?? []).map((r) => ({ id: r.id, headSha: r.headSha })),
+            libraries: manifest.libraries,
+        });
+        actionsTaken.push('writeStartHereMd');
+    }
+    if (rebuildOverviewOpt && needsOverview) {
+        const allFiles = scanned?.files ?? [];
+        const perRepoOverviews = (manifest.repos ?? [])
+            .filter((r) => r.kind === 'github' || r.kind === 'local')
+            .map((r) => {
+            const repoId = r.id;
+            const repoFiles = allFiles.filter((f) => f.repoId === repoId);
+            return { repoId, headSha: r.headSha, files: repoFiles };
+        });
+        const md = await generateOverviewMarkdown({
+            bundleId,
+            bundleRootDir: paths.rootDir,
+            repos: perRepoOverviews,
+            libraries: manifest.libraries,
+        });
+        await writeOverviewFile(paths.overviewPath, md);
+        actionsTaken.push('writeOverviewFile');
+    }
+    let updatedAt;
+    if (actionsTaken.length > 0) {
+        updatedAt = nowIso();
+        const fingerprint = manifest.fingerprint ??
+            computeCreateInputFingerprint({
+                repos: manifest.inputs.repos,
+                libraries: manifest.inputs.libraries,
+                topics: manifest.inputs.topics,
+            });
+        const newManifest = {
+            ...manifest,
+            updatedAt,
+            fingerprint,
+        };
+        await writeManifest(paths.manifestPath, newManifest);
+        // Keep the de-duplication index fresh (best-effort).
+        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
+        // Mirror to backup storage directories (non-blocking on failures)
+        if (cfg.storageDirs.length > 1) {
+            await mirrorBundleToBackups(storageDir, cfg.storageDirs, bundleId);
+        }
+    }
+    const after = await validateBundleCompleteness(paths.rootDir);
+    return {
+        bundleId,
+        mode,
+        repaired: actionsTaken.length > 0,
+        actionsTaken,
+        before,
+        after,
+        updatedAt,
+    };
+}
 export async function updateBundle(cfg, bundleId, options) {
     // Use effective storage dir (falls back if primary unavailable)
     const effectiveStorageDir = await getEffectiveStorageDirForWrite(cfg);
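repairBundle is deliberately read-mostly: 'validate' mode only reports completeness, and 'repair' mode rebuilds artifacts that are missing or zero-length (search index, AGENTS.md, START_HERE.md, overview) from files already in the bundle, never re-fetching sources. A usage sketch, with cfg and bundleId assumed to come from the caller:

// Dry run: report completeness without changing anything.
const report = await repairBundle(cfg, bundleId, { mode: 'validate' });

// Default repair: rebuildIndex/rebuildGuides/rebuildOverview all default to true,
// but each step only runs when its artifact is actually missing or empty.
const result = await repairBundle(cfg, bundleId);
console.log(result.repaired, result.actionsTaken);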
@@ -626,7 +1272,7 @@ export async function updateBundle(cfg, bundleId, options) {
             if (remoteSha && prev?.headSha && remoteSha !== prev.headSha) {
                 changed = true;
             }
-            const { headSha, files, skipped } = await cloneAndIngestGitHubRepo({
+            const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
                 cfg,
                 bundleId,
                 storageDir: effectiveStorageDir,
@@ -634,11 +1280,31 @@ export async function updateBundle(cfg, bundleId, options) {
                 repo,
                 ref: repoInput.ref,
             });
-            if (prev?.headSha && headSha !== prev.headSha) {
+            if (prev?.headSha && headSha && headSha !== prev.headSha) {
+                changed = true;
+            }
+            // If we had to fall back to an archive, treat as changed (we don't have git metadata).
+            if (source === 'archive') {
                 changed = true;
             }
             allIngestedFiles.push(...files);
-            reposSummary.push({ kind: 'github', id: repoId, headSha, notes: skipped.slice(0, 50) });
+            reposSummary.push({ kind: 'github', id: repoId, source, headSha, notes: [...notes, ...skipped].slice(0, 50) });
+        }
+        else if (repoInput.kind === 'local') {
+            const { owner, repo } = parseOwnerRepo(repoInput.repo);
+            const repoId = `${owner}/${repo}`;
+            const { files, skipped } = await ingestLocalRepo({
+                cfg,
+                bundleId,
+                storageDir: effectiveStorageDir,
+                owner,
+                repo,
+                localPath: repoInput.path,
+                ref: repoInput.ref,
+            });
+            allIngestedFiles.push(...files);
+            reposSummary.push({ kind: 'local', id: repoId, source: 'local', notes: skipped.slice(0, 50) });
+            changed = true;
         }
         else {
             // DeepWiki integration: fetch and convert to Markdown.
@@ -651,6 +1317,7 @@ export async function updateBundle(cfg, bundleId, options) {
             reposSummary.push({
                 kind: 'deepwiki',
                 id: deepwikiResult.summary.repoId,
+                source: 'deepwiki',
                 notes: deepwikiResult.summary.notes,
             });
             // Always mark as changed for DeepWiki since we can't easily detect content changes.
@@ -676,12 +1343,19 @@ export async function updateBundle(cfg, bundleId, options) {
         includeDocs: manifest.index.includeDocs,
         includeCode: manifest.index.includeCode,
     });
+    const fingerprint = computeCreateInputFingerprint({
+        repos: manifest.inputs.repos,
+        libraries: manifest.inputs.libraries,
+        topics: manifest.inputs.topics,
+    });
     const newManifest = {
         ...manifest,
         updatedAt,
+        fingerprint,
         repos: reposSummary.map((r) => ({
             kind: r.kind,
             id: r.id,
+            source: r.source,
             headSha: r.headSha,
             fetchedAt: updatedAt,
             notes: r.notes,
@@ -698,7 +1372,7 @@ export async function updateBundle(cfg, bundleId, options) {
         libraries: librariesSummary,
     });
     const perRepoOverviews = reposSummary
-        .filter((r) => r.kind === 'github')
+        .filter((r) => r.kind === 'github' || r.kind === 'local')
         .map((r) => {
         const repoId = r.id;
         const repoFiles = allIngestedFiles.filter((f) => f.repoId === repoId);
@@ -722,6 +1396,8 @@ export async function updateBundle(cfg, bundleId, options) {
     if (cfg.storageDirs.length > 1) {
         await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
     }
+    // Keep the de-duplication index fresh (best-effort).
+    await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
     const summary = {
         bundleId,
         createdAt: manifest.createdAt,