preflight-mcp 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -142
- package/README.zh-CN.md +141 -124
- package/dist/ast/treeSitter.js +588 -0
- package/dist/bundle/analysis.js +47 -0
- package/dist/bundle/context7.js +65 -36
- package/dist/bundle/facts.js +829 -0
- package/dist/bundle/github.js +34 -3
- package/dist/bundle/githubArchive.js +102 -29
- package/dist/bundle/overview.js +226 -48
- package/dist/bundle/service.js +250 -130
- package/dist/config.js +30 -3
- package/dist/context7/client.js +5 -2
- package/dist/evidence/dependencyGraph.js +1136 -0
- package/dist/http/server.js +109 -0
- package/dist/jobs/progressTracker.js +191 -0
- package/dist/search/sqliteFts.js +150 -10
- package/dist/server.js +340 -326
- package/dist/trace/service.js +108 -0
- package/dist/trace/store.js +170 -0
- package/package.json +4 -2
- package/dist/bundle/deepwiki.js +0 -206
package/dist/bundle/service.js
CHANGED
@@ -11,10 +11,10 @@ import { writeAgentsMd, writeStartHereMd } from './guides.js';
 import { generateOverviewMarkdown, writeOverviewFile } from './overview.js';
 import { rebuildIndex } from '../search/sqliteFts.js';
 import { ingestContext7Libraries } from './context7.js';
-import { ingestDeepWikiRepo } from './deepwiki.js';
 import { analyzeBundleStatic } from './analysis.js';
 import { autoDetectTags, generateDisplayName, generateDescription } from './tagging.js';
 import { bundleCreationLimiter } from '../core/concurrency-limiter.js';
+import { getProgressTracker, calcPercent } from '../jobs/progressTracker.js';
 const DEDUP_INDEX_FILE = '.preflight-dedup-index.json';
 function sha256Hex(text) {
     return crypto.createHash('sha256').update(text, 'utf8').digest('hex');
@@ -26,48 +26,20 @@ function normalizeList(values) {
         .map((s) => s.toLowerCase())
         .sort();
 }
-function normalizeDeepWikiUrl(raw) {
-    const trimmed = raw.trim();
-    try {
-        const u = new URL(trimmed);
-        u.hash = '';
-        // Normalize host and strip trailing slash.
-        u.host = u.host.toLowerCase();
-        u.pathname = u.pathname.replace(/\/+$/g, '');
-        return u.toString();
-    }
-    catch {
-        return trimmed;
-    }
-}
 function canonicalizeCreateInput(input) {
     const repos = input.repos
         .map((r) => {
-            if (r.kind === 'github') {
-                const { owner, repo } = parseOwnerRepo(r.repo);
-                return {
-                    kind: 'github',
-                    repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
-                    ref: (r.ref ?? '').trim() || undefined,
-                };
-            }
-            if (r.kind === 'local') {
-                // For de-duplication, treat local imports as equivalent to github imports of the same logical repo/ref.
-                const { owner, repo } = parseOwnerRepo(r.repo);
-                return {
-                    kind: 'github',
-                    repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
-                    ref: (r.ref ?? '').trim() || undefined,
-                };
-            }
+            // For de-duplication, treat local imports as equivalent to github imports of the same logical repo/ref.
+            const { owner, repo } = parseOwnerRepo(r.repo);
             return {
-                kind: '
-
+                kind: 'github',
+                repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
+                ref: (r.ref ?? '').trim() || undefined,
             };
         })
         .sort((a, b) => {
-            const ka =
-            const kb =
+            const ka = `github:${a.repo}:${a.ref ?? ''}`;
+            const kb = `github:${b.repo}:${b.ref ?? ''}`;
             return ka.localeCompare(kb);
         });
     return {
@@ -113,13 +85,13 @@ async function writeDedupIndex(storageDir, idx) {
         try {
             await fs.unlink(tmpPath);
         }
-        catch {
-
+        catch (cleanupErr) {
+            logger.debug('Failed to cleanup temp dedup index file (non-critical)', cleanupErr instanceof Error ? cleanupErr : undefined);
         }
         throw err;
     }
 }
-async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt) {
+async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt, status = 'complete') {
     for (const storageDir of cfg.storageDirs) {
         try {
             const parentAvailable = await isParentAvailable(storageDir);
@@ -127,14 +99,105 @@ async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpda
                 continue;
             await ensureDir(storageDir);
             const idx = await readDedupIndex(storageDir);
-            idx.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt };
+            idx.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt, status };
             idx.updatedAt = nowIso();
             await writeDedupIndex(storageDir, idx);
         }
+        catch (err) {
+            logger.debug(`Failed to update dedup index in ${storageDir} (best-effort)`, err instanceof Error ? err : undefined);
+        }
+    }
+}
+/**
+ * Set in-progress lock for a fingerprint. Returns false if already locked (not timed out).
+ */
+async function setInProgressLock(cfg, fingerprint, taskId, repos) {
+    const now = nowIso();
+    const nowMs = Date.now();
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            if (!(await isPathAvailable(storageDir)))
+                continue;
+            await ensureDir(storageDir);
+            const idx = await readDedupIndex(storageDir);
+            const existing = idx.byFingerprint[fingerprint];
+            // Check if there's an existing in-progress lock
+            if (existing?.status === 'in-progress' && existing.startedAt) {
+                const startedMs = new Date(existing.startedAt).getTime();
+                const elapsed = nowMs - startedMs;
+                // If lock hasn't timed out, return the existing entry
+                if (elapsed < cfg.inProgressLockTimeoutMs) {
+                    return { locked: false, existingEntry: existing };
+                }
+                // Lock timed out - will be overwritten
+                logger.warn(`In-progress lock timed out for fingerprint ${fingerprint.slice(0, 8)}...`);
+            }
+            // Set new in-progress lock
+            idx.byFingerprint[fingerprint] = {
+                bundleId: '', // Will be set on completion
+                bundleUpdatedAt: now,
+                status: 'in-progress',
+                startedAt: now,
+                taskId,
+                repos,
+            };
+            idx.updatedAt = now;
+            await writeDedupIndex(storageDir, idx);
+            return { locked: true };
+        }
+        catch (err) {
+            logger.debug(`Failed to set in-progress lock in ${storageDir}`, err instanceof Error ? err : undefined);
+        }
+    }
+    // If we couldn't write to any storage, assume we can proceed (best-effort)
+    return { locked: true };
+}
+/**
+ * Clear in-progress lock (on failure or completion with status='complete').
+ */
+async function clearInProgressLock(cfg, fingerprint) {
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            if (!(await isPathAvailable(storageDir)))
+                continue;
+            const idx = await readDedupIndex(storageDir);
+            const existing = idx.byFingerprint[fingerprint];
+            // Only clear if it's in-progress
+            if (existing?.status === 'in-progress') {
+                delete idx.byFingerprint[fingerprint];
+                idx.updatedAt = nowIso();
+                await writeDedupIndex(storageDir, idx);
+            }
+        }
+        catch (err) {
+            logger.debug(`Failed to clear in-progress lock in ${storageDir}`, err instanceof Error ? err : undefined);
+        }
+    }
+}
+/**
+ * Check if a fingerprint has an in-progress lock (not timed out).
+ */
+export async function checkInProgressLock(cfg, fingerprint) {
+    const nowMs = Date.now();
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            if (!(await isPathAvailable(storageDir)))
+                continue;
+            const idx = await readDedupIndex(storageDir);
+            const existing = idx.byFingerprint[fingerprint];
+            if (existing?.status === 'in-progress' && existing.startedAt) {
+                const startedMs = new Date(existing.startedAt).getTime();
+                const elapsed = nowMs - startedMs;
+                if (elapsed < cfg.inProgressLockTimeoutMs) {
+                    return existing;
+                }
+            }
+        }
         catch {
-            //
+            // ignore
         }
     }
+    return null;
 }
 async function readBundleSummary(cfg, bundleId) {
     const storageDir = (await findBundleStorageDir(cfg.storageDirs, bundleId)) ?? (await getEffectiveStorageDir(cfg));
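The three helpers above give bundle creation a fingerprint-keyed locking lifecycle: acquire before work, overwrite the entry with `status: 'complete'` on success, delete it on failure, and let stale locks expire via `cfg.inProgressLockTimeoutMs`. A minimal sketch of how a caller composes them (`buildBundle` is a hypothetical stand-in; the real flow lives in `createBundleInternal` further down):

```js
// Sketch only: the lock lifecycle implied by the helpers in this hunk.
async function createWithLock(cfg, fingerprint, taskId, repoIds, buildBundle) {
    const lock = await setInProgressLock(cfg, fingerprint, taskId, repoIds);
    if (!lock.locked) {
        // A fresh (non-timed-out) lock exists; defer to the other task.
        throw new Error(`in progress: ${lock.existingEntry.taskId}`);
    }
    try {
        const bundleId = await buildBundle();
        // Success: overwrite the in-progress entry with a completed record.
        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, nowIso(), 'complete');
        return bundleId;
    }
    catch (err) {
        // Failure: drop the entry so a retry can acquire the lock immediately.
        await clearInProgressLock(cfg, fingerprint);
        throw err;
    }
}
```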
@@ -166,6 +229,9 @@ async function findExistingBundleByFingerprint(cfg, fingerprint) {
             continue;
         const idx = await readDedupIndex(storageDir);
         const hit = idx.byFingerprint[fingerprint];
+        // Skip in-progress entries - they don't have a completed bundle yet
+        if (hit?.status === 'in-progress')
+            continue;
         if (hit?.bundleId && (await bundleExistsMulti(cfg.storageDirs, hit.bundleId))) {
             return hit.bundleId;
         }
@@ -350,6 +416,33 @@ async function validateBundleCompleteness(bundleRoot) {
         missingComponents,
     };
 }
+/**
+ * Assert that a bundle is complete and ready for operations.
+ * Throws an error with helpful guidance if the bundle is incomplete.
+ * Should be called at the entry point of tools that require a complete bundle
+ * (e.g., dependency graph, trace links, search).
+ */
+export async function assertBundleComplete(cfg, bundleId) {
+    const storageDir = await findBundleStorageDir(cfg.storageDirs, bundleId);
+    if (!storageDir) {
+        throw new Error(`Bundle not found: ${bundleId}`);
+    }
+    const bundleRoot = getBundlePaths(storageDir, bundleId).rootDir;
+    const { isValid, missingComponents } = await validateBundleCompleteness(bundleRoot);
+    if (!isValid) {
+        const issues = missingComponents.join('\n - ');
+        throw new Error(`Bundle is incomplete and cannot be used for this operation.\n\n` +
+            `Bundle ID: ${bundleId}\n` +
+            `Missing components:\n - ${issues}\n\n` +
+            `This usually happens when:\n` +
+            `1. Bundle creation was interrupted (timeout, network error, etc.)\n` +
+            `2. Bundle download is still in progress\n\n` +
+            `Suggested actions:\n` +
+            `- Use preflight_update_bundle with force:true to re-download the repository\n` +
+            `- Or use preflight_delete_bundle and preflight_create_bundle to start fresh\n` +
+            `- Check preflight_get_task_status if creation might still be in progress`);
+    }
+}
 /**
  * Detect primary language from ingested files
  */
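Per its doc comment, `assertBundleComplete` is meant to guard tool entry points. A sketch of such a guard (the handler and `runSearch` are hypothetical stand-ins, not part of this package):

```js
// Hypothetical tool handler: fail fast with the remediation message above
// instead of hitting a confusing missing-file error mid-operation.
async function handleSearchTool(cfg, args) {
    await assertBundleComplete(cfg, args.bundleId);
    // From here the search index, FACTS.json, etc. are known to exist.
    return runSearch(cfg, args.bundleId, args.query); // runSearch is a stand-in
}
```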
@@ -657,7 +750,14 @@ async function cloneAndIngestGitHubRepo(params) {
     let fetchedAt = nowIso();
     let refUsed = params.ref;
     try {
-        await shallowClone(cloneUrl, tmpCheckoutGit, { ref: params.ref, timeoutMs: params.cfg.gitCloneTimeoutMs });
+        params.onProgress?.('cloning', 0, `Cloning ${repoId}...`);
+        await shallowClone(cloneUrl, tmpCheckoutGit, {
+            ref: params.ref,
+            timeoutMs: params.cfg.gitCloneTimeoutMs,
+            onProgress: (phase, percent, msg) => {
+                params.onProgress?.('cloning', percent, `${repoId}: ${msg}`);
+            },
+        });
         headSha = await getLocalHeadSha(tmpCheckoutGit);
     }
     catch (err) {
@@ -665,12 +765,17 @@ async function cloneAndIngestGitHubRepo(params) {
         source = 'archive';
         const msg = err instanceof Error ? err.message : String(err);
         notes.push(`git clone failed; used GitHub archive fallback: ${msg}`);
+        params.onProgress?.('downloading', 0, `Downloading ${repoId} archive...`);
         const archive = await downloadAndExtractGitHubArchive({
             cfg: params.cfg,
             owner: params.owner,
             repo: params.repo,
             ref: params.ref,
             destDir: tmpArchiveDir,
+            onProgress: (downloaded, total, msg) => {
+                const percent = total ? Math.round((downloaded / total) * 100) : 0;
+                params.onProgress?.('downloading', percent, `${repoId}: ${msg}`);
+            },
         });
         repoRootForIngest = archive.repoRoot;
         fetchedAt = archive.fetchedAt;
@@ -750,6 +855,15 @@ export async function createBundle(cfg, input, options) {
 }
 async function createBundleInternal(cfg, input, options) {
     const fingerprint = computeCreateInputFingerprint(input);
+    const repoIds = input.repos.map((r) => r.repo);
+    const onProgress = options?.onProgress;
+    const tracker = getProgressTracker();
+    // Helper to report progress
+    const reportProgress = (phase, progress, message, total) => {
+        if (onProgress) {
+            onProgress(phase, progress, message, total);
+        }
+    };
     const ifExists = options?.ifExists ?? 'error';
     if (ifExists !== 'createNew') {
         const existing = await findExistingBundleByFingerprint(cfg, fingerprint);
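From `reportProgress` above, the new `options.onProgress` callback appears to take `(phase, progress, message, total)`. A sketch of a caller wiring it up, assuming that inferred signature:

```js
// Sketch: forward bundle-creation progress to stderr.
// The (phase, progress, message, total) shape is inferred from reportProgress.
const summary = await createBundle(cfg, input, {
    ifExists: 'error', // 'error' and 'createNew' both appear in this diff
    onProgress: (phase, progress, message) => {
        console.error(`[${phase}] ${progress}% ${message}`);
    },
});
```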
@@ -764,6 +878,28 @@ async function createBundleInternal(cfg, input, options) {
             throw new Error(`Bundle already exists for these inputs: ${existing}`);
         }
     }
+    // Start tracking this task
+    const taskId = tracker.startTask(fingerprint, repoIds);
+    reportProgress('starting', 0, `Starting bundle creation for ${repoIds.join(', ')}`);
+    // Try to acquire in-progress lock
+    const lockResult = await setInProgressLock(cfg, fingerprint, taskId, repoIds);
+    if (!lockResult.locked) {
+        // Another task is already creating this bundle
+        const entry = lockResult.existingEntry;
+        const elapsedSec = entry.startedAt
+            ? Math.round((Date.now() - new Date(entry.startedAt).getTime()) / 1000)
+            : 0;
+        const msg = `Bundle creation already in progress (taskId: ${entry.taskId}, started ${elapsedSec}s ago). ` +
+            `Use preflight_get_task_status to check progress.`;
+        // Throw a special error that can be caught and handled
+        const err = new Error(msg);
+        err.code = 'BUNDLE_IN_PROGRESS';
+        err.taskId = entry.taskId;
+        err.fingerprint = fingerprint;
+        err.repos = entry.repos;
+        err.startedAt = entry.startedAt;
+        throw err;
+    }
     const bundleId = crypto.randomUUID();
     const createdAt = nowIso();
     // Use effective storage dir (falls back if primary unavailable)
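Because the duplicate-request case is thrown as a typed error, callers can distinguish "already being built" from a real failure. A sketch of such a caller (the wrapper function is hypothetical):

```js
// Sketch: treat BUNDLE_IN_PROGRESS as "poll the existing task, don't retry".
async function createOrReportPending(cfg, input, options) {
    try {
        return { pending: false, summary: await createBundle(cfg, input, options) };
    }
    catch (err) {
        if (err?.code === 'BUNDLE_IN_PROGRESS') {
            // Another task owns this fingerprint; point the caller at it.
            return { pending: true, taskId: err.taskId, startedAt: err.startedAt };
        }
        throw err;
    }
}
```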
@@ -778,9 +914,15 @@ async function createBundleInternal(cfg, input, options) {
     const reposSummary = [];
     try {
         // All operations happen in tmpPaths (temporary directory)
+        const totalRepos = input.repos.length;
+        let repoIndex = 0;
         for (const repoInput of input.repos) {
+            repoIndex++;
+            const repoProgress = Math.round((repoIndex - 1) / totalRepos * 40); // 0-40% for repo fetching
             if (repoInput.kind === 'github') {
                 const { owner, repo } = parseOwnerRepo(repoInput.repo);
+                reportProgress('cloning', repoProgress, `[${repoIndex}/${totalRepos}] Fetching ${owner}/${repo}...`);
+                tracker.updateProgress(taskId, 'cloning', repoProgress, `Fetching ${owner}/${repo}...`);
                 const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
                     cfg,
                     bundleId,
@@ -788,6 +930,12 @@ async function createBundleInternal(cfg, input, options) {
                     owner,
                     repo,
                     ref: repoInput.ref,
+                    onProgress: (phase, percent, msg) => {
+                        // Map clone/download progress to overall progress (0-40% range per repo)
+                        const overallProgress = repoProgress + Math.round(percent * 0.4 / totalRepos);
+                        reportProgress(phase, overallProgress, `[${repoIndex}/${totalRepos}] ${msg}`);
+                        tracker.updateProgress(taskId, phase, overallProgress, msg);
+                    },
                 });
                 allIngestedFiles.push(...files);
                 reposSummary.push({
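The arithmetic in this hunk reserves the 0-40% band for repo fetching and splits it evenly, so each repo owns a 40/totalRepos-point slice and the per-repo clone percent is rescaled into that slice. A worked example:

```js
// Worked example of the mapping above: two repos, second repo half-cloned.
const totalRepos = 2;
const repoIndex = 2; // second repo
const repoProgress = Math.round((repoIndex - 1) / totalRepos * 40); // 20
const percent = 50; // clone progress reported for this repo
const overallProgress = repoProgress + Math.round(percent * 0.4 / totalRepos); // 20 + 10 = 30
```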
@@ -798,8 +946,11 @@ async function createBundleInternal(cfg, input, options) {
                     notes: [...notes, ...skipped].slice(0, 50),
                 });
             }
-            else if (repoInput.kind === 'local') {
+            else {
+                // Local repository
                 const { owner, repo } = parseOwnerRepo(repoInput.repo);
+                reportProgress('ingesting', repoProgress, `[${repoIndex}/${totalRepos}] Ingesting local ${owner}/${repo}...`);
+                tracker.updateProgress(taskId, 'ingesting', repoProgress, `Ingesting local ${owner}/${repo}...`);
                 const { files, skipped } = await ingestLocalRepo({
                     cfg,
                     bundleId,
@@ -812,21 +963,6 @@ async function createBundleInternal(cfg, input, options) {
                 allIngestedFiles.push(...files);
                 reposSummary.push({ kind: 'local', id: `${owner}/${repo}`, source: 'local', notes: skipped.slice(0, 50) });
             }
-            else {
-                // DeepWiki integration: fetch and convert to Markdown.
-                const deepwikiResult = await ingestDeepWikiRepo({
-                    cfg,
-                    bundlePaths: tmpPaths,
-                    url: repoInput.url,
-                });
-                allIngestedFiles.push(...deepwikiResult.files);
-                reposSummary.push({
-                    kind: 'deepwiki',
-                    id: deepwikiResult.summary.repoId,
-                    source: 'deepwiki',
-                    notes: deepwikiResult.summary.notes,
-                });
-            }
         }
         // Context7 libraries (best-effort).
         let librariesSummary;
@@ -844,6 +980,8 @@ async function createBundleInternal(cfg, input, options) {
             librariesSummary = libIngest.libraries;
         }
         // Build index.
+        reportProgress('indexing', 50, `Building search index (${allIngestedFiles.length} files)...`);
+        tracker.updateProgress(taskId, 'indexing', 50, `Building search index (${allIngestedFiles.length} files)...`);
         await rebuildIndex(tmpPaths.searchDbPath, allIngestedFiles, {
             includeDocs: true,
             includeCode: true,
@@ -901,7 +1039,18 @@ async function createBundleInternal(cfg, input, options) {
             repos: reposSummary.map((r) => ({ id: r.id, headSha: r.headSha })),
             libraries: librariesSummary,
         });
-        //
+        // Generate static facts (FACTS.json) FIRST. This is intentionally non-LLM and safe to keep inside bundles.
+        reportProgress('analyzing', 70, 'Analyzing code structure...');
+        tracker.updateProgress(taskId, 'analyzing', 70, 'Analyzing code structure...');
+        await generateFactsBestEffort({
+            bundleId,
+            bundleRoot: tmpPaths.rootDir,
+            files: allIngestedFiles,
+            mode: cfg.analysisMode,
+        });
+        // Overview (S2: factual-only with evidence pointers) - generated AFTER FACTS.json
+        reportProgress('generating', 80, 'Generating overview...');
+        tracker.updateProgress(taskId, 'generating', 80, 'Generating overview...');
         const perRepoOverviews = reposSummary
             .filter((r) => r.kind === 'github' || r.kind === 'local')
             .map((r) => {
@@ -916,13 +1065,6 @@ async function createBundleInternal(cfg, input, options) {
             libraries: librariesSummary,
         });
         await writeOverviewFile(tmpPaths.overviewPath, overviewMd);
-        // Generate static facts (FACTS.json). This is intentionally non-LLM and safe to keep inside bundles.
-        await generateFactsBestEffort({
-            bundleId,
-            bundleRoot: tmpPaths.rootDir,
-            files: allIngestedFiles,
-            mode: cfg.analysisMode,
-        });
         // CRITICAL: Validate bundle completeness BEFORE atomic move
         const validation = await validateBundleCompleteness(tmpPaths.rootDir);
         if (!validation.isValid) {
@@ -932,6 +1074,8 @@ async function createBundleInternal(cfg, input, options) {
         }
         // ATOMIC OPERATION: Move from temp to final location
         // This is atomic on most filesystems - bundle becomes visible only when complete
+        reportProgress('finalizing', 90, 'Finalizing bundle...');
+        tracker.updateProgress(taskId, 'finalizing', 90, 'Finalizing bundle...');
         logger.info(`Moving bundle ${bundleId} from temp to final location (atomic)`);
         await ensureDir(effectiveStorageDir);
         try {
@@ -958,7 +1102,10 @@ async function createBundleInternal(cfg, input, options) {
             await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
         }
         // Update de-duplication index (best-effort). This is intentionally after atomic move.
-        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt);
+        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt, 'complete');
+        // Mark task complete
+        reportProgress('complete', 100, `Bundle created: ${bundleId}`);
+        tracker.completeTask(taskId, bundleId);
         const summary = {
             bundleId,
             createdAt,
@@ -972,14 +1119,21 @@ async function createBundleInternal(cfg, input, options) {
         // Clean up temp directory on failure
         logger.error(`Bundle creation failed, cleaning up temp: ${bundleId}`, err instanceof Error ? err : undefined);
         await rmIfExists(tmpPaths.rootDir);
-        //
+        // Clear in-progress lock on failure
+        await clearInProgressLock(cfg, fingerprint);
+        // Mark task failed
         const errorMsg = err instanceof Error ? err.message : String(err);
+        tracker.failTask(taskId, errorMsg);
+        // Re-throw with enhanced message (unless it's already our BUNDLE_IN_PROGRESS error)
+        if (err?.code === 'BUNDLE_IN_PROGRESS') {
+            throw err;
+        }
         throw new Error(`Failed to create bundle: ${errorMsg}`);
     }
     finally {
         // Ensure temp directory is cleaned up (double safety)
-        await rmIfExists(tmpPaths.rootDir).catch(() => {
-
+        await rmIfExists(tmpPaths.rootDir).catch((err) => {
+            logger.debug('Failed to cleanup temp bundle directory in finally block (non-critical)', err instanceof Error ? err : undefined);
         });
     }
 }
@@ -1008,19 +1162,14 @@ export async function checkForUpdates(cfg, bundleId) {
             hasUpdates = true;
             details.push({ repoId, currentSha: prev?.headSha, remoteSha, changed });
         }
-        else if (repoInput.kind === 'local') {
+        else {
+            // Local: can't reliably detect whether local files changed without scanning; assume possible update.
             const { owner, repo } = parseOwnerRepo(repoInput.repo);
             const repoId = `${owner}/${repo}`;
-            // We can't reliably detect whether local files changed without scanning; assume possible update.
             const prev = manifest.repos.find((r) => r.id === repoId);
             details.push({ repoId, currentSha: prev?.headSha, changed: true });
             hasUpdates = true;
         }
-        else {
-            // DeepWiki: can't easily detect changes, assume possible update
-            details.push({ repoId: repoInput.url, changed: true });
-            hasUpdates = true;
-        }
     }
     return { hasUpdates, details };
 }
@@ -1122,41 +1271,6 @@ async function scanBundleIndexableFiles(params) {
             });
         }
     }
-    // 3) deepwiki/<owner>/<repo>/norm/** (docs-only)
-    const deepwikiDir = path.join(params.bundleRootDir, 'deepwiki');
-    const dwSt = await statOrNull(deepwikiDir);
-    if (dwSt?.isDirectory()) {
-        // Only walk the norm subtrees.
-        const owners = await fs.readdir(deepwikiDir, { withFileTypes: true });
-        for (const ownerEnt of owners) {
-            if (!ownerEnt.isDirectory())
-                continue;
-            const owner = ownerEnt.name;
-            const ownerDir = path.join(deepwikiDir, owner);
-            const repos = await fs.readdir(ownerDir, { withFileTypes: true });
-            for (const repoEnt of repos) {
-                if (!repoEnt.isDirectory())
-                    continue;
-                const repo = repoEnt.name;
-                const normDir = path.join(ownerDir, repo, 'norm');
-                const normSt = await statOrNull(normDir);
-                if (!normSt?.isDirectory())
-                    continue;
-                for await (const wf of walkFilesNoIgnore(normDir)) {
-                    if (!wf.relPosix.toLowerCase().endsWith('.md'))
-                        continue;
-                    const bundleRel = `deepwiki/${owner}/${repo}/norm/${wf.relPosix}`;
-                    await pushFile({
-                        repoId: `deepwiki:${owner}/${repo}`,
-                        kind: 'doc',
-                        repoRelativePath: wf.relPosix,
-                        bundleRelPosix: bundleRel,
-                        absPath: wf.absPath,
-                    });
-                }
-            }
-        }
-    }
     return { files, totalBytes, skipped };
 }
 export async function repairBundle(cfg, bundleId, options) {
@@ -1281,15 +1395,27 @@ export async function updateBundle(cfg, bundleId, options) {
     const paths = getBundlePaths(effectiveStorageDir, bundleId);
     const manifest = await readManifest(paths.manifestPath);
     const updatedAt = nowIso();
+    const onProgress = options?.onProgress;
+    // Report progress helper
+    const reportProgress = (phase, progress, message, total) => {
+        if (onProgress) {
+            onProgress(phase, progress, message, total);
+        }
+    };
+    reportProgress('starting', 0, `Updating bundle ${bundleId}...`);
     let changed = false;
     const allIngestedFiles = [];
     const reposSummary = [];
+    const totalRepos = manifest.inputs.repos.length;
+    let repoIndex = 0;
     // Rebuild everything obvious for now (simple + deterministic).
     for (const repoInput of manifest.inputs.repos) {
+        repoIndex++;
         if (repoInput.kind === 'github') {
             const { owner, repo } = parseOwnerRepo(repoInput.repo);
             const repoId = `${owner}/${repo}`;
             const cloneUrl = toCloneUrl({ owner, repo });
+            reportProgress('cloning', calcPercent(repoIndex - 1, totalRepos), `Checking ${repoId}...`, totalRepos);
             let remoteSha;
             try {
                 remoteSha = await getRemoteHeadSha(cloneUrl);
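`updateBundle` leans on `calcPercent` from the new `jobs/progressTracker.js` (a +191-line file not shown in this diff). From its call sites here, it presumably reduces to something like:

```js
// Assumed shape of calcPercent -- the real implementation ships in
// dist/jobs/progressTracker.js; this is only inferred from the call sites.
function calcPercent(done, total) {
    if (!total)
        return 0; // guard against a zero total
    return Math.round((done / total) * 100);
}
```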
@@ -1301,6 +1427,7 @@ export async function updateBundle(cfg, bundleId, options) {
             if (remoteSha && prev?.headSha && remoteSha !== prev.headSha) {
                 changed = true;
             }
+            reportProgress('downloading', calcPercent(repoIndex - 1, totalRepos), `Fetching ${repoId}...`, totalRepos);
             const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
                 cfg,
                 bundleId,
@@ -1308,6 +1435,9 @@ export async function updateBundle(cfg, bundleId, options) {
                 owner,
                 repo,
                 ref: repoInput.ref,
+                onProgress: (phase, progress, message) => {
+                    reportProgress(phase, progress, message);
+                },
             });
             if (prev?.headSha && headSha && headSha !== prev.headSha) {
                 changed = true;
@@ -1319,7 +1449,8 @@ export async function updateBundle(cfg, bundleId, options) {
             allIngestedFiles.push(...files);
             reposSummary.push({ kind: 'github', id: repoId, source, headSha, notes: [...notes, ...skipped].slice(0, 50) });
         }
-        else if (repoInput.kind === 'local') {
+        else {
+            // Local repository
             const { owner, repo } = parseOwnerRepo(repoInput.repo);
             const repoId = `${owner}/${repo}`;
             const { files, skipped } = await ingestLocalRepo({
@@ -1335,27 +1466,11 @@ export async function updateBundle(cfg, bundleId, options) {
             reposSummary.push({ kind: 'local', id: repoId, source: 'local', notes: skipped.slice(0, 50) });
             changed = true;
         }
-        else {
-            // DeepWiki integration: fetch and convert to Markdown.
-            const deepwikiResult = await ingestDeepWikiRepo({
-                cfg,
-                bundlePaths: paths,
-                url: repoInput.url,
-            });
-            allIngestedFiles.push(...deepwikiResult.files);
-            reposSummary.push({
-                kind: 'deepwiki',
-                id: deepwikiResult.summary.repoId,
-                source: 'deepwiki',
-                notes: deepwikiResult.summary.notes,
-            });
-            // Always mark as changed for DeepWiki since we can't easily detect content changes.
-            changed = true;
-        }
     }
     // Context7 libraries (best-effort).
     let librariesSummary;
     if (manifest.inputs.libraries?.length) {
+        reportProgress('downloading', 80, 'Fetching Context7 libraries...');
         await rmIfExists(paths.librariesDir);
         await ensureDir(paths.librariesDir);
         const libIngest = await ingestContext7Libraries({
@@ -1368,6 +1483,7 @@ export async function updateBundle(cfg, bundleId, options) {
         librariesSummary = libIngest.libraries;
     }
     // Rebuild index.
+    reportProgress('indexing', 85, `Rebuilding search index (${allIngestedFiles.length} files)...`);
     await rebuildIndex(paths.searchDbPath, allIngestedFiles, {
         includeDocs: manifest.index.includeDocs,
         includeCode: manifest.index.includeCode,
@@ -1393,6 +1509,7 @@ export async function updateBundle(cfg, bundleId, options) {
     };
     await writeManifest(paths.manifestPath, newManifest);
     // Regenerate guides + overview.
+    reportProgress('generating', 90, 'Regenerating guides and overview...');
     await writeAgentsMd(paths.agentsPath);
     await writeStartHereMd({
         targetPath: paths.startHerePath,
@@ -1415,6 +1532,7 @@ export async function updateBundle(cfg, bundleId, options) {
     });
     await writeOverviewFile(paths.overviewPath, overviewMd);
     // Refresh static facts (FACTS.json) after update.
+    reportProgress('analyzing', 95, 'Analyzing bundle...');
     await generateFactsBestEffort({
         bundleId,
         bundleRoot: paths.rootDir,
@@ -1422,11 +1540,13 @@ export async function updateBundle(cfg, bundleId, options) {
         mode: cfg.analysisMode,
     });
     // Mirror to backup storage directories (non-blocking on failures)
+    reportProgress('finalizing', 98, 'Finalizing update...');
     if (cfg.storageDirs.length > 1) {
         await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
     }
     // Keep the de-duplication index fresh (best-effort).
     await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
+    reportProgress('complete', 100, `Bundle updated: ${bundleId}`);
     const summary = {
         bundleId,
         createdAt: manifest.createdAt,