preflight-mcp 0.1.3 → 0.1.4
This diff reflects the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
- package/README.md +21 -7
- package/dist/bundle/github.js +34 -3
- package/dist/bundle/githubArchive.js +58 -6
- package/dist/bundle/service.js +224 -5
- package/dist/config.js +1 -0
- package/dist/evidence/dependencyGraph.js +312 -2
- package/dist/jobs/progressTracker.js +191 -0
- package/dist/server.js +261 -36
- package/package.json +1 -1
package/README.md
CHANGED

````diff
@@ -15,8 +15,12 @@ Each bundle contains:
 
 ## Features
 
-- **
-- **
+- **13 MCP tools** to create/update/repair/search/read bundles, generate evidence graphs, and manage trace links
+- **Progress tracking**: Real-time progress reporting for long-running operations (create/update bundles)
+- **Bundle integrity check**: Prevents operations on incomplete bundles with helpful error messages
+- **De-duplication with in-progress lock**: Prevent duplicate bundle creation even during MCP timeouts
+- **Global dependency graph**: Generate project-wide import relationship graphs
+- **Batch file reading**: Read all key bundle files in a single call
 - **Resilient GitHub fetching**: configurable git clone timeout + GitHub archive (zipball) fallback
 - **Offline repair**: rebuild missing/empty derived artifacts (index/guides/overview) without re-fetching
 - **Static facts extraction** via `analysis/FACTS.json` (non-LLM)
@@ -117,7 +121,7 @@ Run end-to-end smoke test:
 npm run smoke
 ```
 
-## Tools (
+## Tools (13 total)
 
 ### `preflight_list_bundles`
 List bundle IDs in storage.
@@ -146,8 +150,10 @@ Input (example):
 **Note**: If the bundle contains code files, consider using `preflight_evidence_dependency_graph` for dependency analysis or `preflight_trace_upsert` for trace links.
 
 ### `preflight_read_file`
-Read
--
+Read file(s) from bundle. Two modes:
+- **Batch mode** (omit `file`): Returns ALL key files (OVERVIEW.md, START_HERE.md, AGENTS.md, manifest.json, repo READMEs) in one call
+- **Single file mode** (provide `file`): Returns that specific file
+- Triggers: "查看bundle" (view bundle), "bundle概览" (bundle overview), "项目信息" (project info), "show bundle"
 - Use `file: "manifest.json"` to get bundle metadata (repos, timestamps, tags, etc.)
 
 ### `preflight_delete_bundle`
@@ -189,10 +195,12 @@ Optional parameters:
 - `limit`: Max total hits across all bundles
 
 ### `preflight_evidence_dependency_graph`
-Generate an evidence-based dependency graph
+Generate an evidence-based dependency graph. Two modes:
+- **Target mode** (provide `target.file`): Analyze a specific file's imports and callers
+- **Global mode** (omit `target`): Generate project-wide import graph of all code files
 - Deterministic output with source ranges for edges.
 - Uses Tree-sitter parsing when `PREFLIGHT_AST_ENGINE=wasm`; falls back to regex extraction otherwise.
-- Emits `imports` edges (file → module) and
+- Emits `imports` edges (file → module) and `imports_resolved` edges (file → internal file).
 
 ### `preflight_trace_upsert`
 Upsert traceability links (commit↔ticket, symbol↔test, code↔doc, etc.) for a bundle.
@@ -210,6 +218,12 @@ Parameters:
 
 Note: This is also automatically executed on server startup (background, non-blocking).
 
+### `preflight_get_task_status`
+Check status of bundle creation/update tasks (progress tracking).
+- Triggers: "check progress", "what is the status", "查看任务状态" (check task status), "下载进度" (download progress)
+- Query by `taskId` (from error), `fingerprint`, or `repos`
+- Shows: phase, progress percentage, message, elapsed time
+
 ## Resources
 
 ### `preflight://bundles`
````
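For reference, the two `preflight_read_file` modes described above take input shapes roughly like the following. This is a hedged illustration based only on the README text: the `bundleId` argument name and placeholder value are assumptions, not confirmed by this diff.

```js
// Hypothetical tool inputs, inferred from the README changes above.
// Batch mode: omit `file`, so all key files come back in one call.
const batchInput = { bundleId: '<bundle-id>' };

// Single-file mode: `file` selects one path inside the bundle,
// e.g. manifest.json for bundle metadata (repos, timestamps, tags).
const singleFileInput = { bundleId: '<bundle-id>', file: 'manifest.json' };
```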
package/dist/bundle/github.js
CHANGED

```diff
@@ -13,8 +13,26 @@ export function parseOwnerRepo(input) {
 export function toCloneUrl(ref) {
     return `https://github.com/${ref.owner}/${ref.repo}.git`;
 }
+/**
+ * Parse git clone progress from stderr.
+ * Git outputs progress like:
+ * - "Receiving objects: 45% (1234/2741)"
+ * - "Resolving deltas: 60% (100/167)"
+ */
+function parseGitProgress(line) {
+    // Match patterns like "Receiving objects: 45% (1234/2741)"
+    const match = line.match(/(Receiving objects|Resolving deltas|Counting objects|Compressing objects):\s+(\d+)%/);
+    if (match) {
+        return {
+            phase: match[1],
+            percent: parseInt(match[2], 10),
+        };
+    }
+    return null;
+}
 async function runGit(args, opts) {
     const timeoutMs = opts?.timeoutMs ?? 5 * 60_000;
+    const onProgress = opts?.onProgress;
     return new Promise((resolve, reject) => {
         const child = spawn('git', args, {
             cwd: opts?.cwd,
@@ -65,7 +83,19 @@ async function runGit(args, opts) {
             stdout += data.toString('utf8');
         });
         child.stderr?.on('data', (data) => {
-            stderr += data.toString('utf8');
+            const chunk = data.toString('utf8');
+            stderr += chunk;
+            // Parse and report progress
+            if (onProgress) {
+                // Git progress can come in chunks, split by lines
+                const lines = chunk.split(/[\r\n]+/);
+                for (const line of lines) {
+                    const progress = parseGitProgress(line);
+                    if (progress) {
+                        onProgress(progress.phase, progress.percent, `${progress.phase}: ${progress.percent}%`);
+                    }
+                }
+            }
         });
         child.on('error', (err) => {
             cleanup();
@@ -125,14 +155,15 @@ export async function shallowClone(cloneUrl, destDir, opts) {
     await fs.mkdir(path.dirname(destDir), { recursive: true });
     // Clean dest if exists.
     await fs.rm(destDir, { recursive: true, force: true });
-    const args = ['-c', 'core.autocrlf=false', 'clone', '--depth', '1', '--no-tags', '--single-branch'];
+    // Use --progress to force progress output even when not attached to a terminal
+    const args = ['-c', 'core.autocrlf=false', 'clone', '--depth', '1', '--no-tags', '--single-branch', '--progress'];
     if (opts?.ref) {
         // Validate ref before using it in git command
         validateGitRef(opts.ref);
         args.push('--branch', opts.ref);
     }
     args.push(cloneUrl, destDir);
-    await runGit(args, { timeoutMs: opts?.timeoutMs ?? 15 * 60_000 });
+    await runGit(args, { timeoutMs: opts?.timeoutMs ?? 15 * 60_000, onProgress: opts?.onProgress });
 }
 export async function getLocalHeadSha(repoDir) {
     const { stdout } = await runGit(['-C', repoDir, 'rev-parse', 'HEAD']);
```
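As a quick illustration of what the new `parseGitProgress` helper extracts, here is a standalone sketch that re-declares the same regex (the helper itself is not exported); the sample stderr lines are illustrative:

```js
// Sketch: the same pattern parseGitProgress uses, applied to sample stderr lines.
const GIT_PROGRESS_RE = /(Receiving objects|Resolving deltas|Counting objects|Compressing objects):\s+(\d+)%/;

const samples = [
    'Receiving objects:  45% (1234/2741)',
    'Resolving deltas:  60% (100/167)',
    'Cloning into bare repository...',
];
for (const line of samples) {
    const m = line.match(GIT_PROGRESS_RE);
    // First two lines match ({ phase, percent }); the last yields null.
    console.log(m ? { phase: m[1], percent: parseInt(m[2], 10) } : null);
}
```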
package/dist/bundle/githubArchive.js
CHANGED

```diff
@@ -36,7 +36,7 @@ async function fetchJson(url, headers, timeoutMs = DEFAULT_API_TIMEOUT_MS) {
         clearTimeout(timeoutId);
     }
 }
-async function downloadToFile(url, headers, destPath, timeoutMs = DEFAULT_DOWNLOAD_TIMEOUT_MS) {
+async function downloadToFile(url, headers, destPath, timeoutMs = DEFAULT_DOWNLOAD_TIMEOUT_MS, onProgress) {
     const controller = new AbortController();
     const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
     try {
@@ -44,12 +44,51 @@ async function downloadToFile(url, headers, destPath, timeoutMs = DEFAULT_DOWNLO
         if (!res.ok) {
             throw new Error(`Download error ${res.status}: ${res.statusText}`);
         }
-        //
+        // Get content length for progress reporting
+        const contentLengthHeader = res.headers.get('content-length');
+        const totalBytes = contentLengthHeader ? parseInt(contentLengthHeader, 10) : undefined;
+        await ensureDir(path.dirname(destPath));
+        // Use streaming to report progress
         const anyRes = res;
         const body = anyRes.body;
-
+        if (body && typeof body[Symbol.asyncIterator] === 'function') {
+            // Async iterator for progress tracking
+            const fsModule = await import('node:fs');
+            const ws = fsModule.createWriteStream(destPath);
+            let downloadedBytes = 0;
+            let lastReportTime = Date.now();
+            const reportIntervalMs = 500; // Report at most every 500ms
+            try {
+                for await (const chunk of body) {
+                    ws.write(chunk);
+                    downloadedBytes += chunk.length;
+                    // Throttle progress reports
+                    const now = Date.now();
+                    if (onProgress && (now - lastReportTime > reportIntervalMs)) {
+                        lastReportTime = now;
+                        const percent = totalBytes ? Math.round((downloadedBytes / totalBytes) * 100) : 0;
+                        const msg = totalBytes
+                            ? `Downloaded ${formatBytes(downloadedBytes)} / ${formatBytes(totalBytes)} (${percent}%)`
+                            : `Downloaded ${formatBytes(downloadedBytes)}`;
+                        onProgress(downloadedBytes, totalBytes, msg);
+                    }
+                }
+            }
+            finally {
+                ws.end();
+                await new Promise((resolve) => ws.on('finish', () => resolve()));
+            }
+            // Final progress report
+            if (onProgress) {
+                const msg = totalBytes
+                    ? `Downloaded ${formatBytes(downloadedBytes)} / ${formatBytes(totalBytes)} (100%)`
+                    : `Downloaded ${formatBytes(downloadedBytes)}`;
+                onProgress(downloadedBytes, totalBytes, msg);
+            }
+            return;
+        }
         if (body && typeof body.pipe === 'function') {
-            // Node.js stream
+            // Node.js stream (fallback without progress)
             const ws = (await import('node:fs')).createWriteStream(destPath);
             await new Promise((resolve, reject) => {
                 body.pipe(ws);
@@ -59,14 +98,27 @@ async function downloadToFile(url, headers, destPath, timeoutMs = DEFAULT_DOWNLO
             });
             return;
         }
-        // Web stream or no stream support
+        // Web stream or no stream support (fallback without progress)
         const buf = Buffer.from(await res.arrayBuffer());
         await fs.writeFile(destPath, buf);
+        if (onProgress) {
+            onProgress(buf.length, buf.length, `Downloaded ${formatBytes(buf.length)}`);
+        }
     }
     finally {
         clearTimeout(timeoutId);
     }
 }
+/** Format bytes for display */
+function formatBytes(bytes) {
+    if (bytes < 1024)
+        return `${bytes}B`;
+    if (bytes < 1024 * 1024)
+        return `${(bytes / 1024).toFixed(1)}KB`;
+    if (bytes < 1024 * 1024 * 1024)
+        return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
+    return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)}GB`;
+}
 async function extractZip(zipPath, destDir) {
     await ensureDir(destDir);
     const zip = new AdmZip(zipPath);
@@ -91,7 +143,7 @@ export async function downloadAndExtractGitHubArchive(params) {
     // Use the API zipball endpoint so ref can be branch/tag/SHA (including slashes via URL-encoding).
     const zipballUrl = `https://api.github.com/repos/${params.owner}/${params.repo}/zipball/${encodeURIComponent(refUsed)}`;
     await ensureDir(params.destDir);
-    await downloadToFile(zipballUrl, headers, zipPath);
+    await downloadToFile(zipballUrl, headers, zipPath, DEFAULT_DOWNLOAD_TIMEOUT_MS, params.onProgress);
     const extractDir = path.join(params.destDir, `extracted-${Date.now()}`);
     await extractZip(zipPath, extractDir);
     const repoRoot = await findSingleTopLevelDir(extractDir);
```
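A quick check of what the new `formatBytes` helper produces at each size threshold, using a standalone copy of the function shown in the diff:

```js
// Standalone copy of formatBytes from the diff above, exercised at each size class.
function formatBytes(bytes) {
    if (bytes < 1024) return `${bytes}B`;
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)}KB`;
    if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
    return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)}GB`;
}

console.log(formatBytes(512));             // "512B"
console.log(formatBytes(2048));            // "2.0KB"
console.log(formatBytes(5 * 1024 * 1024)); // "5.0MB"
console.log(formatBytes(3_221_225_472));   // "3.00GB"
```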
package/dist/bundle/service.js
CHANGED

```diff
@@ -14,6 +14,7 @@ import { ingestContext7Libraries } from './context7.js';
 import { analyzeBundleStatic } from './analysis.js';
 import { autoDetectTags, generateDisplayName, generateDescription } from './tagging.js';
 import { bundleCreationLimiter } from '../core/concurrency-limiter.js';
+import { getProgressTracker, calcPercent } from '../jobs/progressTracker.js';
 const DEDUP_INDEX_FILE = '.preflight-dedup-index.json';
 function sha256Hex(text) {
     return crypto.createHash('sha256').update(text, 'utf8').digest('hex');
@@ -90,7 +91,7 @@ async function writeDedupIndex(storageDir, idx) {
         throw err;
     }
 }
-async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt) {
+async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt, status = 'complete') {
     for (const storageDir of cfg.storageDirs) {
         try {
             const parentAvailable = await isParentAvailable(storageDir);
@@ -98,7 +99,7 @@ async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpda
                 continue;
             await ensureDir(storageDir);
             const idx = await readDedupIndex(storageDir);
-            idx.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt };
+            idx.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt, status };
             idx.updatedAt = nowIso();
             await writeDedupIndex(storageDir, idx);
         }
@@ -107,6 +108,97 @@ async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpda
         }
     }
 }
+/**
+ * Set in-progress lock for a fingerprint. Returns false if already locked (not timed out).
+ */
+async function setInProgressLock(cfg, fingerprint, taskId, repos) {
+    const now = nowIso();
+    const nowMs = Date.now();
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            if (!(await isPathAvailable(storageDir)))
+                continue;
+            await ensureDir(storageDir);
+            const idx = await readDedupIndex(storageDir);
+            const existing = idx.byFingerprint[fingerprint];
+            // Check if there's an existing in-progress lock
+            if (existing?.status === 'in-progress' && existing.startedAt) {
+                const startedMs = new Date(existing.startedAt).getTime();
+                const elapsed = nowMs - startedMs;
+                // If lock hasn't timed out, return the existing entry
+                if (elapsed < cfg.inProgressLockTimeoutMs) {
+                    return { locked: false, existingEntry: existing };
+                }
+                // Lock timed out - will be overwritten
+                logger.warn(`In-progress lock timed out for fingerprint ${fingerprint.slice(0, 8)}...`);
+            }
+            // Set new in-progress lock
+            idx.byFingerprint[fingerprint] = {
+                bundleId: '', // Will be set on completion
+                bundleUpdatedAt: now,
+                status: 'in-progress',
+                startedAt: now,
+                taskId,
+                repos,
+            };
+            idx.updatedAt = now;
+            await writeDedupIndex(storageDir, idx);
+            return { locked: true };
+        }
+        catch (err) {
+            logger.debug(`Failed to set in-progress lock in ${storageDir}`, err instanceof Error ? err : undefined);
+        }
+    }
+    // If we couldn't write to any storage, assume we can proceed (best-effort)
+    return { locked: true };
+}
+/**
+ * Clear in-progress lock (on failure or completion with status='complete').
+ */
+async function clearInProgressLock(cfg, fingerprint) {
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            if (!(await isPathAvailable(storageDir)))
+                continue;
+            const idx = await readDedupIndex(storageDir);
+            const existing = idx.byFingerprint[fingerprint];
+            // Only clear if it's in-progress
+            if (existing?.status === 'in-progress') {
+                delete idx.byFingerprint[fingerprint];
+                idx.updatedAt = nowIso();
+                await writeDedupIndex(storageDir, idx);
+            }
+        }
+        catch (err) {
+            logger.debug(`Failed to clear in-progress lock in ${storageDir}`, err instanceof Error ? err : undefined);
+        }
+    }
+}
+/**
+ * Check if a fingerprint has an in-progress lock (not timed out).
+ */
+export async function checkInProgressLock(cfg, fingerprint) {
+    const nowMs = Date.now();
+    for (const storageDir of cfg.storageDirs) {
+        try {
+            if (!(await isPathAvailable(storageDir)))
+                continue;
+            const idx = await readDedupIndex(storageDir);
+            const existing = idx.byFingerprint[fingerprint];
+            if (existing?.status === 'in-progress' && existing.startedAt) {
+                const startedMs = new Date(existing.startedAt).getTime();
+                const elapsed = nowMs - startedMs;
+                if (elapsed < cfg.inProgressLockTimeoutMs) {
+                    return existing;
+                }
+            }
+        }
+        catch {
+            // ignore
+        }
+    }
+    return null;
+}
 async function readBundleSummary(cfg, bundleId) {
     const storageDir = (await findBundleStorageDir(cfg.storageDirs, bundleId)) ?? (await getEffectiveStorageDir(cfg));
     const paths = getBundlePaths(storageDir, bundleId);
```
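To make the lock semantics concrete: a dedup-index entry now carries a `status` field, and the two shapes implied by the code above look roughly like this (field values are illustrative, not taken from a real index):

```js
// Illustrative .preflight-dedup-index.json entries, per the code above.
const inProgressEntry = {
    bundleId: '',                          // set only on completion
    bundleUpdatedAt: '2025-01-01T00:00:00.000Z',
    status: 'in-progress',
    startedAt: '2025-01-01T00:00:00.000Z', // lock expires after cfg.inProgressLockTimeoutMs
    taskId: '<task-id>',
    repos: ['owner/repo'],
};
const completeEntry = {
    bundleId: '<bundle-uuid>',
    bundleUpdatedAt: '2025-01-01T00:05:00.000Z',
    status: 'complete',
};
```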
```diff
@@ -137,6 +229,9 @@ async function findExistingBundleByFingerprint(cfg, fingerprint) {
             continue;
         const idx = await readDedupIndex(storageDir);
         const hit = idx.byFingerprint[fingerprint];
+        // Skip in-progress entries - they don't have a completed bundle yet
+        if (hit?.status === 'in-progress')
+            continue;
         if (hit?.bundleId && (await bundleExistsMulti(cfg.storageDirs, hit.bundleId))) {
             return hit.bundleId;
         }
```
```diff
@@ -321,6 +416,33 @@ async function validateBundleCompleteness(bundleRoot) {
         missingComponents,
     };
 }
+/**
+ * Assert that a bundle is complete and ready for operations.
+ * Throws an error with helpful guidance if the bundle is incomplete.
+ * Should be called at the entry point of tools that require a complete bundle
+ * (e.g., dependency graph, trace links, search).
+ */
+export async function assertBundleComplete(cfg, bundleId) {
+    const storageDir = await findBundleStorageDir(cfg.storageDirs, bundleId);
+    if (!storageDir) {
+        throw new Error(`Bundle not found: ${bundleId}`);
+    }
+    const bundleRoot = getBundlePaths(storageDir, bundleId).rootDir;
+    const { isValid, missingComponents } = await validateBundleCompleteness(bundleRoot);
+    if (!isValid) {
+        const issues = missingComponents.join('\n - ');
+        throw new Error(`Bundle is incomplete and cannot be used for this operation.\n\n` +
+            `Bundle ID: ${bundleId}\n` +
+            `Missing components:\n - ${issues}\n\n` +
+            `This usually happens when:\n` +
+            `1. Bundle creation was interrupted (timeout, network error, etc.)\n` +
+            `2. Bundle download is still in progress\n\n` +
+            `Suggested actions:\n` +
+            `- Use preflight_update_bundle with force:true to re-download the repository\n` +
+            `- Or use preflight_delete_bundle and preflight_create_bundle to start fresh\n` +
+            `- Check preflight_get_task_status if creation might still be in progress`);
+    }
+}
 /**
  * Detect primary language from ingested files
  */
```
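Since `assertBundleComplete` is exported, callers (per its doc comment, the entry points of tools that need a complete bundle) would use it roughly like this. A minimal sketch: `handleSearchTool` is a hypothetical handler name, and `cfg` is assumed to come from `getConfig()`:

```js
// Minimal sketch of guarding a tool handler with assertBundleComplete.
// handleSearchTool is hypothetical; the error text comes from the function above.
async function handleSearchTool(cfg, bundleId, query) {
    // Throws with remediation steps (force update, delete/recreate,
    // or preflight_get_task_status) if the bundle is incomplete.
    await assertBundleComplete(cfg, bundleId);
    // ...proceed with the actual search against the bundle...
}
```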
```diff
@@ -628,7 +750,14 @@ async function cloneAndIngestGitHubRepo(params) {
     let fetchedAt = nowIso();
     let refUsed = params.ref;
     try {
-        await shallowClone(cloneUrl, tmpCheckoutGit, { ref: params.ref, timeoutMs: params.cfg.gitCloneTimeoutMs });
+        params.onProgress?.('cloning', 0, `Cloning ${repoId}...`);
+        await shallowClone(cloneUrl, tmpCheckoutGit, {
+            ref: params.ref,
+            timeoutMs: params.cfg.gitCloneTimeoutMs,
+            onProgress: (phase, percent, msg) => {
+                params.onProgress?.('cloning', percent, `${repoId}: ${msg}`);
+            },
+        });
         headSha = await getLocalHeadSha(tmpCheckoutGit);
     }
     catch (err) {
@@ -636,12 +765,17 @@ async function cloneAndIngestGitHubRepo(params) {
         source = 'archive';
         const msg = err instanceof Error ? err.message : String(err);
         notes.push(`git clone failed; used GitHub archive fallback: ${msg}`);
+        params.onProgress?.('downloading', 0, `Downloading ${repoId} archive...`);
         const archive = await downloadAndExtractGitHubArchive({
             cfg: params.cfg,
             owner: params.owner,
             repo: params.repo,
             ref: params.ref,
             destDir: tmpArchiveDir,
+            onProgress: (downloaded, total, msg) => {
+                const percent = total ? Math.round((downloaded / total) * 100) : 0;
+                params.onProgress?.('downloading', percent, `${repoId}: ${msg}`);
+            },
         });
         repoRootForIngest = archive.repoRoot;
         fetchedAt = archive.fetchedAt;
```
```diff
@@ -721,6 +855,15 @@ export async function createBundle(cfg, input, options) {
 }
 async function createBundleInternal(cfg, input, options) {
     const fingerprint = computeCreateInputFingerprint(input);
+    const repoIds = input.repos.map((r) => r.repo);
+    const onProgress = options?.onProgress;
+    const tracker = getProgressTracker();
+    // Helper to report progress
+    const reportProgress = (phase, progress, message, total) => {
+        if (onProgress) {
+            onProgress(phase, progress, message, total);
+        }
+    };
     const ifExists = options?.ifExists ?? 'error';
     if (ifExists !== 'createNew') {
         const existing = await findExistingBundleByFingerprint(cfg, fingerprint);
@@ -735,6 +878,28 @@ async function createBundleInternal(cfg, input, options) {
             throw new Error(`Bundle already exists for these inputs: ${existing}`);
         }
     }
+    // Start tracking this task
+    const taskId = tracker.startTask(fingerprint, repoIds);
+    reportProgress('starting', 0, `Starting bundle creation for ${repoIds.join(', ')}`);
+    // Try to acquire in-progress lock
+    const lockResult = await setInProgressLock(cfg, fingerprint, taskId, repoIds);
+    if (!lockResult.locked) {
+        // Another task is already creating this bundle
+        const entry = lockResult.existingEntry;
+        const elapsedSec = entry.startedAt
+            ? Math.round((Date.now() - new Date(entry.startedAt).getTime()) / 1000)
+            : 0;
+        const msg = `Bundle creation already in progress (taskId: ${entry.taskId}, started ${elapsedSec}s ago). ` +
+            `Use preflight_get_task_status to check progress.`;
+        // Throw a special error that can be caught and handled
+        const err = new Error(msg);
+        err.code = 'BUNDLE_IN_PROGRESS';
+        err.taskId = entry.taskId;
+        err.fingerprint = fingerprint;
+        err.repos = entry.repos;
+        err.startedAt = entry.startedAt;
+        throw err;
+    }
     const bundleId = crypto.randomUUID();
     const createdAt = nowIso();
     // Use effective storage dir (falls back if primary unavailable)
@@ -749,9 +914,15 @@ async function createBundleInternal(cfg, input, options) {
     const reposSummary = [];
     try {
         // All operations happen in tmpPaths (temporary directory)
+        const totalRepos = input.repos.length;
+        let repoIndex = 0;
         for (const repoInput of input.repos) {
+            repoIndex++;
+            const repoProgress = Math.round((repoIndex - 1) / totalRepos * 40); // 0-40% for repo fetching
             if (repoInput.kind === 'github') {
                 const { owner, repo } = parseOwnerRepo(repoInput.repo);
+                reportProgress('cloning', repoProgress, `[${repoIndex}/${totalRepos}] Fetching ${owner}/${repo}...`);
+                tracker.updateProgress(taskId, 'cloning', repoProgress, `Fetching ${owner}/${repo}...`);
                 const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
                     cfg,
                     bundleId,
@@ -759,6 +930,12 @@ async function createBundleInternal(cfg, input, options) {
                     owner,
                     repo,
                     ref: repoInput.ref,
+                    onProgress: (phase, percent, msg) => {
+                        // Map clone/download progress to overall progress (0-40% range per repo)
+                        const overallProgress = repoProgress + Math.round(percent * 0.4 / totalRepos);
+                        reportProgress(phase, overallProgress, `[${repoIndex}/${totalRepos}] ${msg}`);
+                        tracker.updateProgress(taskId, phase, overallProgress, msg);
+                    },
                 });
                 allIngestedFiles.push(...files);
                 reposSummary.push({
@@ -772,6 +949,8 @@ async function createBundleInternal(cfg, input, options) {
             else {
                 // Local repository
                 const { owner, repo } = parseOwnerRepo(repoInput.repo);
+                reportProgress('ingesting', repoProgress, `[${repoIndex}/${totalRepos}] Ingesting local ${owner}/${repo}...`);
+                tracker.updateProgress(taskId, 'ingesting', repoProgress, `Ingesting local ${owner}/${repo}...`);
                 const { files, skipped } = await ingestLocalRepo({
                     cfg,
                     bundleId,
@@ -801,6 +980,8 @@ async function createBundleInternal(cfg, input, options) {
             librariesSummary = libIngest.libraries;
         }
         // Build index.
+        reportProgress('indexing', 50, `Building search index (${allIngestedFiles.length} files)...`);
+        tracker.updateProgress(taskId, 'indexing', 50, `Building search index (${allIngestedFiles.length} files)...`);
         await rebuildIndex(tmpPaths.searchDbPath, allIngestedFiles, {
             includeDocs: true,
             includeCode: true,
@@ -859,6 +1040,8 @@ async function createBundleInternal(cfg, input, options) {
             libraries: librariesSummary,
         });
         // Generate static facts (FACTS.json) FIRST. This is intentionally non-LLM and safe to keep inside bundles.
+        reportProgress('analyzing', 70, 'Analyzing code structure...');
+        tracker.updateProgress(taskId, 'analyzing', 70, 'Analyzing code structure...');
         await generateFactsBestEffort({
             bundleId,
             bundleRoot: tmpPaths.rootDir,
@@ -866,6 +1049,8 @@ async function createBundleInternal(cfg, input, options) {
             mode: cfg.analysisMode,
         });
         // Overview (S2: factual-only with evidence pointers) - generated AFTER FACTS.json
+        reportProgress('generating', 80, 'Generating overview...');
+        tracker.updateProgress(taskId, 'generating', 80, 'Generating overview...');
         const perRepoOverviews = reposSummary
             .filter((r) => r.kind === 'github' || r.kind === 'local')
             .map((r) => {
@@ -889,6 +1074,8 @@ async function createBundleInternal(cfg, input, options) {
         }
         // ATOMIC OPERATION: Move from temp to final location
         // This is atomic on most filesystems - bundle becomes visible only when complete
+        reportProgress('finalizing', 90, 'Finalizing bundle...');
+        tracker.updateProgress(taskId, 'finalizing', 90, 'Finalizing bundle...');
         logger.info(`Moving bundle ${bundleId} from temp to final location (atomic)`);
         await ensureDir(effectiveStorageDir);
         try {
@@ -915,7 +1102,10 @@ async function createBundleInternal(cfg, input, options) {
             await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
         }
         // Update de-duplication index (best-effort). This is intentionally after atomic move.
-        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt);
+        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt, 'complete');
+        // Mark task complete
+        reportProgress('complete', 100, `Bundle created: ${bundleId}`);
+        tracker.completeTask(taskId, bundleId);
         const summary = {
             bundleId,
             createdAt,
@@ -929,8 +1119,15 @@ async function createBundleInternal(cfg, input, options) {
         // Clean up temp directory on failure
         logger.error(`Bundle creation failed, cleaning up temp: ${bundleId}`, err instanceof Error ? err : undefined);
         await rmIfExists(tmpPaths.rootDir);
-        //
+        // Clear in-progress lock on failure
+        await clearInProgressLock(cfg, fingerprint);
+        // Mark task failed
         const errorMsg = err instanceof Error ? err.message : String(err);
+        tracker.failTask(taskId, errorMsg);
+        // Re-throw with enhanced message (unless it's already our BUNDLE_IN_PROGRESS error)
+        if (err?.code === 'BUNDLE_IN_PROGRESS') {
+            throw err;
+        }
         throw new Error(`Failed to create bundle: ${errorMsg}`);
     }
     finally {
```
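The `BUNDLE_IN_PROGRESS` error above is designed to be caught and turned into a status query rather than a retry. A hedged sketch of what a caller might do; the catch shape mirrors the fields set in the diff:

```js
// Sketch: reacting to the duplicate-creation error thrown above.
try {
    await createBundle(cfg, input, options);
}
catch (err) {
    if (err?.code === 'BUNDLE_IN_PROGRESS') {
        // The error carries taskId, fingerprint, repos and startedAt,
        // so the caller can poll preflight_get_task_status instead of retrying.
        console.error(`Creation already running (task ${err.taskId}); check its status.`);
    }
    else {
        throw err;
    }
}
```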
```diff
@@ -1198,15 +1395,27 @@ export async function updateBundle(cfg, bundleId, options) {
     const paths = getBundlePaths(effectiveStorageDir, bundleId);
     const manifest = await readManifest(paths.manifestPath);
     const updatedAt = nowIso();
+    const onProgress = options?.onProgress;
+    // Report progress helper
+    const reportProgress = (phase, progress, message, total) => {
+        if (onProgress) {
+            onProgress(phase, progress, message, total);
+        }
+    };
+    reportProgress('starting', 0, `Updating bundle ${bundleId}...`);
     let changed = false;
     const allIngestedFiles = [];
     const reposSummary = [];
+    const totalRepos = manifest.inputs.repos.length;
+    let repoIndex = 0;
     // Rebuild everything obvious for now (simple + deterministic).
     for (const repoInput of manifest.inputs.repos) {
+        repoIndex++;
         if (repoInput.kind === 'github') {
             const { owner, repo } = parseOwnerRepo(repoInput.repo);
             const repoId = `${owner}/${repo}`;
             const cloneUrl = toCloneUrl({ owner, repo });
+            reportProgress('cloning', calcPercent(repoIndex - 1, totalRepos), `Checking ${repoId}...`, totalRepos);
             let remoteSha;
             try {
                 remoteSha = await getRemoteHeadSha(cloneUrl);
@@ -1218,6 +1427,7 @@ export async function updateBundle(cfg, bundleId, options) {
             if (remoteSha && prev?.headSha && remoteSha !== prev.headSha) {
                 changed = true;
             }
+            reportProgress('downloading', calcPercent(repoIndex - 1, totalRepos), `Fetching ${repoId}...`, totalRepos);
             const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
                 cfg,
                 bundleId,
@@ -1225,6 +1435,9 @@ export async function updateBundle(cfg, bundleId, options) {
                 owner,
                 repo,
                 ref: repoInput.ref,
+                onProgress: (phase, progress, message) => {
+                    reportProgress(phase, progress, message);
+                },
             });
             if (prev?.headSha && headSha && headSha !== prev.headSha) {
                 changed = true;
@@ -1257,6 +1470,7 @@ export async function updateBundle(cfg, bundleId, options) {
     // Context7 libraries (best-effort).
     let librariesSummary;
     if (manifest.inputs.libraries?.length) {
+        reportProgress('downloading', 80, 'Fetching Context7 libraries...');
         await rmIfExists(paths.librariesDir);
         await ensureDir(paths.librariesDir);
         const libIngest = await ingestContext7Libraries({
@@ -1269,6 +1483,7 @@ export async function updateBundle(cfg, bundleId, options) {
         librariesSummary = libIngest.libraries;
     }
     // Rebuild index.
+    reportProgress('indexing', 85, `Rebuilding search index (${allIngestedFiles.length} files)...`);
     await rebuildIndex(paths.searchDbPath, allIngestedFiles, {
         includeDocs: manifest.index.includeDocs,
         includeCode: manifest.index.includeCode,
@@ -1294,6 +1509,7 @@ export async function updateBundle(cfg, bundleId, options) {
     };
     await writeManifest(paths.manifestPath, newManifest);
     // Regenerate guides + overview.
+    reportProgress('generating', 90, 'Regenerating guides and overview...');
     await writeAgentsMd(paths.agentsPath);
     await writeStartHereMd({
         targetPath: paths.startHerePath,
@@ -1316,6 +1532,7 @@ export async function updateBundle(cfg, bundleId, options) {
     });
     await writeOverviewFile(paths.overviewPath, overviewMd);
     // Refresh static facts (FACTS.json) after update.
+    reportProgress('analyzing', 95, 'Analyzing bundle...');
     await generateFactsBestEffort({
         bundleId,
         bundleRoot: paths.rootDir,
@@ -1323,11 +1540,13 @@ export async function updateBundle(cfg, bundleId, options) {
         mode: cfg.analysisMode,
     });
     // Mirror to backup storage directories (non-blocking on failures)
+    reportProgress('finalizing', 98, 'Finalizing update...');
     if (cfg.storageDirs.length > 1) {
         await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
     }
     // Keep the de-duplication index fresh (best-effort).
     await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
+    reportProgress('complete', 100, `Bundle updated: ${bundleId}`);
     const summary = {
         bundleId,
         createdAt: manifest.createdAt,
```
package/dist/config.js
CHANGED

```diff
@@ -88,5 +88,6 @@ export function getConfig() {
         defaultMaxAgeHours: envNumber('PREFLIGHT_DEFAULT_MAX_AGE_HOURS', 24),
         maxSearchLimit: envNumber('PREFLIGHT_MAX_SEARCH_LIMIT', 200),
         defaultSearchLimit: envNumber('PREFLIGHT_DEFAULT_SEARCH_LIMIT', 30),
+        inProgressLockTimeoutMs: envNumber('PREFLIGHT_IN_PROGRESS_LOCK_TIMEOUT_MS', 30 * 60_000),
     };
 }
```
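The new lock timeout defaults to 30 minutes and, like the neighboring settings, is read through `envNumber`, so it can be overridden via the environment. A hedged example; the exact `envNumber` semantics beyond "number with a default" are an assumption:

```js
// Example: shorten the in-progress lock timeout to 10 minutes before the server starts.
// envNumber('PREFLIGHT_IN_PROGRESS_LOCK_TIMEOUT_MS', 30 * 60_000) should pick this up.
process.env.PREFLIGHT_IN_PROGRESS_LOCK_TIMEOUT_MS = String(10 * 60_000);
```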