preflight-mcp 0.1.2 → 0.1.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -11,10 +11,10 @@ import { writeAgentsMd, writeStartHereMd } from './guides.js';
11
11
  import { generateOverviewMarkdown, writeOverviewFile } from './overview.js';
12
12
  import { rebuildIndex } from '../search/sqliteFts.js';
13
13
  import { ingestContext7Libraries } from './context7.js';
14
- import { ingestDeepWikiRepo } from './deepwiki.js';
15
14
  import { analyzeBundleStatic } from './analysis.js';
16
15
  import { autoDetectTags, generateDisplayName, generateDescription } from './tagging.js';
17
16
  import { bundleCreationLimiter } from '../core/concurrency-limiter.js';
17
+ import { getProgressTracker, calcPercent } from '../jobs/progressTracker.js';
18
18
  const DEDUP_INDEX_FILE = '.preflight-dedup-index.json';
19
19
  function sha256Hex(text) {
20
20
  return crypto.createHash('sha256').update(text, 'utf8').digest('hex');
@@ -26,48 +26,20 @@ function normalizeList(values) {
26
26
  .map((s) => s.toLowerCase())
27
27
  .sort();
28
28
  }
29
- function normalizeDeepWikiUrl(raw) {
30
- const trimmed = raw.trim();
31
- try {
32
- const u = new URL(trimmed);
33
- u.hash = '';
34
- // Normalize host and strip trailing slash.
35
- u.host = u.host.toLowerCase();
36
- u.pathname = u.pathname.replace(/\/+$/g, '');
37
- return u.toString();
38
- }
39
- catch {
40
- return trimmed;
41
- }
42
- }
43
29
  function canonicalizeCreateInput(input) {
44
30
  const repos = input.repos
45
31
  .map((r) => {
46
- if (r.kind === 'github') {
47
- const { owner, repo } = parseOwnerRepo(r.repo);
48
- return {
49
- kind: 'github',
50
- repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
51
- ref: (r.ref ?? '').trim() || undefined,
52
- };
53
- }
54
- if (r.kind === 'local') {
55
- // For de-duplication, treat local imports as equivalent to github imports of the same logical repo/ref.
56
- const { owner, repo } = parseOwnerRepo(r.repo);
57
- return {
58
- kind: 'github',
59
- repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
60
- ref: (r.ref ?? '').trim() || undefined,
61
- };
62
- }
32
+ // For de-duplication, treat local imports as equivalent to github imports of the same logical repo/ref.
33
+ const { owner, repo } = parseOwnerRepo(r.repo);
63
34
  return {
64
- kind: 'deepwiki',
65
- url: normalizeDeepWikiUrl(r.url),
35
+ kind: 'github',
36
+ repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
37
+ ref: (r.ref ?? '').trim() || undefined,
66
38
  };
67
39
  })
68
40
  .sort((a, b) => {
69
- const ka = a.kind === 'github' ? `github:${a.repo}:${a.ref ?? ''}` : `deepwiki:${a.url}`;
70
- const kb = b.kind === 'github' ? `github:${b.repo}:${b.ref ?? ''}` : `deepwiki:${b.url}`;
41
+ const ka = `github:${a.repo}:${a.ref ?? ''}`;
42
+ const kb = `github:${b.repo}:${b.ref ?? ''}`;
71
43
  return ka.localeCompare(kb);
72
44
  });
73
45
  return {
@@ -113,13 +85,13 @@ async function writeDedupIndex(storageDir, idx) {
113
85
  try {
114
86
  await fs.unlink(tmpPath);
115
87
  }
116
- catch {
117
- // Ignore cleanup errors
88
+ catch (cleanupErr) {
89
+ logger.debug('Failed to cleanup temp dedup index file (non-critical)', cleanupErr instanceof Error ? cleanupErr : undefined);
118
90
  }
119
91
  throw err;
120
92
  }
121
93
  }
122
- async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt) {
94
+ async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt, status = 'complete') {
123
95
  for (const storageDir of cfg.storageDirs) {
124
96
  try {
125
97
  const parentAvailable = await isParentAvailable(storageDir);
@@ -127,14 +99,105 @@ async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpda
127
99
  continue;
128
100
  await ensureDir(storageDir);
129
101
  const idx = await readDedupIndex(storageDir);
130
- idx.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt };
102
+ idx.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt, status };
131
103
  idx.updatedAt = nowIso();
132
104
  await writeDedupIndex(storageDir, idx);
133
105
  }
106
+ catch (err) {
107
+ logger.debug(`Failed to update dedup index in ${storageDir} (best-effort)`, err instanceof Error ? err : undefined);
108
+ }
109
+ }
110
+ }
111
+ /**
112
+ * Try to record an in-progress lock for `fingerprint` in the dedup index. Resolves to `{ locked: false, existingEntry }` when an unexpired in-progress entry already exists, otherwise to `{ locked: true }` (also when no storage dir could be written at all).
113
+ */
114
+ async function setInProgressLock(cfg, fingerprint, taskId, repos) {
115
+ const now = nowIso();
116
+ const nowMs = Date.now();
117
+ for (const storageDir of cfg.storageDirs) {
118
+ try {
119
+ if (!(await isPathAvailable(storageDir))) // NOTE(review): checked before ensureDir; updateDedupIndexBestEffort gates on isParentAvailable instead - confirm a not-yet-created storageDir is not skipped here
120
+ continue;
121
+ await ensureDir(storageDir);
122
+ const idx = await readDedupIndex(storageDir); // NOTE(review): read-modify-write with no guard visible in this function - confirm concurrent callers cannot both acquire
123
+ const existing = idx.byFingerprint[fingerprint];
124
+ // An entry only counts as a live lock when it is 'in-progress' and carries a startedAt timestamp
125
+ if (existing?.status === 'in-progress' && existing.startedAt) {
126
+ const startedMs = new Date(existing.startedAt).getTime();
127
+ const elapsed = nowMs - startedMs;
128
+ // Lock is still within the timeout window: refuse and hand the existing entry back to the caller
129
+ if (elapsed < cfg.inProgressLockTimeoutMs) {
130
+ return { locked: false, existingEntry: existing };
131
+ }
132
+ // Reached when the lock is older than the timeout, and also whenever the comparison above is false because startedAt is unparsable (NaN) or cfg.inProgressLockTimeoutMs is undefined; the entry is overwritten below
133
+ logger.warn(`In-progress lock timed out for fingerprint ${fingerprint.slice(0, 8)}...`);
134
+ }
135
+ // Write a fresh in-progress entry owned by this task
136
+ idx.byFingerprint[fingerprint] = {
137
+ bundleId: '', // Placeholder: no bundle id is known when the lock is taken
138
+ bundleUpdatedAt: now,
139
+ status: 'in-progress',
140
+ startedAt: now,
141
+ taskId,
142
+ repos,
143
+ };
144
+ idx.updatedAt = now;
145
+ await writeDedupIndex(storageDir, idx);
146
+ return { locked: true }; // returns after the first storage dir that accepts the write; remaining dirs are not written
147
+ }
148
+ catch (err) {
149
+ logger.debug(`Failed to set in-progress lock in ${storageDir}`, err instanceof Error ? err : undefined);
150
+ }
151
+ }
152
+ // Every storage dir was skipped or failed: report success anyway so bundle creation is not blocked (best-effort, no lock is actually held)
153
+ return { locked: true };
154
+ }
155
+ /**
156
+ * Remove the dedup-index entry for `fingerprint` from every available storage dir, but only where that entry's status is 'in-progress'. Per-dir failures are logged at debug level and never rethrown.
157
+ */
158
+ async function clearInProgressLock(cfg, fingerprint) {
159
+ for (const storageDir of cfg.storageDirs) {
160
+ try {
161
+ if (!(await isPathAvailable(storageDir)))
162
+ continue;
163
+ const idx = await readDedupIndex(storageDir);
164
+ const existing = idx.byFingerprint[fingerprint];
165
+ // Entries with any other status are left untouched. NOTE(review): no taskId is compared, so a lock written by a different task for the same fingerprint would also be removed - confirm this is intended
166
+ if (existing?.status === 'in-progress') {
167
+ delete idx.byFingerprint[fingerprint];
168
+ idx.updatedAt = nowIso();
169
+ await writeDedupIndex(storageDir, idx);
170
+ }
171
+ }
172
+ catch (err) {
173
+ logger.debug(`Failed to clear in-progress lock in ${storageDir}`, err instanceof Error ? err : undefined);
174
+ }
175
+ }
176
+ }
177
+ /**
177
+ * Look for an unexpired in-progress lock on `fingerprint`. Resolves to the first matching index entry found while scanning cfg.storageDirs in order, or to null when none is found. Read-only: the index is never written here.
178
+ */
179
+ export async function checkInProgressLock(cfg, fingerprint) {
180
+ const nowMs = Date.now();
181
+ for (const storageDir of cfg.storageDirs) {
182
+ try {
183
+ if (!(await isPathAvailable(storageDir)))
184
+ continue;
185
+ const idx = await readDedupIndex(storageDir);
186
+ const existing = idx.byFingerprint[fingerprint];
187
+ if (existing?.status === 'in-progress' && existing.startedAt) {
188
+ const startedMs = new Date(existing.startedAt).getTime();
189
+ const elapsed = nowMs - startedMs;
190
+ if (elapsed < cfg.inProgressLockTimeoutMs) { // false for NaN elapsed or an undefined timeout, so such entries are treated as expired
191
+ return existing;
192
+ }
193
+ }
194
+ }
134
196
  catch {
135
- // best-effort
197
+ // Best-effort: a storage dir whose index cannot be read is skipped without logging
136
198
  }
137
199
  }
200
+ return null;
138
201
  }
139
202
  async function readBundleSummary(cfg, bundleId) {
140
203
  const storageDir = (await findBundleStorageDir(cfg.storageDirs, bundleId)) ?? (await getEffectiveStorageDir(cfg));
@@ -166,6 +229,9 @@ async function findExistingBundleByFingerprint(cfg, fingerprint) {
166
229
  continue;
167
230
  const idx = await readDedupIndex(storageDir);
168
231
  const hit = idx.byFingerprint[fingerprint];
232
+ // Skip in-progress entries - they don't have a completed bundle yet
233
+ if (hit?.status === 'in-progress')
234
+ continue;
169
235
  if (hit?.bundleId && (await bundleExistsMulti(cfg.storageDirs, hit.bundleId))) {
170
236
  return hit.bundleId;
171
237
  }
@@ -350,6 +416,33 @@ async function validateBundleCompleteness(bundleRoot) {
350
416
  missingComponents,
351
417
  };
352
418
  }
419
+ /**
420
+ * Assert that a bundle is complete and ready for operations. Resolves with no value when the bundle passes validateBundleCompleteness.
421
+ * Throws `Bundle not found: <id>` when no storage dir contains the bundle, and an error listing the missing components plus suggested actions when the bundle is incomplete.
422
+ * Should be called at the entry point of tools that require a complete bundle
423
+ * (e.g., dependency graph, trace links, search).
424
+ */
425
+ export async function assertBundleComplete(cfg, bundleId) {
426
+ const storageDir = await findBundleStorageDir(cfg.storageDirs, bundleId);
427
+ if (!storageDir) {
428
+ throw new Error(`Bundle not found: ${bundleId}`);
429
+ }
430
+ const bundleRoot = getBundlePaths(storageDir, bundleId).rootDir;
431
+ const { isValid, missingComponents } = await validateBundleCompleteness(bundleRoot);
432
+ if (!isValid) {
433
+ const issues = missingComponents.join('\n - '); // joined so that each missing component lands on its own dash-prefixed line in the message below
434
+ throw new Error(`Bundle is incomplete and cannot be used for this operation.\n\n` +
435
+ `Bundle ID: ${bundleId}\n` +
436
+ `Missing components:\n - ${issues}\n\n` +
437
+ `This usually happens when:\n` +
438
+ `1. Bundle creation was interrupted (timeout, network error, etc.)\n` +
439
+ `2. Bundle download is still in progress\n\n` +
440
+ `Suggested actions:\n` +
441
+ `- Use preflight_update_bundle with force:true to re-download the repository\n` +
442
+ `- Or use preflight_delete_bundle and preflight_create_bundle to start fresh\n` +
443
+ `- Check preflight_get_task_status if creation might still be in progress`);
444
+ }
445
+ }
353
446
  /**
354
447
  * Detect primary language from ingested files
355
448
  */
@@ -657,7 +750,14 @@ async function cloneAndIngestGitHubRepo(params) {
657
750
  let fetchedAt = nowIso();
658
751
  let refUsed = params.ref;
659
752
  try {
660
- await shallowClone(cloneUrl, tmpCheckoutGit, { ref: params.ref, timeoutMs: params.cfg.gitCloneTimeoutMs });
753
+ params.onProgress?.('cloning', 0, `Cloning ${repoId}...`);
754
+ await shallowClone(cloneUrl, tmpCheckoutGit, {
755
+ ref: params.ref,
756
+ timeoutMs: params.cfg.gitCloneTimeoutMs,
757
+ onProgress: (phase, percent, msg) => {
758
+ params.onProgress?.('cloning', percent, `${repoId}: ${msg}`);
759
+ },
760
+ });
661
761
  headSha = await getLocalHeadSha(tmpCheckoutGit);
662
762
  }
663
763
  catch (err) {
@@ -665,12 +765,17 @@ async function cloneAndIngestGitHubRepo(params) {
665
765
  source = 'archive';
666
766
  const msg = err instanceof Error ? err.message : String(err);
667
767
  notes.push(`git clone failed; used GitHub archive fallback: ${msg}`);
768
+ params.onProgress?.('downloading', 0, `Downloading ${repoId} archive...`);
668
769
  const archive = await downloadAndExtractGitHubArchive({
669
770
  cfg: params.cfg,
670
771
  owner: params.owner,
671
772
  repo: params.repo,
672
773
  ref: params.ref,
673
774
  destDir: tmpArchiveDir,
775
+ onProgress: (downloaded, total, msg) => {
776
+ const percent = total ? Math.round((downloaded / total) * 100) : 0;
777
+ params.onProgress?.('downloading', percent, `${repoId}: ${msg}`);
778
+ },
674
779
  });
675
780
  repoRootForIngest = archive.repoRoot;
676
781
  fetchedAt = archive.fetchedAt;
@@ -750,6 +855,15 @@ export async function createBundle(cfg, input, options) {
750
855
  }
751
856
  async function createBundleInternal(cfg, input, options) {
752
857
  const fingerprint = computeCreateInputFingerprint(input);
858
+ const repoIds = input.repos.map((r) => r.repo);
859
+ const onProgress = options?.onProgress;
860
+ const tracker = getProgressTracker();
861
+ // Helper to report progress
862
+ const reportProgress = (phase, progress, message, total) => {
863
+ if (onProgress) {
864
+ onProgress(phase, progress, message, total);
865
+ }
866
+ };
753
867
  const ifExists = options?.ifExists ?? 'error';
754
868
  if (ifExists !== 'createNew') {
755
869
  const existing = await findExistingBundleByFingerprint(cfg, fingerprint);
@@ -764,6 +878,28 @@ async function createBundleInternal(cfg, input, options) {
764
878
  throw new Error(`Bundle already exists for these inputs: ${existing}`);
765
879
  }
766
880
  }
881
+ // Start tracking this task
882
+ const taskId = tracker.startTask(fingerprint, repoIds);
883
+ reportProgress('starting', 0, `Starting bundle creation for ${repoIds.join(', ')}`);
884
+ // Try to acquire in-progress lock
885
+ const lockResult = await setInProgressLock(cfg, fingerprint, taskId, repoIds);
886
+ if (!lockResult.locked) {
887
+ // Another task is already creating this bundle
888
+ const entry = lockResult.existingEntry;
889
+ const elapsedSec = entry.startedAt
890
+ ? Math.round((Date.now() - new Date(entry.startedAt).getTime()) / 1000)
891
+ : 0;
892
+ const msg = `Bundle creation already in progress (taskId: ${entry.taskId}, started ${elapsedSec}s ago). ` +
893
+ `Use preflight_get_task_status to check progress.`;
894
+ // Throw a special error that can be caught and handled
895
+ const err = new Error(msg);
896
+ err.code = 'BUNDLE_IN_PROGRESS';
897
+ err.taskId = entry.taskId;
898
+ err.fingerprint = fingerprint;
899
+ err.repos = entry.repos;
900
+ err.startedAt = entry.startedAt;
901
+ throw err;
902
+ }
767
903
  const bundleId = crypto.randomUUID();
768
904
  const createdAt = nowIso();
769
905
  // Use effective storage dir (falls back if primary unavailable)
@@ -778,9 +914,15 @@ async function createBundleInternal(cfg, input, options) {
778
914
  const reposSummary = [];
779
915
  try {
780
916
  // All operations happen in tmpPaths (temporary directory)
917
+ const totalRepos = input.repos.length;
918
+ let repoIndex = 0;
781
919
  for (const repoInput of input.repos) {
920
+ repoIndex++;
921
+ const repoProgress = Math.round((repoIndex - 1) / totalRepos * 40); // 0-40% for repo fetching
782
922
  if (repoInput.kind === 'github') {
783
923
  const { owner, repo } = parseOwnerRepo(repoInput.repo);
924
+ reportProgress('cloning', repoProgress, `[${repoIndex}/${totalRepos}] Fetching ${owner}/${repo}...`);
925
+ tracker.updateProgress(taskId, 'cloning', repoProgress, `Fetching ${owner}/${repo}...`);
784
926
  const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
785
927
  cfg,
786
928
  bundleId,
@@ -788,6 +930,12 @@ async function createBundleInternal(cfg, input, options) {
788
930
  owner,
789
931
  repo,
790
932
  ref: repoInput.ref,
933
+ onProgress: (phase, percent, msg) => {
934
+ // Map clone/download progress to overall progress (0-40% range per repo)
935
+ const overallProgress = repoProgress + Math.round(percent * 0.4 / totalRepos);
936
+ reportProgress(phase, overallProgress, `[${repoIndex}/${totalRepos}] ${msg}`);
937
+ tracker.updateProgress(taskId, phase, overallProgress, msg);
938
+ },
791
939
  });
792
940
  allIngestedFiles.push(...files);
793
941
  reposSummary.push({
@@ -798,8 +946,11 @@ async function createBundleInternal(cfg, input, options) {
798
946
  notes: [...notes, ...skipped].slice(0, 50),
799
947
  });
800
948
  }
801
- else if (repoInput.kind === 'local') {
949
+ else {
950
+ // Local repository
802
951
  const { owner, repo } = parseOwnerRepo(repoInput.repo);
952
+ reportProgress('ingesting', repoProgress, `[${repoIndex}/${totalRepos}] Ingesting local ${owner}/${repo}...`);
953
+ tracker.updateProgress(taskId, 'ingesting', repoProgress, `Ingesting local ${owner}/${repo}...`);
803
954
  const { files, skipped } = await ingestLocalRepo({
804
955
  cfg,
805
956
  bundleId,
@@ -812,21 +963,6 @@ async function createBundleInternal(cfg, input, options) {
812
963
  allIngestedFiles.push(...files);
813
964
  reposSummary.push({ kind: 'local', id: `${owner}/${repo}`, source: 'local', notes: skipped.slice(0, 50) });
814
965
  }
815
- else {
816
- // DeepWiki integration: fetch and convert to Markdown.
817
- const deepwikiResult = await ingestDeepWikiRepo({
818
- cfg,
819
- bundlePaths: tmpPaths,
820
- url: repoInput.url,
821
- });
822
- allIngestedFiles.push(...deepwikiResult.files);
823
- reposSummary.push({
824
- kind: 'deepwiki',
825
- id: deepwikiResult.summary.repoId,
826
- source: 'deepwiki',
827
- notes: deepwikiResult.summary.notes,
828
- });
829
- }
830
966
  }
831
967
  // Context7 libraries (best-effort).
832
968
  let librariesSummary;
@@ -844,6 +980,8 @@ async function createBundleInternal(cfg, input, options) {
844
980
  librariesSummary = libIngest.libraries;
845
981
  }
846
982
  // Build index.
983
+ reportProgress('indexing', 50, `Building search index (${allIngestedFiles.length} files)...`);
984
+ tracker.updateProgress(taskId, 'indexing', 50, `Building search index (${allIngestedFiles.length} files)...`);
847
985
  await rebuildIndex(tmpPaths.searchDbPath, allIngestedFiles, {
848
986
  includeDocs: true,
849
987
  includeCode: true,
@@ -901,7 +1039,18 @@ async function createBundleInternal(cfg, input, options) {
901
1039
  repos: reposSummary.map((r) => ({ id: r.id, headSha: r.headSha })),
902
1040
  libraries: librariesSummary,
903
1041
  });
904
- // Overview (S2: factual-only with evidence pointers).
1042
+ // Generate static facts (FACTS.json) FIRST. This is intentionally non-LLM and safe to keep inside bundles.
1043
+ reportProgress('analyzing', 70, 'Analyzing code structure...');
1044
+ tracker.updateProgress(taskId, 'analyzing', 70, 'Analyzing code structure...');
1045
+ await generateFactsBestEffort({
1046
+ bundleId,
1047
+ bundleRoot: tmpPaths.rootDir,
1048
+ files: allIngestedFiles,
1049
+ mode: cfg.analysisMode,
1050
+ });
1051
+ // Overview (S2: factual-only with evidence pointers) - generated AFTER FACTS.json
1052
+ reportProgress('generating', 80, 'Generating overview...');
1053
+ tracker.updateProgress(taskId, 'generating', 80, 'Generating overview...');
905
1054
  const perRepoOverviews = reposSummary
906
1055
  .filter((r) => r.kind === 'github' || r.kind === 'local')
907
1056
  .map((r) => {
@@ -916,13 +1065,6 @@ async function createBundleInternal(cfg, input, options) {
916
1065
  libraries: librariesSummary,
917
1066
  });
918
1067
  await writeOverviewFile(tmpPaths.overviewPath, overviewMd);
919
- // Generate static facts (FACTS.json). This is intentionally non-LLM and safe to keep inside bundles.
920
- await generateFactsBestEffort({
921
- bundleId,
922
- bundleRoot: tmpPaths.rootDir,
923
- files: allIngestedFiles,
924
- mode: cfg.analysisMode,
925
- });
926
1068
  // CRITICAL: Validate bundle completeness BEFORE atomic move
927
1069
  const validation = await validateBundleCompleteness(tmpPaths.rootDir);
928
1070
  if (!validation.isValid) {
@@ -932,6 +1074,8 @@ async function createBundleInternal(cfg, input, options) {
932
1074
  }
933
1075
  // ATOMIC OPERATION: Move from temp to final location
934
1076
  // This is atomic on most filesystems - bundle becomes visible only when complete
1077
+ reportProgress('finalizing', 90, 'Finalizing bundle...');
1078
+ tracker.updateProgress(taskId, 'finalizing', 90, 'Finalizing bundle...');
935
1079
  logger.info(`Moving bundle ${bundleId} from temp to final location (atomic)`);
936
1080
  await ensureDir(effectiveStorageDir);
937
1081
  try {
@@ -958,7 +1102,10 @@ async function createBundleInternal(cfg, input, options) {
958
1102
  await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
959
1103
  }
960
1104
  // Update de-duplication index (best-effort). This is intentionally after atomic move.
961
- await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt);
1105
+ await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt, 'complete');
1106
+ // Mark task complete
1107
+ reportProgress('complete', 100, `Bundle created: ${bundleId}`);
1108
+ tracker.completeTask(taskId, bundleId);
962
1109
  const summary = {
963
1110
  bundleId,
964
1111
  createdAt,
@@ -972,14 +1119,21 @@ async function createBundleInternal(cfg, input, options) {
972
1119
  // Clean up temp directory on failure
973
1120
  logger.error(`Bundle creation failed, cleaning up temp: ${bundleId}`, err instanceof Error ? err : undefined);
974
1121
  await rmIfExists(tmpPaths.rootDir);
975
- // Enhance error message
1122
+ // Clear in-progress lock on failure
1123
+ await clearInProgressLock(cfg, fingerprint);
1124
+ // Mark task failed
976
1125
  const errorMsg = err instanceof Error ? err.message : String(err);
1126
+ tracker.failTask(taskId, errorMsg);
1127
+ // Re-throw with enhanced message (unless it's already our BUNDLE_IN_PROGRESS error)
1128
+ if (err?.code === 'BUNDLE_IN_PROGRESS') {
1129
+ throw err;
1130
+ }
977
1131
  throw new Error(`Failed to create bundle: ${errorMsg}`);
978
1132
  }
979
1133
  finally {
980
1134
  // Ensure temp directory is cleaned up (double safety)
981
- await rmIfExists(tmpPaths.rootDir).catch(() => {
982
- // Ignore cleanup errors
1135
+ await rmIfExists(tmpPaths.rootDir).catch((err) => {
1136
+ logger.debug('Failed to cleanup temp bundle directory in finally block (non-critical)', err instanceof Error ? err : undefined);
983
1137
  });
984
1138
  }
985
1139
  }
@@ -1008,19 +1162,14 @@ export async function checkForUpdates(cfg, bundleId) {
1008
1162
  hasUpdates = true;
1009
1163
  details.push({ repoId, currentSha: prev?.headSha, remoteSha, changed });
1010
1164
  }
1011
- else if (repoInput.kind === 'local') {
1165
+ else {
1166
+ // Local: can't reliably detect whether local files changed without scanning; assume possible update.
1012
1167
  const { owner, repo } = parseOwnerRepo(repoInput.repo);
1013
1168
  const repoId = `${owner}/${repo}`;
1014
- // We can't reliably detect whether local files changed without scanning; assume possible update.
1015
1169
  const prev = manifest.repos.find((r) => r.id === repoId);
1016
1170
  details.push({ repoId, currentSha: prev?.headSha, changed: true });
1017
1171
  hasUpdates = true;
1018
1172
  }
1019
- else {
1020
- // DeepWiki: can't easily detect changes, assume possible update
1021
- details.push({ repoId: repoInput.url, changed: true });
1022
- hasUpdates = true;
1023
- }
1024
1173
  }
1025
1174
  return { hasUpdates, details };
1026
1175
  }
@@ -1122,41 +1271,6 @@ async function scanBundleIndexableFiles(params) {
1122
1271
  });
1123
1272
  }
1124
1273
  }
1125
- // 3) deepwiki/<owner>/<repo>/norm/** (docs-only)
1126
- const deepwikiDir = path.join(params.bundleRootDir, 'deepwiki');
1127
- const dwSt = await statOrNull(deepwikiDir);
1128
- if (dwSt?.isDirectory()) {
1129
- // Only walk the norm subtrees.
1130
- const owners = await fs.readdir(deepwikiDir, { withFileTypes: true });
1131
- for (const ownerEnt of owners) {
1132
- if (!ownerEnt.isDirectory())
1133
- continue;
1134
- const owner = ownerEnt.name;
1135
- const ownerDir = path.join(deepwikiDir, owner);
1136
- const repos = await fs.readdir(ownerDir, { withFileTypes: true });
1137
- for (const repoEnt of repos) {
1138
- if (!repoEnt.isDirectory())
1139
- continue;
1140
- const repo = repoEnt.name;
1141
- const normDir = path.join(ownerDir, repo, 'norm');
1142
- const normSt = await statOrNull(normDir);
1143
- if (!normSt?.isDirectory())
1144
- continue;
1145
- for await (const wf of walkFilesNoIgnore(normDir)) {
1146
- if (!wf.relPosix.toLowerCase().endsWith('.md'))
1147
- continue;
1148
- const bundleRel = `deepwiki/${owner}/${repo}/norm/${wf.relPosix}`;
1149
- await pushFile({
1150
- repoId: `deepwiki:${owner}/${repo}`,
1151
- kind: 'doc',
1152
- repoRelativePath: wf.relPosix,
1153
- bundleRelPosix: bundleRel,
1154
- absPath: wf.absPath,
1155
- });
1156
- }
1157
- }
1158
- }
1159
- }
1160
1274
  return { files, totalBytes, skipped };
1161
1275
  }
1162
1276
  export async function repairBundle(cfg, bundleId, options) {
@@ -1281,15 +1395,27 @@ export async function updateBundle(cfg, bundleId, options) {
1281
1395
  const paths = getBundlePaths(effectiveStorageDir, bundleId);
1282
1396
  const manifest = await readManifest(paths.manifestPath);
1283
1397
  const updatedAt = nowIso();
1398
+ const onProgress = options?.onProgress;
1399
+ // Report progress helper
1400
+ const reportProgress = (phase, progress, message, total) => {
1401
+ if (onProgress) {
1402
+ onProgress(phase, progress, message, total);
1403
+ }
1404
+ };
1405
+ reportProgress('starting', 0, `Updating bundle ${bundleId}...`);
1284
1406
  let changed = false;
1285
1407
  const allIngestedFiles = [];
1286
1408
  const reposSummary = [];
1409
+ const totalRepos = manifest.inputs.repos.length;
1410
+ let repoIndex = 0;
1287
1411
  // Rebuild everything obvious for now (simple + deterministic).
1288
1412
  for (const repoInput of manifest.inputs.repos) {
1413
+ repoIndex++;
1289
1414
  if (repoInput.kind === 'github') {
1290
1415
  const { owner, repo } = parseOwnerRepo(repoInput.repo);
1291
1416
  const repoId = `${owner}/${repo}`;
1292
1417
  const cloneUrl = toCloneUrl({ owner, repo });
1418
+ reportProgress('cloning', calcPercent(repoIndex - 1, totalRepos), `Checking ${repoId}...`, totalRepos);
1293
1419
  let remoteSha;
1294
1420
  try {
1295
1421
  remoteSha = await getRemoteHeadSha(cloneUrl);
@@ -1301,6 +1427,7 @@ export async function updateBundle(cfg, bundleId, options) {
1301
1427
  if (remoteSha && prev?.headSha && remoteSha !== prev.headSha) {
1302
1428
  changed = true;
1303
1429
  }
1430
+ reportProgress('downloading', calcPercent(repoIndex - 1, totalRepos), `Fetching ${repoId}...`, totalRepos);
1304
1431
  const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
1305
1432
  cfg,
1306
1433
  bundleId,
@@ -1308,6 +1435,9 @@ export async function updateBundle(cfg, bundleId, options) {
1308
1435
  owner,
1309
1436
  repo,
1310
1437
  ref: repoInput.ref,
1438
+ onProgress: (phase, progress, message) => {
1439
+ reportProgress(phase, progress, message);
1440
+ },
1311
1441
  });
1312
1442
  if (prev?.headSha && headSha && headSha !== prev.headSha) {
1313
1443
  changed = true;
@@ -1319,7 +1449,8 @@ export async function updateBundle(cfg, bundleId, options) {
1319
1449
  allIngestedFiles.push(...files);
1320
1450
  reposSummary.push({ kind: 'github', id: repoId, source, headSha, notes: [...notes, ...skipped].slice(0, 50) });
1321
1451
  }
1322
- else if (repoInput.kind === 'local') {
1452
+ else {
1453
+ // Local repository
1323
1454
  const { owner, repo } = parseOwnerRepo(repoInput.repo);
1324
1455
  const repoId = `${owner}/${repo}`;
1325
1456
  const { files, skipped } = await ingestLocalRepo({
@@ -1335,27 +1466,11 @@ export async function updateBundle(cfg, bundleId, options) {
1335
1466
  reposSummary.push({ kind: 'local', id: repoId, source: 'local', notes: skipped.slice(0, 50) });
1336
1467
  changed = true;
1337
1468
  }
1338
- else {
1339
- // DeepWiki integration: fetch and convert to Markdown.
1340
- const deepwikiResult = await ingestDeepWikiRepo({
1341
- cfg,
1342
- bundlePaths: paths,
1343
- url: repoInput.url,
1344
- });
1345
- allIngestedFiles.push(...deepwikiResult.files);
1346
- reposSummary.push({
1347
- kind: 'deepwiki',
1348
- id: deepwikiResult.summary.repoId,
1349
- source: 'deepwiki',
1350
- notes: deepwikiResult.summary.notes,
1351
- });
1352
- // Always mark as changed for DeepWiki since we can't easily detect content changes.
1353
- changed = true;
1354
- }
1355
1469
  }
1356
1470
  // Context7 libraries (best-effort).
1357
1471
  let librariesSummary;
1358
1472
  if (manifest.inputs.libraries?.length) {
1473
+ reportProgress('downloading', 80, 'Fetching Context7 libraries...');
1359
1474
  await rmIfExists(paths.librariesDir);
1360
1475
  await ensureDir(paths.librariesDir);
1361
1476
  const libIngest = await ingestContext7Libraries({
@@ -1368,6 +1483,7 @@ export async function updateBundle(cfg, bundleId, options) {
1368
1483
  librariesSummary = libIngest.libraries;
1369
1484
  }
1370
1485
  // Rebuild index.
1486
+ reportProgress('indexing', 85, `Rebuilding search index (${allIngestedFiles.length} files)...`);
1371
1487
  await rebuildIndex(paths.searchDbPath, allIngestedFiles, {
1372
1488
  includeDocs: manifest.index.includeDocs,
1373
1489
  includeCode: manifest.index.includeCode,
@@ -1393,6 +1509,7 @@ export async function updateBundle(cfg, bundleId, options) {
1393
1509
  };
1394
1510
  await writeManifest(paths.manifestPath, newManifest);
1395
1511
  // Regenerate guides + overview.
1512
+ reportProgress('generating', 90, 'Regenerating guides and overview...');
1396
1513
  await writeAgentsMd(paths.agentsPath);
1397
1514
  await writeStartHereMd({
1398
1515
  targetPath: paths.startHerePath,
@@ -1415,6 +1532,7 @@ export async function updateBundle(cfg, bundleId, options) {
1415
1532
  });
1416
1533
  await writeOverviewFile(paths.overviewPath, overviewMd);
1417
1534
  // Refresh static facts (FACTS.json) after update.
1535
+ reportProgress('analyzing', 95, 'Analyzing bundle...');
1418
1536
  await generateFactsBestEffort({
1419
1537
  bundleId,
1420
1538
  bundleRoot: paths.rootDir,
@@ -1422,11 +1540,13 @@ export async function updateBundle(cfg, bundleId, options) {
1422
1540
  mode: cfg.analysisMode,
1423
1541
  });
1424
1542
  // Mirror to backup storage directories (non-blocking on failures)
1543
+ reportProgress('finalizing', 98, 'Finalizing update...');
1425
1544
  if (cfg.storageDirs.length > 1) {
1426
1545
  await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
1427
1546
  }
1428
1547
  // Keep the de-duplication index fresh (best-effort).
1429
1548
  await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
1549
+ reportProgress('complete', 100, `Bundle updated: ${bundleId}`);
1430
1550
  const summary = {
1431
1551
  bundleId,
1432
1552
  createdAt: manifest.createdAt,