preflight-mcp 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,8 @@ import fs from 'node:fs/promises';
3
3
  import path from 'node:path';
4
4
  import { logger } from '../logging/logger.js';
5
5
  import { getLocalHeadSha, getRemoteHeadSha, parseOwnerRepo, shallowClone, toCloneUrl, } from './github.js';
6
- import { ingestRepoToBundle } from './ingest.js';
6
+ import { downloadAndExtractGitHubArchive } from './githubArchive.js';
7
+ import { classifyIngestedFileKind, ingestRepoToBundle } from './ingest.js';
7
8
  import { writeManifest, readManifest } from './manifest.js';
8
9
  import { getBundlePaths, repoMetaPath, repoNormDir, repoRawDir, repoRootDir } from './paths.js';
9
10
  import { writeAgentsMd, writeStartHereMd } from './guides.js';
@@ -13,22 +14,261 @@ import { ingestContext7Libraries } from './context7.js';
13
14
  import { ingestDeepWikiRepo } from './deepwiki.js';
14
15
  import { analyzeBundleStatic } from './analysis.js';
15
16
  import { autoDetectTags, generateDisplayName, generateDescription } from './tagging.js';
17
+ import { bundleCreationLimiter } from '../core/concurrency-limiter.js';
18
+ const DEDUP_INDEX_FILE = '.preflight-dedup-index.json';
19
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param {string} text - Text to hash; it is fed to the hash as UTF-8.
 * @returns {string} The digest encoded as a hex string.
 */
function sha256Hex(text) {
    const hasher = crypto.createHash('sha256');
    hasher.update(text, 'utf8');
    return hasher.digest('hex');
}
22
/**
 * Canonicalize a list of strings so that equivalent lists compare equal.
 *
 * Each entry is trimmed and lower-cased, blank entries are dropped, and the
 * result is sorted with the default string ordering.
 *
 * @param {string[] | null | undefined} values - Raw entries; nullish is treated as empty.
 * @returns {string[]} A new, sorted array of cleaned entries.
 */
function normalizeList(values) {
    const entries = values ?? [];
    const cleaned = entries
        .map((item) => item.trim().toLowerCase())
        .filter((item) => item.length > 0);
    cleaned.sort();
    return cleaned;
}
29
/**
 * Normalize a DeepWiki URL for comparison purposes.
 *
 * The input is trimmed; if it parses as a URL, the fragment is removed, the
 * host is lower-cased and trailing slashes are stripped from the path.
 * Input that does not parse as a URL is returned trimmed but otherwise as-is.
 *
 * @param {string} raw - URL text as supplied by the caller.
 * @returns {string} The normalized URL string, or the trimmed input.
 */
function normalizeDeepWikiUrl(raw) {
    const candidate = raw.trim();
    let parsed;
    try {
        parsed = new URL(candidate);
    }
    catch {
        // Not a parseable URL: fall back to the trimmed text.
        return candidate;
    }
    parsed.hash = '';
    parsed.host = parsed.host.toLowerCase();
    parsed.pathname = parsed.pathname.replace(/\/+$/g, '');
    return parsed.toString();
}
43
/**
 * Reduce bundle-creation input to a canonical form used for fingerprinting.
 *
 * - `github` and `local` repos are both emitted as `kind: 'github'` entries
 *   (lower-cased `owner/repo`, trimmed `ref` or `undefined`), so a local import
 *   de-duplicates against a GitHub import of the same logical repo/ref.
 * - Any other repo entry is treated as DeepWiki and keeps only its normalized URL.
 * - Repos are sorted by a composite key; libraries and topics go through
 *   `normalizeList`.
 *
 * @param {{repos: object[], libraries?: string[], topics?: string[]}} input - Creation input.
 * @returns {{schemaVersion: 1, repos: object[], libraries: string[], topics: string[]}}
 *   Canonical representation; property order is significant because the result is
 *   serialized with `JSON.stringify` by the fingerprint function.
 */
function canonicalizeCreateInput(input) {
    const sortKey = (entry) => (entry.kind === 'github'
        ? `github:${entry.repo}:${entry.ref ?? ''}`
        : `deepwiki:${entry.url}`);
    const repos = input.repos
        .map((r) => {
            if (r.kind === 'github' || r.kind === 'local') {
                const { owner, repo } = parseOwnerRepo(r.repo);
                return {
                    kind: 'github',
                    repo: `${owner.toLowerCase()}/${repo.toLowerCase()}`,
                    ref: (r.ref ?? '').trim() || undefined,
                };
            }
            return {
                kind: 'deepwiki',
                url: normalizeDeepWikiUrl(r.url),
            };
        })
        // Pin the collation locale: without it, localeCompare uses the process's
        // default locale, so the repo order (and therefore the fingerprint) could
        // differ between machines that share the same storage directories.
        .sort((a, b) => sortKey(a).localeCompare(sortKey(b), 'en'));
    return {
        schemaVersion: 1,
        repos,
        libraries: normalizeList(input.libraries),
        topics: normalizeList(input.topics),
    };
}
80
/**
 * Compute the de-duplication fingerprint for bundle-creation input.
 *
 * The input is canonicalized, serialized with `JSON.stringify`, and hashed
 * with SHA-256.
 *
 * @param {object} input - Creation input (`repos`, `libraries`, `topics`).
 * @returns {string} Hex digest of the canonical JSON.
 */
export function computeCreateInputFingerprint(input) {
    const serialized = JSON.stringify(canonicalizeCreateInput(input));
    return sha256Hex(serialized);
}
84
/**
 * Location of the de-duplication index file inside a storage directory.
 *
 * @param {string} storageDir - Storage directory.
 * @returns {string} `storageDir` joined with the index file name.
 */
function dedupIndexPath(storageDir) {
    const indexFile = path.join(storageDir, DEDUP_INDEX_FILE);
    return indexFile;
}
87
/**
 * Load the de-duplication index stored in `storageDir`.
 *
 * Any failure (file missing, unreadable, invalid JSON) and any index that does
 * not have `schemaVersion === 1` with an object `byFingerprint` yields a fresh,
 * empty index instead of an error.
 *
 * @param {string} storageDir - Storage directory holding the index file.
 * @returns {Promise<{schemaVersion: number, updatedAt: string, byFingerprint: object}>}
 */
async function readDedupIndex(storageDir) {
    const indexFile = dedupIndexPath(storageDir);
    const emptyIndex = () => ({ schemaVersion: 1, updatedAt: nowIso(), byFingerprint: {} });
    try {
        const parsed = JSON.parse(await fs.readFile(indexFile, 'utf8'));
        const usable = parsed.schemaVersion === 1
            && typeof parsed.byFingerprint === 'object'
            && Boolean(parsed.byFingerprint);
        return usable ? parsed : emptyIndex();
    }
    catch {
        return emptyIndex();
    }
}
101
/**
 * Persist the de-duplication index for `storageDir`.
 *
 * The JSON is first written to a uniquely named temp file next to the target
 * and then renamed over it, so readers do not observe a partially written
 * index. On failure the temp file is removed (ignoring removal errors) and the
 * original error is rethrown.
 *
 * @param {string} storageDir - Storage directory holding the index file.
 * @param {object} idx - Index object to serialize.
 * @returns {Promise<void>}
 */
async function writeDedupIndex(storageDir, idx) {
    const target = dedupIndexPath(storageDir);
    await ensureDir(path.dirname(target));
    const staging = `${target}.tmp.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    try {
        const payload = `${JSON.stringify(idx, null, 2)}\n`;
        await fs.writeFile(staging, payload, 'utf8');
        // Atomic rename on POSIX; near-atomic on Windows
        await fs.rename(staging, target);
    }
    catch (err) {
        // Drop the temp file; a failure to remove it is not interesting.
        await fs.unlink(staging).catch(() => { });
        throw err;
    }
}
122
/**
 * Record `fingerprint -> bundle` in the de-duplication index of every
 * configured storage directory, one directory after another.
 *
 * Directories for which `isParentAvailable` reports false are skipped, and any
 * error raised while updating a directory is swallowed: this is best-effort.
 *
 * @param {{storageDirs: string[]}} cfg - Configuration with the storage directories.
 * @param {string} fingerprint - Fingerprint of the creation input.
 * @param {string} bundleId - Bundle the fingerprint maps to.
 * @param {string} bundleUpdatedAt - Timestamp stored alongside the bundle id.
 * @returns {Promise<void>}
 */
async function updateDedupIndexBestEffort(cfg, fingerprint, bundleId, bundleUpdatedAt) {
    const recordIn = async (storageDir) => {
        if (!(await isParentAvailable(storageDir))) {
            return;
        }
        await ensureDir(storageDir);
        const index = await readDedupIndex(storageDir);
        index.byFingerprint[fingerprint] = { bundleId, bundleUpdatedAt };
        index.updatedAt = nowIso();
        await writeDedupIndex(storageDir, index);
    };
    for (const storageDir of cfg.storageDirs) {
        try {
            await recordIn(storageDir);
        }
        catch {
            // best-effort
        }
    }
}
139
/**
 * Build a summary object for an existing bundle from its manifest.
 *
 * The bundle is looked up across `cfg.storageDirs`; when no directory is
 * reported for it, the effective storage directory is used instead.
 *
 * @param {{storageDirs: string[]}} cfg - Configuration.
 * @param {string} bundleId - Bundle to summarize.
 * @returns {Promise<object>} `bundleId`, `createdAt`, `updatedAt`, `repos`
 *   (kind/id/source/headSha/notes per repo) and `libraries` from the manifest.
 */
async function readBundleSummary(cfg, bundleId) {
    let storageDir = await findBundleStorageDir(cfg.storageDirs, bundleId);
    storageDir ??= await getEffectiveStorageDir(cfg);
    const { manifestPath } = getBundlePaths(storageDir, bundleId);
    const manifest = await readManifest(manifestPath);
    const repos = manifest.repos.map(({ kind, id, source, headSha, notes }) => ({
        kind,
        id,
        source,
        headSha,
        notes,
    }));
    return {
        bundleId: manifest.bundleId,
        createdAt: manifest.createdAt,
        updatedAt: manifest.updatedAt,
        repos,
        libraries: manifest.libraries,
    };
}
157
/**
 * Look up an existing bundle that was created from equivalent inputs.
 *
 * @param {object} cfg - Configuration (storage directories).
 * @param {object} input - Creation input to fingerprint.
 * @returns {Promise<string | null>} The matching bundle id, or `null` if none.
 */
export async function findBundleByInputs(cfg, input) {
    return findExistingBundleByFingerprint(cfg, computeCreateInputFingerprint(input));
}
161
/**
 * Find the bundle whose creation inputs hash to `fingerprint`.
 *
 * Two strategies are tried in order:
 *  1. Fast path: consult the de-duplication index of each available storage
 *     directory and accept a hit only if the bundle still exists.
 *  2. Slow path: read every bundle manifest, recompute the fingerprint from
 *     `manifest.inputs`, and keep the match with the latest `updatedAt`. This
 *     also covers bundles created before fingerprints existed. A match is
 *     written back to the index (best-effort) before returning.
 *
 * @param {{storageDirs: string[]}} cfg - Configuration.
 * @param {string} fingerprint - Fingerprint to look for.
 * @returns {Promise<string | null>} Matching bundle id, or `null`.
 */
async function findExistingBundleByFingerprint(cfg, fingerprint) {
    // Fast path: consult any available dedup index.
    for (const storageDir of cfg.storageDirs) {
        try {
            if (await isPathAvailable(storageDir)) {
                const { byFingerprint } = await readDedupIndex(storageDir);
                const candidate = byFingerprint[fingerprint]?.bundleId;
                if (candidate && (await bundleExistsMulti(cfg.storageDirs, candidate))) {
                    return candidate;
                }
            }
        }
        catch {
            // ignore
        }
    }
    // Slow path: scan manifests.
    let newest = null;
    for (const storageDir of cfg.storageDirs) {
        if (!(await isPathAvailable(storageDir))) {
            continue;
        }
        for (const id of await listBundles(storageDir)) {
            try {
                const { manifestPath } = getBundlePaths(storageDir, id);
                const manifest = await readManifest(manifestPath);
                const { repos, libraries, topics } = manifest.inputs;
                if (computeCreateInputFingerprint({ repos, libraries, topics }) !== fingerprint) {
                    continue;
                }
                const { updatedAt } = manifest;
                const isNewer = newest === null || new Date(updatedAt) > new Date(newest.updatedAt);
                if (isNewer) {
                    newest = { bundleId: manifest.bundleId, updatedAt };
                }
            }
            catch {
                // ignore corrupt bundles
            }
        }
    }
    if (newest === null) {
        return null;
    }
    // Seed index for next time (best-effort).
    await updateDedupIndexBestEffort(cfg, fingerprint, newest.bundleId, newest.updatedAt);
    return newest.bundleId;
}
16
211
  async function ensureDir(p) {
17
212
  await fs.mkdir(p, { recursive: true });
18
213
  }
19
214
  function nowIso() {
20
215
  return new Date().toISOString();
21
216
  }
217
/**
 * Convert a path to use forward slashes.
 *
 * @param {string} p - Path that may contain backslash separators.
 * @returns {string} The path with every backslash replaced by `/`.
 */
function toPosix(p) {
    const segments = p.split('\\');
    return segments.join('/');
}
220
/**
 * SHA-256 digest of a text value.
 *
 * @param {string} text - Text to hash, interpreted as UTF-8.
 * @returns {string} Hex-encoded digest.
 */
function sha256Text(text) {
    const digest = crypto
        .createHash('sha256')
        .update(text, 'utf8')
        .digest('hex');
    return digest;
}
223
/**
 * `fs.stat` that reports failure as `null` instead of rejecting.
 *
 * @param {string} p - Path to stat.
 * @returns {Promise<import('node:fs').Stats | null>} Stats, or `null` on any error.
 */
async function statOrNull(p) {
    return fs.stat(p).catch(() => null);
}
231
/**
 * Read a file as UTF-8 text, reporting failure as `null` instead of rejecting.
 *
 * @param {string} p - File path.
 * @returns {Promise<string | null>} File contents, or `null` on any error.
 */
async function readUtf8OrNull(p) {
    return fs.readFile(p, 'utf8').catch(() => null);
}
239
/**
 * Yield every regular file beneath `rootDir`, with no ignore rules applied.
 *
 * Directories are traversed using an explicit stack; entries that are neither
 * directories nor regular files are skipped.
 *
 * @param {string} rootDir - Directory to walk.
 * @yields {{absPath: string, relPosix: string}} The file's joined path and its
 *   path relative to `rootDir` with forward slashes.
 */
async function* walkFilesNoIgnore(rootDir) {
    const pending = [rootDir];
    while (pending.length > 0) {
        const current = pending.pop();
        const children = await fs.readdir(current, { withFileTypes: true });
        for (const child of children) {
            const absPath = path.join(current, child.name);
            if (child.isDirectory()) {
                pending.push(absPath);
            }
            else if (child.isFile()) {
                yield { absPath, relPosix: toPosix(path.relative(rootDir, absPath)) };
            }
        }
    }
}
22
257
  async function writeRepoMeta(params) {
23
258
  await ensureDir(path.dirname(params.metaPath));
24
259
  const obj = {
25
260
  repoId: params.repoId,
26
261
  cloneUrl: params.cloneUrl,
27
- headSha: params.headSha,
28
262
  fetchedAt: params.fetchedAt,
29
263
  ingestedFiles: params.ingestedFiles,
30
264
  skipped: params.skipped,
31
265
  };
266
+ if (params.headSha)
267
+ obj.headSha = params.headSha;
268
+ if (params.source)
269
+ obj.source = params.source;
270
+ if (params.ref)
271
+ obj.ref = params.ref;
32
272
  await fs.writeFile(params.metaPath, JSON.stringify(obj, null, 2) + '\n', 'utf8');
33
273
  }
34
274
  async function rmIfExists(p) {
@@ -205,26 +445,47 @@ async function mirrorBundleToBackups(primaryDir, backupDirs, bundleId) {
205
445
  const srcPath = path.join(primaryDir, bundleId);
206
446
  const mirrored = [];
207
447
  const failed = [];
208
- for (const backupDir of backupDirs) {
209
- if (backupDir === primaryDir)
210
- continue; // Skip primary
448
+ // Mirror to all backup dirs in parallel for better performance
449
+ const mirrorPromises = backupDirs
450
+ .filter(dir => dir !== primaryDir) // Skip primary
451
+ .map(async (backupDir) => {
211
452
  const destPath = path.join(backupDir, bundleId);
212
453
  try {
213
454
  // Check if backup location is available
214
455
  const parentAvailable = await isParentAvailable(destPath);
215
456
  if (!parentAvailable) {
216
- failed.push({ path: backupDir, error: 'Mount not available' });
217
- continue;
457
+ return { success: false, path: backupDir, error: 'Mount not available' };
218
458
  }
219
459
  // Ensure backup dir exists
220
460
  await ensureDir(backupDir);
221
461
  // Remove old and copy new
222
462
  await rmIfExists(destPath);
223
463
  await copyDir(srcPath, destPath);
224
- mirrored.push(backupDir);
464
+ return { success: true, path: backupDir };
225
465
  }
226
466
  catch (err) {
227
- failed.push({ path: backupDir, error: err instanceof Error ? err.message : String(err) });
467
+ return {
468
+ success: false,
469
+ path: backupDir,
470
+ error: err instanceof Error ? err.message : String(err)
471
+ };
472
+ }
473
+ });
474
+ // Wait for all mirror operations to complete
475
+ const results = await Promise.allSettled(mirrorPromises);
476
+ for (const result of results) {
477
+ if (result.status === 'fulfilled') {
478
+ const { success, path: backupPath, error } = result.value;
479
+ if (success) {
480
+ mirrored.push(backupPath);
481
+ }
482
+ else {
483
+ failed.push({ path: backupPath, error: error ?? 'Unknown error' });
484
+ }
485
+ }
486
+ else {
487
+ // Promise rejection (shouldn't happen with try-catch, but handle it)
488
+ failed.push({ path: 'unknown', error: result.reason?.message ?? String(result.reason) });
228
489
  }
229
490
  }
230
491
  return { mirrored, failed };
@@ -330,13 +591,91 @@ async function syncStaleBackups(sourceDir, allDirs, bundleId) {
330
591
  }
331
592
  }
332
593
  }
594
/**
 * Write the metadata JSON file for a repository ingested from a local path.
 *
 * The record always carries `source: 'local'` together with the caller's
 * `repoId`, `localPath`, `ref`, `fetchedAt`, `ingestedFiles` and `skipped`.
 *
 * @param {object} params - `metaPath` plus the metadata fields listed above.
 * @returns {Promise<void>}
 */
async function writeLocalRepoMeta(params) {
    await ensureDir(path.dirname(params.metaPath));
    const { repoId, localPath, ref, fetchedAt, ingestedFiles, skipped } = params;
    const meta = {
        repoId,
        source: 'local',
        localPath,
        ref,
        fetchedAt,
        ingestedFiles,
        skipped,
    };
    const serialized = `${JSON.stringify(meta, null, 2)}\n`;
    await fs.writeFile(params.metaPath, serialized, 'utf8');
}
607
/**
 * Ingest a repository from a local directory into a bundle.
 *
 * The resolved `localPath` must be a directory. The bundle's raw and norm
 * destinations for `owner/repo` are removed and recreated, the directory is
 * passed to `ingestRepoToBundle`, and a local repo-meta file is written.
 *
 * @param {object} params - `cfg`, `bundleId`, `storageDir`, `owner`, `repo`,
 *   `localPath` and optional `ref`.
 * @returns {Promise<{files: object[], skipped: string[]}>} Ingestion result.
 * @throws {Error} If the resolved path is not a directory; `fs.stat` errors
 *   (for example a missing path) also propagate.
 */
async function ingestLocalRepo(params) {
    const { owner, repo, cfg } = params;
    const repoId = `${owner}/${repo}`;
    const repoRoot = path.resolve(params.localPath);
    const rootStat = await fs.stat(repoRoot);
    if (!rootStat.isDirectory()) {
        throw new Error(`Local repo path is not a directory: ${repoRoot}`);
    }
    const bundlePaths = getBundlePaths(params.storageDir, params.bundleId);
    const rawDestRoot = repoRawDir(bundlePaths, owner, repo);
    const normDestRoot = repoNormDir(bundlePaths, owner, repo);
    // Start from clean destinations: remove both first, then recreate both.
    for (const dir of [rawDestRoot, normDestRoot]) {
        await rmIfExists(dir);
    }
    for (const dir of [rawDestRoot, normDestRoot]) {
        await ensureDir(dir);
    }
    const ingested = await ingestRepoToBundle({
        repoId,
        repoRoot,
        rawDestRoot,
        normDestRoot,
        bundleNormPrefixPosix: `repos/${owner}/${repo}/norm`,
        options: {
            maxFileBytes: cfg.maxFileBytes,
            maxTotalBytes: cfg.maxTotalBytes,
        },
    });
    const fetchedAt = nowIso();
    await writeLocalRepoMeta({
        metaPath: repoMetaPath(bundlePaths, owner, repo),
        repoId,
        localPath: repoRoot,
        ref: params.ref,
        fetchedAt,
        ingestedFiles: ingested.files.length,
        skipped: ingested.skipped,
    });
    return { files: ingested.files, skipped: ingested.skipped };
}
333
645
  async function cloneAndIngestGitHubRepo(params) {
334
646
  const repoId = `${params.owner}/${params.repo}`;
335
647
  const cloneUrl = toCloneUrl({ owner: params.owner, repo: params.repo });
336
- const tmpCheckout = path.join(params.cfg.tmpDir, 'checkouts', params.bundleId, `${params.owner}__${params.repo}`);
337
- await rmIfExists(tmpCheckout);
338
- await shallowClone(cloneUrl, tmpCheckout, { ref: params.ref });
339
- const headSha = await getLocalHeadSha(tmpCheckout);
648
+ const tmpBase = path.join(params.cfg.tmpDir, 'checkouts', params.bundleId, `${params.owner}__${params.repo}`);
649
+ const tmpCheckoutGit = tmpBase;
650
+ const tmpArchiveDir = `${tmpBase}__archive`;
651
+ await rmIfExists(tmpCheckoutGit);
652
+ await rmIfExists(tmpArchiveDir);
653
+ let repoRootForIngest = tmpCheckoutGit;
654
+ let headSha;
655
+ const notes = [];
656
+ let source = 'git';
657
+ let fetchedAt = nowIso();
658
+ let refUsed = params.ref;
659
+ try {
660
+ await shallowClone(cloneUrl, tmpCheckoutGit, { ref: params.ref, timeoutMs: params.cfg.gitCloneTimeoutMs });
661
+ headSha = await getLocalHeadSha(tmpCheckoutGit);
662
+ }
663
+ catch (err) {
664
+ // Fallback: GitHub archive download (zipball) + extract.
665
+ source = 'archive';
666
+ const msg = err instanceof Error ? err.message : String(err);
667
+ notes.push(`git clone failed; used GitHub archive fallback: ${msg}`);
668
+ const archive = await downloadAndExtractGitHubArchive({
669
+ cfg: params.cfg,
670
+ owner: params.owner,
671
+ repo: params.repo,
672
+ ref: params.ref,
673
+ destDir: tmpArchiveDir,
674
+ });
675
+ repoRootForIngest = archive.repoRoot;
676
+ fetchedAt = archive.fetchedAt;
677
+ refUsed = archive.refUsed;
678
+ }
340
679
  const bundlePaths = getBundlePaths(params.storageDir, params.bundleId);
341
680
  const rawDest = repoRawDir(bundlePaths, params.owner, params.repo);
342
681
  const normDest = repoNormDir(bundlePaths, params.owner, params.repo);
@@ -347,7 +686,7 @@ async function cloneAndIngestGitHubRepo(params) {
347
686
  const bundleNormPrefixPosix = `repos/${params.owner}/${params.repo}/norm`;
348
687
  const ingested = await ingestRepoToBundle({
349
688
  repoId,
350
- repoRoot: tmpCheckout,
689
+ repoRoot: repoRootForIngest,
351
690
  rawDestRoot: rawDest,
352
691
  normDestRoot: normDest,
353
692
  bundleNormPrefixPosix,
@@ -356,7 +695,6 @@ async function cloneAndIngestGitHubRepo(params) {
356
695
  maxTotalBytes: params.cfg.maxTotalBytes,
357
696
  },
358
697
  });
359
- const fetchedAt = nowIso();
360
698
  await writeRepoMeta({
361
699
  metaPath: repoMetaPath(bundlePaths, params.owner, params.repo),
362
700
  repoId,
@@ -365,9 +703,12 @@ async function cloneAndIngestGitHubRepo(params) {
365
703
  fetchedAt,
366
704
  ingestedFiles: ingested.files.length,
367
705
  skipped: ingested.skipped,
706
+ source,
707
+ ref: refUsed,
368
708
  });
369
- await rmIfExists(tmpCheckout);
370
- return { headSha, files: ingested.files, skipped: ingested.skipped };
709
+ await rmIfExists(tmpCheckoutGit);
710
+ await rmIfExists(tmpArchiveDir);
711
+ return { headSha, files: ingested.files, skipped: ingested.skipped, notes, source };
371
712
  }
372
713
  function groupFilesByRepoId(files) {
373
714
  const byRepo = new Map();
@@ -401,44 +742,88 @@ async function generateFactsBestEffort(params) {
401
742
  logger.error('Static analysis exception', err instanceof Error ? err : undefined);
402
743
  }
403
744
  }
404
- export async function createBundle(cfg, input) {
745
/**
 * Create a bundle, running the work through `bundleCreationLimiter`.
 *
 * The actual creation is done by `createBundleInternal`; this wrapper only
 * routes it through the limiter so concurrent creations are throttled.
 *
 * @param {object} cfg - Configuration.
 * @param {object} input - Creation input (`repos`, `libraries`, `topics`).
 * @param {object} [options] - Optional settings forwarded unchanged.
 * @returns {Promise<object>} Whatever `createBundleInternal` resolves to.
 */
export async function createBundle(cfg, input, options) {
    const runCreation = async () => createBundleInternal(cfg, input, options);
    const summary = await bundleCreationLimiter.run(runCreation);
    return summary;
}
751
+ async function createBundleInternal(cfg, input, options) {
752
+ const fingerprint = computeCreateInputFingerprint(input);
753
+ const ifExists = options?.ifExists ?? 'error';
754
+ if (ifExists !== 'createNew') {
755
+ const existing = await findExistingBundleByFingerprint(cfg, fingerprint);
756
+ if (existing) {
757
+ if (ifExists === 'returnExisting') {
758
+ return await readBundleSummary(cfg, existing);
759
+ }
760
+ if (ifExists === 'updateExisting') {
761
+ const { summary } = await updateBundle(cfg, existing);
762
+ return summary;
763
+ }
764
+ throw new Error(`Bundle already exists for these inputs: ${existing}`);
765
+ }
766
+ }
405
767
  const bundleId = crypto.randomUUID();
406
768
  const createdAt = nowIso();
407
769
  // Use effective storage dir (falls back if primary unavailable)
408
770
  const effectiveStorageDir = await getEffectiveStorageDirForWrite(cfg);
409
- await ensureDir(cfg.tmpDir);
410
- const paths = getBundlePaths(effectiveStorageDir, bundleId);
411
- await ensureDir(paths.rootDir);
412
- let bundleCreated = false;
771
+ // Create bundle in temporary directory for atomic creation
772
+ const tmpBundlesDir = path.join(cfg.tmpDir, 'bundles-wip');
773
+ await ensureDir(tmpBundlesDir);
774
+ const tmpPaths = getBundlePaths(tmpBundlesDir, bundleId);
775
+ await ensureDir(tmpPaths.rootDir);
776
+ const finalPaths = getBundlePaths(effectiveStorageDir, bundleId);
413
777
  const allIngestedFiles = [];
414
778
  const reposSummary = [];
415
779
  try {
416
- bundleCreated = true; // Mark that bundle directory was created
780
+ // All operations happen in tmpPaths (temporary directory)
417
781
  for (const repoInput of input.repos) {
418
782
  if (repoInput.kind === 'github') {
419
783
  const { owner, repo } = parseOwnerRepo(repoInput.repo);
420
- const { headSha, files, skipped } = await cloneAndIngestGitHubRepo({
784
+ const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
421
785
  cfg,
422
786
  bundleId,
423
- storageDir: effectiveStorageDir,
787
+ storageDir: tmpBundlesDir,
424
788
  owner,
425
789
  repo,
426
790
  ref: repoInput.ref,
427
791
  });
428
792
  allIngestedFiles.push(...files);
429
- reposSummary.push({ kind: 'github', id: `${owner}/${repo}`, headSha, notes: skipped.slice(0, 50) });
793
+ reposSummary.push({
794
+ kind: 'github',
795
+ id: `${owner}/${repo}`,
796
+ source,
797
+ headSha,
798
+ notes: [...notes, ...skipped].slice(0, 50),
799
+ });
800
+ }
801
+ else if (repoInput.kind === 'local') {
802
+ const { owner, repo } = parseOwnerRepo(repoInput.repo);
803
+ const { files, skipped } = await ingestLocalRepo({
804
+ cfg,
805
+ bundleId,
806
+ storageDir: tmpBundlesDir,
807
+ owner,
808
+ repo,
809
+ localPath: repoInput.path,
810
+ ref: repoInput.ref,
811
+ });
812
+ allIngestedFiles.push(...files);
813
+ reposSummary.push({ kind: 'local', id: `${owner}/${repo}`, source: 'local', notes: skipped.slice(0, 50) });
430
814
  }
431
815
  else {
432
816
  // DeepWiki integration: fetch and convert to Markdown.
433
817
  const deepwikiResult = await ingestDeepWikiRepo({
434
818
  cfg,
435
- bundlePaths: paths,
819
+ bundlePaths: tmpPaths,
436
820
  url: repoInput.url,
437
821
  });
438
822
  allIngestedFiles.push(...deepwikiResult.files);
439
823
  reposSummary.push({
440
824
  kind: 'deepwiki',
441
825
  id: deepwikiResult.summary.repoId,
826
+ source: 'deepwiki',
442
827
  notes: deepwikiResult.summary.notes,
443
828
  });
444
829
  }
@@ -447,11 +832,11 @@ export async function createBundle(cfg, input) {
447
832
  let librariesSummary;
448
833
  if (input.libraries?.length) {
449
834
  // Clean libraries dir in case something wrote here earlier.
450
- await rmIfExists(paths.librariesDir);
451
- await ensureDir(paths.librariesDir);
835
+ await rmIfExists(tmpPaths.librariesDir);
836
+ await ensureDir(tmpPaths.librariesDir);
452
837
  const libIngest = await ingestContext7Libraries({
453
838
  cfg,
454
- bundlePaths: paths,
839
+ bundlePaths: tmpPaths,
455
840
  libraries: input.libraries,
456
841
  topics: input.topics,
457
842
  });
@@ -459,7 +844,7 @@ export async function createBundle(cfg, input) {
459
844
  librariesSummary = libIngest.libraries;
460
845
  }
461
846
  // Build index.
462
- await rebuildIndex(paths.searchDbPath, allIngestedFiles, {
847
+ await rebuildIndex(tmpPaths.searchDbPath, allIngestedFiles, {
463
848
  includeDocs: true,
464
849
  includeCode: true,
465
850
  });
@@ -482,6 +867,7 @@ export async function createBundle(cfg, input) {
482
867
  bundleId,
483
868
  createdAt,
484
869
  updatedAt: createdAt,
870
+ fingerprint,
485
871
  displayName,
486
872
  description,
487
873
  tags,
@@ -494,6 +880,7 @@ export async function createBundle(cfg, input) {
494
880
  repos: reposSummary.map((r) => ({
495
881
  kind: r.kind,
496
882
  id: r.id,
883
+ source: r.source,
497
884
  headSha: r.headSha,
498
885
  fetchedAt: createdAt,
499
886
  notes: r.notes,
@@ -505,18 +892,18 @@ export async function createBundle(cfg, input) {
505
892
  includeCode: true,
506
893
  },
507
894
  };
508
- await writeManifest(paths.manifestPath, manifest);
895
+ await writeManifest(tmpPaths.manifestPath, manifest);
509
896
  // Guides.
510
- await writeAgentsMd(paths.agentsPath);
897
+ await writeAgentsMd(tmpPaths.agentsPath);
511
898
  await writeStartHereMd({
512
- targetPath: paths.startHerePath,
899
+ targetPath: tmpPaths.startHerePath,
513
900
  bundleId,
514
901
  repos: reposSummary.map((r) => ({ id: r.id, headSha: r.headSha })),
515
902
  libraries: librariesSummary,
516
903
  });
517
904
  // Overview (S2: factual-only with evidence pointers).
518
905
  const perRepoOverviews = reposSummary
519
- .filter((r) => r.kind === 'github')
906
+ .filter((r) => r.kind === 'github' || r.kind === 'local')
520
907
  .map((r) => {
521
908
  const repoId = r.id;
522
909
  const repoFiles = allIngestedFiles.filter((f) => f.repoId === repoId);
@@ -524,29 +911,54 @@ export async function createBundle(cfg, input) {
524
911
  });
525
912
  const overviewMd = await generateOverviewMarkdown({
526
913
  bundleId,
527
- bundleRootDir: paths.rootDir,
914
+ bundleRootDir: tmpPaths.rootDir,
528
915
  repos: perRepoOverviews,
529
916
  libraries: librariesSummary,
530
917
  });
531
- await writeOverviewFile(paths.overviewPath, overviewMd);
918
+ await writeOverviewFile(tmpPaths.overviewPath, overviewMd);
532
919
  // Generate static facts (FACTS.json). This is intentionally non-LLM and safe to keep inside bundles.
533
920
  await generateFactsBestEffort({
534
921
  bundleId,
535
- bundleRoot: paths.rootDir,
922
+ bundleRoot: tmpPaths.rootDir,
536
923
  files: allIngestedFiles,
537
924
  mode: cfg.analysisMode,
538
925
  });
539
- // Mirror to backup storage directories (non-blocking on failures)
540
- if (cfg.storageDirs.length > 1) {
541
- await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
542
- }
543
- // CRITICAL: Validate bundle completeness before finalizing
544
- const validation = await validateBundleCompleteness(paths.rootDir);
926
+ // CRITICAL: Validate bundle completeness BEFORE atomic move
927
+ const validation = await validateBundleCompleteness(tmpPaths.rootDir);
545
928
  if (!validation.isValid) {
546
929
  const errorMsg = `Bundle creation incomplete. Missing: ${validation.missingComponents.join(', ')}`;
547
930
  logger.error(errorMsg);
548
931
  throw new Error(errorMsg);
549
932
  }
933
+ // ATOMIC OPERATION: Move from temp to final location
934
+ // This is atomic on most filesystems - bundle becomes visible only when complete
935
+ logger.info(`Moving bundle ${bundleId} from temp to final location (atomic)`);
936
+ await ensureDir(effectiveStorageDir);
937
+ try {
938
+ // Try rename first (atomic, but only works on same filesystem)
939
+ await fs.rename(tmpPaths.rootDir, finalPaths.rootDir);
940
+ logger.info(`Bundle ${bundleId} moved atomically to ${finalPaths.rootDir}`);
941
+ }
942
+ catch (renameErr) {
943
+ // Rename failed - likely cross-filesystem. Fall back to copy+delete
944
+ const errCode = renameErr.code;
945
+ if (errCode === 'EXDEV') {
946
+ logger.warn(`Cross-filesystem move detected for ${bundleId}, falling back to copy`);
947
+ await copyDir(tmpPaths.rootDir, finalPaths.rootDir);
948
+ await rmIfExists(tmpPaths.rootDir);
949
+ logger.info(`Bundle ${bundleId} copied to ${finalPaths.rootDir}`);
950
+ }
951
+ else {
952
+ // Some other error, rethrow
953
+ throw renameErr;
954
+ }
955
+ }
956
+ // Mirror to backup storage directories (non-blocking on failures)
957
+ if (cfg.storageDirs.length > 1) {
958
+ await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
959
+ }
960
+ // Update de-duplication index (best-effort). This is intentionally after atomic move.
961
+ await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, createdAt);
550
962
  const summary = {
551
963
  bundleId,
552
964
  createdAt,
@@ -557,15 +969,19 @@ export async function createBundle(cfg, input) {
557
969
  return summary;
558
970
  }
559
971
  catch (err) {
560
- // If bundle directory was created, clean it up
561
- if (bundleCreated) {
562
- logger.error(`Bundle creation failed, cleaning up: ${bundleId}`, err instanceof Error ? err : undefined);
563
- await cleanupFailedBundle(cfg, bundleId);
564
- }
972
+ // Clean up temp directory on failure
973
+ logger.error(`Bundle creation failed, cleaning up temp: ${bundleId}`, err instanceof Error ? err : undefined);
974
+ await rmIfExists(tmpPaths.rootDir);
565
975
  // Enhance error message
566
976
  const errorMsg = err instanceof Error ? err.message : String(err);
567
977
  throw new Error(`Failed to create bundle: ${errorMsg}`);
568
978
  }
979
+ finally {
980
+ // Ensure temp directory is cleaned up (double safety)
981
+ await rmIfExists(tmpPaths.rootDir).catch(() => {
982
+ // Ignore cleanup errors
983
+ });
984
+ }
569
985
  }
570
986
  /** Check if a bundle has upstream changes without applying updates. */
571
987
  export async function checkForUpdates(cfg, bundleId) {
@@ -592,6 +1008,14 @@ export async function checkForUpdates(cfg, bundleId) {
592
1008
  hasUpdates = true;
593
1009
  details.push({ repoId, currentSha: prev?.headSha, remoteSha, changed });
594
1010
  }
1011
+ else if (repoInput.kind === 'local') {
1012
+ const { owner, repo } = parseOwnerRepo(repoInput.repo);
1013
+ const repoId = `${owner}/${repo}`;
1014
+ // We can't reliably detect whether local files changed without scanning; assume possible update.
1015
+ const prev = manifest.repos.find((r) => r.id === repoId);
1016
+ details.push({ repoId, currentSha: prev?.headSha, changed: true });
1017
+ hasUpdates = true;
1018
+ }
595
1019
  else {
596
1020
  // DeepWiki: can't easily detect changes, assume possible update
597
1021
  details.push({ repoId: repoInput.url, changed: true });
@@ -600,6 +1024,257 @@ export async function checkForUpdates(cfg, bundleId) {
600
1024
  }
601
1025
  return { hasUpdates, details };
602
1026
  }
1027
/**
 * Collect the files of an on-disk bundle that are candidates for indexing.
 *
 * Three locations are visited, in this order:
 *   1. `<reposDir>/<owner>/<repo>/norm/**`               - every file, kind from `classifyIngestedFileKind`
 *   2. `<librariesDir>/context7/**`                      - `.md` files only, kind `'doc'`
 *   3. `<bundleRootDir>/deepwiki/<owner>/<repo>/norm/**` - `.md` files only, kind `'doc'`
 *
 * @param {object} params
 * @param {object} params.cfg - Reads `maxFileBytes` and `maxTotalBytes`.
 * @param {string} params.bundleRootDir - Bundle root; `deepwiki/` is resolved beneath it.
 * @param {string} params.reposDir - Directory containing `<owner>/<repo>/norm` trees.
 * @param {string} params.librariesDir - Directory containing the `context7` tree.
 * @returns {Promise<{files: object[], totalBytes: number, skipped: string[]}>}
 *   `files` holds one record per accepted file, `totalBytes` is the sum of their on-disk sizes,
 *   and `skipped` holds one human-readable note per rejected file.
 */
async function scanBundleIndexableFiles(params) {
    const collected = [];
    const skipNotes = [];
    let bytesSoFar = 0;

    // Stat, size-check, read and hash a single candidate, then record it or note why it was skipped.
    async function consider(candidate) {
        const { absPath, bundleRelPosix } = candidate;
        const info = await statOrNull(absPath);
        if (!info?.isFile()) {
            return;
        }
        if (info.size > params.cfg.maxFileBytes) {
            skipNotes.push(`${bundleRelPosix} (too large: ${info.size} bytes)`);
            return;
        }
        // The scan keeps going after the budget is hit; each file that does not fit gets its own note.
        if (bytesSoFar + info.size > params.cfg.maxTotalBytes) {
            skipNotes.push(`(bundle maxTotalBytes reached) stopped before: ${bundleRelPosix}`);
            return;
        }
        const raw = await readUtf8OrNull(absPath);
        if (raw === null) {
            skipNotes.push(`${bundleRelPosix} (unreadable utf8)`);
            return;
        }
        // The hash is taken over LF-normalized text, while the byte budget uses the on-disk size.
        const digest = sha256Text(raw.replace(/\r\n/g, '\n'));
        bytesSoFar += info.size;
        collected.push({
            repoId: candidate.repoId,
            kind: candidate.kind,
            repoRelativePath: candidate.repoRelativePath,
            bundleNormRelativePath: bundleRelPosix,
            bundleNormAbsPath: absPath,
            sha256: digest,
            bytes: info.size,
        });
    }

    // Lazily yield { owner, repo, normDir } for each existing <baseDir>/<owner>/<repo>/norm directory.
    async function* eachNormDir(baseDir) {
        const ownerEntries = await fs.readdir(baseDir, { withFileTypes: true });
        for (const ownerEntry of ownerEntries) {
            if (!ownerEntry.isDirectory()) {
                continue;
            }
            const owner = ownerEntry.name;
            const ownerDir = path.join(baseDir, owner);
            const repoEntries = await fs.readdir(ownerDir, { withFileTypes: true });
            for (const repoEntry of repoEntries) {
                if (!repoEntry.isDirectory()) {
                    continue;
                }
                const repo = repoEntry.name;
                const normDir = path.join(ownerDir, repo, 'norm');
                const normInfo = await statOrNull(normDir);
                if (normInfo?.isDirectory()) {
                    yield { owner, repo, normDir };
                }
            }
        }
    }

    // Derive the Context7 repo id from the directory segments that precede the file name.
    function context7RepoId(dirSegments) {
        if (dirSegments[0] === '_unresolved' && dirSegments[1]) {
            return `context7:unresolved/${dirSegments[1]}`;
        }
        if (dirSegments.length > 0) {
            return `context7:/${dirSegments.join('/')}`;
        }
        return 'context7:unknown';
    }

    // 1) repos/<owner>/<repo>/norm/** (github/local)
    try {
        for await (const { owner, repo, normDir } of eachNormDir(params.reposDir)) {
            for await (const entry of walkFilesNoIgnore(normDir)) {
                const repoRel = entry.relPosix;
                await consider({
                    repoId: `${owner}/${repo}`,
                    kind: classifyIngestedFileKind(repoRel),
                    repoRelativePath: repoRel,
                    bundleRelPosix: `repos/${owner}/${repo}/norm/${repoRel}`,
                    absPath: entry.absPath,
                });
            }
        }
    }
    catch {
        // Intended for a missing repos dir; note that any error raised during this pass is dropped here.
    }

    // 2) libraries/context7/** (docs-only)
    const context7Dir = path.join(params.librariesDir, 'context7');
    const context7Info = await statOrNull(context7Dir);
    if (context7Info?.isDirectory()) {
        for await (const entry of walkFilesNoIgnore(context7Dir)) {
            // Match original ingestion: only .md docs are indexed from Context7.
            if (!entry.relPosix.toLowerCase().endsWith('.md')) {
                continue;
            }
            const relFromLibRoot = entry.relPosix; // relative to libraries/context7
            const segments = relFromLibRoot.split('/').filter(Boolean);
            const dirSegments = segments.slice(0, -1);
            await consider({
                repoId: context7RepoId(dirSegments),
                kind: 'doc',
                // Only the bare file name is recorded as the repo-relative path for Context7 docs.
                repoRelativePath: segments[segments.length - 1] ?? '',
                bundleRelPosix: `libraries/context7/${relFromLibRoot}`,
                absPath: entry.absPath,
            });
        }
    }

    // 3) deepwiki/<owner>/<repo>/norm/** (docs-only); unlike step 1, errors here propagate to the caller.
    const deepwikiDir = path.join(params.bundleRootDir, 'deepwiki');
    const deepwikiInfo = await statOrNull(deepwikiDir);
    if (deepwikiInfo?.isDirectory()) {
        for await (const { owner, repo, normDir } of eachNormDir(deepwikiDir)) {
            for await (const entry of walkFilesNoIgnore(normDir)) {
                if (!entry.relPosix.toLowerCase().endsWith('.md')) {
                    continue;
                }
                await consider({
                    repoId: `deepwiki:${owner}/${repo}`,
                    kind: 'doc',
                    repoRelativePath: entry.relPosix,
                    bundleRelPosix: `deepwiki/${owner}/${repo}/norm/${entry.relPosix}`,
                    absPath: entry.absPath,
                });
            }
        }
    }

    return { files: collected, totalBytes: bytesSoFar, skipped: skipNotes };
}
1162
/**
 * Validate a bundle and, unless running in validate-only mode, regenerate derived artifacts
 * (search index, AGENTS/START_HERE guides, overview) that are missing or empty on disk.
 * Nothing is fetched or re-ingested; repairs work only from files already inside the bundle.
 *
 * @param {object} cfg - Reads `storageDirs`; also forwarded to the file scan and the dedup index update.
 * @param {string} bundleId - Bundle to inspect.
 * @param {object} [options]
 * @param {string} [options.mode='repair'] - `'validate'` returns the completeness report without
 *   touching the bundle; any other value performs repairs.
 * @param {boolean} [options.rebuildIndex=true] - Rebuild the search index when it is missing/empty.
 * @param {boolean} [options.rebuildGuides=true] - Rewrite the guide files when missing/empty.
 * @param {boolean} [options.rebuildOverview=true] - Rewrite the overview when missing/empty.
 * @returns {Promise<{bundleId: string, mode: string, repaired: boolean, actionsTaken: string[],
 *   before: *, after: *, updatedAt?: string}>} `updatedAt` is set only when at least one action ran.
 * @throws {Error} `Bundle not found: <id>` when no storage directory contains the bundle.
 */
export async function repairBundle(cfg, bundleId, options) {
    const mode = options?.mode ?? 'repair';
    const rebuildIndexOpt = options?.rebuildIndex ?? true;
    const rebuildGuidesOpt = options?.rebuildGuides ?? true;
    const rebuildOverviewOpt = options?.rebuildOverview ?? true;
    const storageDir = await findBundleStorageDir(cfg.storageDirs, bundleId);
    if (!storageDir) {
        throw new Error(`Bundle not found: ${bundleId}`);
    }
    const paths = getBundlePaths(storageDir, bundleId);
    const before = await validateBundleCompleteness(paths.rootDir);
    if (mode === 'validate') {
        return {
            bundleId,
            mode,
            repaired: false,
            actionsTaken: [],
            before,
            after: before,
        };
    }
    // Manifest is required for safe repairs (no fetching/re-ingest).
    const manifest = await readManifest(paths.manifestPath);
    const actionsTaken = [];
    // Determine what needs repair. The four probes are independent, so issue them together.
    const [stAgents, stStartHere, stOverview, stIndex] = await Promise.all([
        statOrNull(paths.agentsPath),
        statOrNull(paths.startHerePath),
        statOrNull(paths.overviewPath),
        statOrNull(paths.searchDbPath),
    ]);
    // A zero-byte artifact is treated the same as a missing one.
    const needsAgents = !stAgents || stAgents.size === 0;
    const needsStartHere = !stStartHere || stStartHere.size === 0;
    const needsOverview = !stOverview || stOverview.size === 0;
    const needsIndex = !stIndex || stIndex.size === 0;
    // Scan bundle files once if needed for index/overview.
    let scanned = null;
    const needScan = (rebuildIndexOpt && needsIndex) || (rebuildOverviewOpt && needsOverview);
    if (needScan) {
        scanned = await scanBundleIndexableFiles({
            cfg,
            bundleRootDir: paths.rootDir,
            reposDir: paths.reposDir,
            librariesDir: paths.librariesDir,
        });
        if (scanned.skipped.length) {
            actionsTaken.push(`scan: skipped ${scanned.skipped.length} file(s)`);
        }
    }
    if (rebuildIndexOpt && needsIndex) {
        const files = scanned?.files ?? [];
        // Honor the bundle's recorded index settings so a repaired index has the same scope as
        // the one updateBundle builds; fall back to indexing everything when none are recorded.
        await rebuildIndex(paths.searchDbPath, files, {
            includeDocs: manifest.index?.includeDocs ?? true,
            includeCode: manifest.index?.includeCode ?? true,
        });
        actionsTaken.push(`rebuildIndex: indexed ${files.length} file(s)`);
    }
    if (rebuildGuidesOpt && needsAgents) {
        await writeAgentsMd(paths.agentsPath);
        actionsTaken.push('writeAgentsMd');
    }
    if (rebuildGuidesOpt && needsStartHere) {
        await writeStartHereMd({
            targetPath: paths.startHerePath,
            bundleId,
            repos: (manifest.repos ?? []).map((r) => ({ id: r.id, headSha: r.headSha })),
            libraries: manifest.libraries,
        });
        actionsTaken.push('writeStartHereMd');
    }
    if (rebuildOverviewOpt && needsOverview) {
        const allFiles = scanned?.files ?? [];
        // Only source repositories (github/local) get a per-repo overview section.
        const perRepoOverviews = (manifest.repos ?? [])
            .filter((r) => r.kind === 'github' || r.kind === 'local')
            .map((r) => {
                const repoId = r.id;
                const repoFiles = allFiles.filter((f) => f.repoId === repoId);
                return { repoId, headSha: r.headSha, files: repoFiles };
            });
        const md = await generateOverviewMarkdown({
            bundleId,
            bundleRootDir: paths.rootDir,
            repos: perRepoOverviews,
            libraries: manifest.libraries,
        });
        await writeOverviewFile(paths.overviewPath, md);
        actionsTaken.push('writeOverviewFile');
    }
    let updatedAt;
    if (actionsTaken.length > 0) {
        updatedAt = nowIso();
        // Reuse the stored fingerprint when present; otherwise derive it from the recorded inputs.
        const fingerprint = manifest.fingerprint ??
            computeCreateInputFingerprint({
                repos: manifest.inputs.repos,
                libraries: manifest.inputs.libraries,
                topics: manifest.inputs.topics,
            });
        const newManifest = {
            ...manifest,
            updatedAt,
            fingerprint,
        };
        await writeManifest(paths.manifestPath, newManifest);
        // Keep the de-duplication index fresh (best-effort).
        await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
        // Mirror to backup storage directories (non-blocking on failures)
        if (cfg.storageDirs.length > 1) {
            await mirrorBundleToBackups(storageDir, cfg.storageDirs, bundleId);
        }
    }
    // Re-validate so the caller can compare completeness before and after the repair.
    const after = await validateBundleCompleteness(paths.rootDir);
    return {
        bundleId,
        mode,
        repaired: actionsTaken.length > 0,
        actionsTaken,
        before,
        after,
        updatedAt,
    };
}
603
1278
  export async function updateBundle(cfg, bundleId, options) {
604
1279
  // Use effective storage dir (falls back if primary unavailable)
605
1280
  const effectiveStorageDir = await getEffectiveStorageDirForWrite(cfg);
@@ -626,7 +1301,7 @@ export async function updateBundle(cfg, bundleId, options) {
626
1301
  if (remoteSha && prev?.headSha && remoteSha !== prev.headSha) {
627
1302
  changed = true;
628
1303
  }
629
- const { headSha, files, skipped } = await cloneAndIngestGitHubRepo({
1304
+ const { headSha, files, skipped, notes, source } = await cloneAndIngestGitHubRepo({
630
1305
  cfg,
631
1306
  bundleId,
632
1307
  storageDir: effectiveStorageDir,
@@ -634,11 +1309,31 @@ export async function updateBundle(cfg, bundleId, options) {
634
1309
  repo,
635
1310
  ref: repoInput.ref,
636
1311
  });
637
- if (prev?.headSha && headSha !== prev.headSha) {
1312
+ if (prev?.headSha && headSha && headSha !== prev.headSha) {
638
1313
  changed = true;
639
1314
  }
1315
+ // If we had to fall back to an archive, treat as changed (we don't have git metadata).
1316
+ if (source === 'archive') {
1317
+ changed = true;
1318
+ }
1319
+ allIngestedFiles.push(...files);
1320
+ reposSummary.push({ kind: 'github', id: repoId, source, headSha, notes: [...notes, ...skipped].slice(0, 50) });
1321
+ }
1322
+ else if (repoInput.kind === 'local') {
1323
+ const { owner, repo } = parseOwnerRepo(repoInput.repo);
1324
+ const repoId = `${owner}/${repo}`;
1325
+ const { files, skipped } = await ingestLocalRepo({
1326
+ cfg,
1327
+ bundleId,
1328
+ storageDir: effectiveStorageDir,
1329
+ owner,
1330
+ repo,
1331
+ localPath: repoInput.path,
1332
+ ref: repoInput.ref,
1333
+ });
640
1334
  allIngestedFiles.push(...files);
641
- reposSummary.push({ kind: 'github', id: repoId, headSha, notes: skipped.slice(0, 50) });
1335
+ reposSummary.push({ kind: 'local', id: repoId, source: 'local', notes: skipped.slice(0, 50) });
1336
+ changed = true;
642
1337
  }
643
1338
  else {
644
1339
  // DeepWiki integration: fetch and convert to Markdown.
@@ -651,6 +1346,7 @@ export async function updateBundle(cfg, bundleId, options) {
651
1346
  reposSummary.push({
652
1347
  kind: 'deepwiki',
653
1348
  id: deepwikiResult.summary.repoId,
1349
+ source: 'deepwiki',
654
1350
  notes: deepwikiResult.summary.notes,
655
1351
  });
656
1352
  // Always mark as changed for DeepWiki since we can't easily detect content changes.
@@ -676,12 +1372,19 @@ export async function updateBundle(cfg, bundleId, options) {
676
1372
  includeDocs: manifest.index.includeDocs,
677
1373
  includeCode: manifest.index.includeCode,
678
1374
  });
1375
+ const fingerprint = computeCreateInputFingerprint({
1376
+ repos: manifest.inputs.repos,
1377
+ libraries: manifest.inputs.libraries,
1378
+ topics: manifest.inputs.topics,
1379
+ });
679
1380
  const newManifest = {
680
1381
  ...manifest,
681
1382
  updatedAt,
1383
+ fingerprint,
682
1384
  repos: reposSummary.map((r) => ({
683
1385
  kind: r.kind,
684
1386
  id: r.id,
1387
+ source: r.source,
685
1388
  headSha: r.headSha,
686
1389
  fetchedAt: updatedAt,
687
1390
  notes: r.notes,
@@ -698,7 +1401,7 @@ export async function updateBundle(cfg, bundleId, options) {
698
1401
  libraries: librariesSummary,
699
1402
  });
700
1403
  const perRepoOverviews = reposSummary
701
- .filter((r) => r.kind === 'github')
1404
+ .filter((r) => r.kind === 'github' || r.kind === 'local')
702
1405
  .map((r) => {
703
1406
  const repoId = r.id;
704
1407
  const repoFiles = allIngestedFiles.filter((f) => f.repoId === repoId);
@@ -722,6 +1425,8 @@ export async function updateBundle(cfg, bundleId, options) {
722
1425
  if (cfg.storageDirs.length > 1) {
723
1426
  await mirrorBundleToBackups(effectiveStorageDir, cfg.storageDirs, bundleId);
724
1427
  }
1428
+ // Keep the de-duplication index fresh (best-effort).
1429
+ await updateDedupIndexBestEffort(cfg, fingerprint, bundleId, updatedAt);
725
1430
  const summary = {
726
1431
  bundleId,
727
1432
  createdAt: manifest.createdAt,
@@ -731,11 +1436,22 @@ export async function updateBundle(cfg, bundleId, options) {
731
1436
  };
732
1437
  return { summary, changed };
733
1438
  }
1439
// Canonical dashed UUID layout: 8-4-4-4-12 hexadecimal digits, case-insensitive, fully anchored.
const BUNDLE_ID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
/**
 * Check whether a string has the shape of a dashed UUID.
 *
 * Only the 8-4-4-4-12 hex layout is checked; the version and variant nibbles are not
 * inspected, so UUIDs of any version are accepted. Because the pattern is anchored at
 * both ends, a name with any extra prefix or suffix is rejected.
 *
 * @param {string} id - Candidate bundle id (for example a directory name).
 * @returns {boolean} `true` when `id` matches the dashed UUID layout.
 */
function isValidBundleId(id) {
    return BUNDLE_ID_PATTERN.test(id);
}
734
1448
  /** List bundles from a single storage directory. */
735
1449
  export async function listBundles(storageDir) {
736
1450
  try {
737
1451
  const entries = await fs.readdir(storageDir, { withFileTypes: true });
738
- return entries.filter((e) => e.isDirectory()).map((e) => e.name);
1452
+ return entries
1453
+ .filter((e) => e.isDirectory() && isValidBundleId(e.name))
1454
+ .map((e) => e.name);
739
1455
  }
740
1456
  catch {
741
1457
  return [];
@@ -781,18 +1497,45 @@ export async function clearBundle(storageDir, bundleId) {
781
1497
  const p = getBundlePaths(storageDir, bundleId);
782
1498
  await rmIfExists(p.rootDir);
783
1499
  }
784
- /** Clear bundle from ALL storage directories (mirror delete). */
1500
+ /**
1501
+ * Clear bundle from ALL storage directories (mirror delete).
1502
+ * Uses fast rename + background deletion to avoid blocking.
1503
+ */
785
1504
  export async function clearBundleMulti(storageDirs, bundleId) {
786
1505
  let deleted = false;
787
1506
  for (const dir of storageDirs) {
788
1507
  try {
789
- if (await bundleExists(dir, bundleId)) {
1508
+ const paths = getBundlePaths(dir, bundleId);
1509
+ // Check if the bundle directory exists
1510
+ try {
1511
+ await fs.stat(paths.rootDir);
1512
+ }
1513
+ catch {
1514
+ // Directory doesn't exist, skip
1515
+ continue;
1516
+ }
1517
+ // Fast deletion strategy: rename first (instant), then delete in background
1518
+ const deletingPath = `${paths.rootDir}.deleting.${Date.now()}`;
1519
+ try {
1520
+ // Rename is atomic and instant on most filesystems
1521
+ await fs.rename(paths.rootDir, deletingPath);
1522
+ deleted = true;
1523
+ // Background deletion (fire-and-forget)
1524
+ // The renamed directory is invisible to listBundles (not a valid UUID)
1525
+ rmIfExists(deletingPath).catch((err) => {
1526
+ logger.warn(`Background deletion failed for ${bundleId}: ${err instanceof Error ? err.message : String(err)}`);
1527
+ });
1528
+ }
1529
+ catch (err) {
1530
+ // Rename failed (maybe concurrent deletion), try direct delete as fallback
1531
+ logger.warn(`Rename failed for ${bundleId}, falling back to direct delete`);
790
1532
  await clearBundle(dir, bundleId);
791
1533
  deleted = true;
792
1534
  }
793
1535
  }
794
- catch {
1536
+ catch (err) {
795
1537
  // Skip unavailable paths
1538
+ logger.debug(`Failed to delete bundle from ${dir}: ${err instanceof Error ? err.message : String(err)}`);
796
1539
  }
797
1540
  }
798
1541
  return deleted;