@indigoai-us/hq-cloud 5.23.0 → 5.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/bin/sync-runner.d.ts +58 -3
  2. package/dist/bin/sync-runner.d.ts.map +1 -1
  3. package/dist/bin/sync-runner.js +84 -2
  4. package/dist/bin/sync-runner.js.map +1 -1
  5. package/dist/bin/sync-runner.test.js +90 -3
  6. package/dist/bin/sync-runner.test.js.map +1 -1
  7. package/dist/cli/share.d.ts +86 -20
  8. package/dist/cli/share.d.ts.map +1 -1
  9. package/dist/cli/share.js +332 -62
  10. package/dist/cli/share.js.map +1 -1
  11. package/dist/cli/share.test.js +490 -6
  12. package/dist/cli/share.test.js.map +1 -1
  13. package/dist/cli/sync.d.ts +48 -0
  14. package/dist/cli/sync.d.ts.map +1 -1
  15. package/dist/cli/sync.js.map +1 -1
  16. package/dist/index.d.ts +2 -0
  17. package/dist/index.d.ts.map +1 -1
  18. package/dist/index.js +3 -0
  19. package/dist/index.js.map +1 -1
  20. package/dist/personal-vault-exclusions.d.ts +128 -0
  21. package/dist/personal-vault-exclusions.d.ts.map +1 -0
  22. package/dist/personal-vault-exclusions.js +231 -0
  23. package/dist/personal-vault-exclusions.js.map +1 -0
  24. package/dist/personal-vault-exclusions.test.d.ts +22 -0
  25. package/dist/personal-vault-exclusions.test.d.ts.map +1 -0
  26. package/dist/personal-vault-exclusions.test.js +198 -0
  27. package/dist/personal-vault-exclusions.test.js.map +1 -0
  28. package/package.json +1 -1
  29. package/src/bin/sync-runner.test.ts +113 -3
  30. package/src/bin/sync-runner.ts +125 -5
  31. package/src/cli/share.test.ts +585 -6
  32. package/src/cli/share.ts +461 -86
  33. package/src/cli/sync.ts +50 -0
  34. package/src/index.ts +10 -0
  35. package/src/personal-vault-exclusions.test.ts +256 -0
  36. package/src/personal-vault-exclusions.ts +277 -0
package/src/cli/share.ts CHANGED
@@ -21,10 +21,58 @@ import {
21
21
  normalizeEtag,
22
22
  } from "../journal.js";
23
23
  import { createIgnoreFilter, isWithinSizeLimit } from "../ignore.js";
24
+ import {
25
+ wrapFilterWithPersonalVaultDefaults,
26
+ type PersonalVaultExclusion,
27
+ } from "../personal-vault-exclusions.js";
24
28
  import { resolveConflict } from "./conflict.js";
25
29
  import type { ConflictStrategy } from "./conflict.js";
26
30
  import type { SyncProgressEvent } from "./sync.js";
27
31
 
32
+ /**
33
+ * Local-only ephemeral artifacts: conflict-mirror files written by the pull
34
+ * leg whenever a 3-way merge keeps local AND wants to preserve the remote
35
+ * version for inspection. Format: `<orig>.conflict-<ISO-utc>-<machineHash>.<ext>`
36
+ * (e.g. `.claude/CLAUDE.md.conflict-2026-05-13T19-40-40Z-e5797a.md`).
37
+ *
38
+ * These files MUST never round-trip to S3 — they're local-only safety backups
39
+ * the user reviews and deletes once the merge is resolved. Pre-fix, the push
40
+ * walker happily uploaded them, the journal recorded them, and the
41
+ * `owned-only` delete policy then refused to clean them up when the user
42
+ * deleted them locally (because pull-confirmation had stamped them as
43
+ * `direction: "down"`). Net effect: a permanent litter ratchet on remote.
44
+ *
45
+ * Wire-points: (1) push walker — `collectFiles` / `walkDir` skip these so
46
+ * they never upload; (2) `computeDeletePlan` — skip these so an already-
47
+ * journaled mirror that's been deleted locally doesn't get included in the
48
+ * regular delete plan (the dedicated reconcile path handles existing litter).
49
+ */
50
+ const EPHEMERAL_PATH_PATTERN =
51
+ /\.conflict-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z-[a-f0-9]+\./;
52
+
53
+ /**
54
+ * Cheap pure check — pass the relative key OR a basename; either works. Used
55
+ * in both the file walker (basename matching) and the delete-plan walker
56
+ * (relative-key matching). The regex matches anywhere in the string, which is
57
+ * fine: the `.conflict-<ISO>-<hash>.` token is unambiguous.
58
+ */
59
+ function isEphemeralPath(p: string): boolean {
60
+ return EPHEMERAL_PATH_PATTERN.test(p);
61
+ }
62
+
63
+ /**
64
+ * Test-only export. Kept under a `_testing` namespace so the module's public
65
+ * surface stays focused on `share()` / `ShareOptions` / `ShareResult` while
66
+ * regression-critical regex contracts (the conflict-mirror pattern) can be
67
+ * pinned by direct unit tests without round-tripping through share().
68
+ *
69
+ * Do NOT import from `_testing` outside of tests in this package.
70
+ */
71
+ export const _testing = {
72
+ isEphemeralPath,
73
+ EPHEMERAL_PATH_PATTERN,
74
+ };
75
+
28
76
  /**
29
77
  * Stage-1 classification for a single local file in a push run. Pre-HEAD —
30
78
  * only inputs we can evaluate locally (size limit, journal hash, optional
@@ -245,29 +293,44 @@ export interface ShareOptions {
245
293
  * convert into remote `DeleteObject` calls. Only consulted when
246
294
  * `propagateDeletes === true`.
247
295
  *
248
- * - `"owned-only"` (default, safer): only entries whose journal
249
- * `direction === "up"` are eligible. That is, only files this
250
- * machine previously uploaded can be remotely deleted on its
251
- * behalf. Entries the journal records as pulled from elsewhere
252
- * (`direction === "down"`) are never delete-propagated — the
253
- * local absence may just be an unpulled state or a filter
254
- * mismatch, both of which previously caused this machine to
255
- * erase other machines' uploads.
296
+ * - `"currency-gated"` (safest; default scheduled for 5.25 after soak):
297
+ * for each candidate, issue a remote HEAD and compare the current
298
+ * remote ETag against the journal's
299
+ * last-recorded `remoteEtag`. Match safe-to-delete (this machine is
300
+ * current for the file, so the local deletion reflects an intentional
301
+ * removal AFTER seeing the latest remote version). Mismatch → refuse
302
+ * and emit `delete-refused-stale-etag`; the journal entry is left
303
+ * intact so the next pull leg re-pulls via the same hasRemoteChanged
304
+ * path. 404 → tombstone: drop the journal entry, no DeleteObject (the
305
+ * remote was already gone). Strictly safer than `owned-only` because
306
+ * it gates on per-file proof of currency rather than direction-of-
307
+ * origin — files that arrived via `/update-hq` (direction:"down") can
308
+ * legitimately be deleted by the device that pulled them, as long as
309
+ * no other device has touched them since.
310
+ * - `"owned-only"` (current default in 5.24): only entries whose journal
311
+ * `direction === "up"` are eligible. That is, only files this machine
312
+ * previously uploaded can be remotely deleted on its behalf. Entries
313
+ * recorded as pulled from elsewhere are never delete-propagated.
314
+ * Default in 5.24 while currency-gated soaks; scheduled to lose the
315
+ * default in 5.25. Downside: any file that arrived via `/update-hq`
316
+ * or another device's push is stuck on remote forever once locally
317
+ * removed, because no device "owns" it under this rule.
256
318
  * - `"all"`: legacy behaviour — every in-scope journal entry whose
257
- * local file is missing is eligible (regardless of direction). The
258
- * bidirectional runner's first-push and any tool that wants to
259
- * mirror a destructive local checkout opts in here explicitly.
319
+ * local file is missing is eligible (regardless of direction or
320
+ * currency). The bidirectional runner's first-push and any tool that
321
+ * wants to mirror a destructive local checkout opts in here
322
+ * explicitly. Use with care — a stale device can erase peer uploads.
260
323
  *
261
- * Independently of this policy, an entry is also dropped from the
262
- * plan when neither the file-shape nor the directory-shape probe of
263
- * `shouldSync` accepts the path — i.e. the current ignore filter
264
- * would have skipped the path on pull (whether classified as a
265
- * regular file or a symlink record / directory). That symmetry
266
- * blocks the failure mode where a path was filtered locally but
267
- * lived in the vault (and the journal) from an older HQ layout or
268
- * a different machine, causing the next push to erase it.
324
+ * Independently of this policy, an entry is also dropped from the plan
325
+ * when (a) it matches `EPHEMERAL_PATH_PATTERN` (conflict mirrors never
326
+ * propagate), or (b) neither the file-shape nor the directory-shape probe
327
+ * of `shouldSync` accepts the path — i.e. the current ignore filter would
328
+ * have skipped the path on pull. That symmetry blocks the failure mode
329
+ * where a path was filtered locally but lived in the vault (and the
330
+ * journal) from an older HQ layout or a different machine, causing the
331
+ * next push to erase it.
269
332
  */
270
- propagateDeletePolicy?: "owned-only" | "all";
333
+ propagateDeletePolicy?: "currency-gated" | "owned-only" | "all";
271
334
  /**
272
335
  * Identity stamped onto each uploaded object's S3 user metadata
273
336
  * (`created-by`, `created-by-sub`, `created-at`). The hq-console vault UI
@@ -303,8 +366,39 @@ export interface ShareResult {
303
366
  * Number of remote `DeleteObject` calls that succeeded this run. Always 0
304
367
  * when `propagateDeletes` is false. The corresponding journal entries are
305
368
  * removed in the same pass so the next sync sees the key as truly gone.
369
+ * Does NOT include tombstones (remote was already 404; no DELETE was
370
+ * issued — see `filesTombstoned`) or refused-stale entries (currency-
371
+ * gated refused because remote etag drifted — see `filesRefusedStale`).
306
372
  */
307
373
  filesDeleted: number;
374
+ /**
375
+ * Number of journal entries dropped because the remote was already 404 at
376
+ * HEAD time (cleaned out-of-band — e.g. someone hand-deleted via the S3
377
+ * console, or another tool ran a destructive operation). No `DeleteObject`
378
+ * was issued for these; the journal converges with reality. Always 0 when
379
+ * `propagateDeletes` is false or `propagateDeletePolicy !== "currency-gated"`.
380
+ */
381
+ filesTombstoned: number;
382
+ /**
383
+ * Number of delete candidates refused by the `currency-gated` policy
384
+ * because the remote object's current ETag no longer matches the journal's
385
+ * recorded one (some other device modified the file since this device last
386
+ * synced it) — OR because the journal entry is a legacy record with no
387
+ * `remoteEtag` to compare against. Neither S3 nor the journal is mutated
388
+ * for these; the next pull leg re-pulls naturally via `hasRemoteChanged`.
389
+ * Always 0 when `propagateDeletes` is false or policy is not
390
+ * `currency-gated`.
391
+ */
392
+ filesRefusedStale: number;
393
+ /**
394
+ * Number of paths blocked by `PERSONAL_VAULT_DEFAULT_EXCLUSIONS` during this
395
+ * run (push leg, personalMode=true). Includes both files that would have
396
+ * uploaded and journal entries that would have been included in the delete
397
+ * plan; deduplicated across walks. Always 0 outside personalMode. Mirrors
398
+ * the `count` field of the `personal-vault-out-of-policy` event (which is
399
+ * emitted exactly once if this is > 0).
400
+ */
401
+ filesExcludedByPolicy: number;
308
402
  /**
309
403
  * Paths (company-relative) that were detected as push conflicts. Mirrors
310
404
  * `SyncResult.conflictPaths` so push and pull surface conflicts the same
@@ -319,13 +413,16 @@ export interface ShareResult {
319
413
  */
320
414
  export async function share(options: ShareOptions): Promise<ShareResult> {
321
415
  const { paths, company, message, onConflict, vaultConfig, entityContext, hqRoot, skipUnchanged, propagateDeletes } = options;
322
- // Default to the safer "owned-only" policy when delete-propagation is on
323
- // but the caller hasn't pinned a policy. Pre-existing callers that passed
324
- // `propagateDeletes: true` (the `sync now` push leg, the runner's
325
- // bidirectional sync, the `--all` fanout) thereby flip to the safer
326
- // semantics automatically. Set `propagateDeletePolicy: "all"` explicitly
327
- // to opt back into the legacy any-missing-file-deletes behaviour.
328
- const propagateDeletePolicy: "owned-only" | "all" =
416
+ // Default to "owned-only" — the pre-5.24 behavior — when delete-propagation
417
+ // is on but the caller hasn't pinned a policy. Staged-default rollout
418
+ // (see CHANGELOG / PR for hq-cloud 5.24.0): 5.24 ships the currency-gated
419
+ // CODE PATH plus the conflict-mirror exclusion (which is policy-
420
+ // independent and immediately stops new litter), but holds the default
421
+ // flip to a later release after soak. Opt into the safer policy now via
422
+ // `propagateDeletePolicy: "currency-gated"` (explicit) or
423
+ // `HQ_SYNC_DELETE_POLICY=currency-gated` (env, honored by sync-runner).
424
+ // The default flip to `"currency-gated"` is scheduled for 5.25.0.
425
+ const propagateDeletePolicy: "currency-gated" | "owned-only" | "all" =
329
426
  options.propagateDeletePolicy ?? "owned-only";
330
427
  const emit = options.onEvent ?? defaultConsoleLogger;
331
428
 
@@ -379,7 +476,34 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
379
476
  const syncRoot = options.personalMode === true
380
477
  ? hqRoot
381
478
  : path.join(hqRoot, "companies", ctx.slug);
382
- const shouldSync = createIgnoreFilter(hqRoot);
479
+
480
+ // Personal-vault default exclusions (introduced in 5.25): wrap the base
481
+ // ignore filter so paths matching `PERSONAL_VAULT_DEFAULT_EXCLUSIONS` are
482
+ // rejected before they upload OR enter the delete plan. Refuses & warns —
483
+ // an already-leaked remote object stays put as an orphan; a separate one-
484
+ // shot purge handles legacy litter.
485
+ //
486
+ // Out-of-policy hits are deduplicated in `excludedSet` so the same path
487
+ // hitting the filter from both the upload walk and the delete-plan walk
488
+ // counts once. `excludedById` powers the per-rule breakdown on the
489
+ // `personal-vault-out-of-policy` event so UI can render which class
490
+ // (secret / machine-local / scratch / …) did the work.
491
+ //
492
+ // Company-mode syncs skip this wrap entirely — company vaults have their
493
+ // own first-push protection (settings/, data/, workers/, .git/) defined
494
+ // in hq-sync's Rust util/ignore.rs, and a company may legitimately ship
495
+ // `output/` or `.env*` paths inside its `companies/{slug}/data/` folder.
496
+ const ignoreFilter = createIgnoreFilter(hqRoot);
497
+ const excludedSet = new Set<string>();
498
+ const excludedById: Record<string, number> = {};
499
+ const onExcluded = (rel: string, match: PersonalVaultExclusion) => {
500
+ if (excludedSet.has(rel)) return;
501
+ excludedSet.add(rel);
502
+ excludedById[match.id] = (excludedById[match.id] ?? 0) + 1;
503
+ };
504
+ const shouldSync = options.personalMode === true
505
+ ? wrapFilterWithPersonalVaultDefaults(ignoreFilter, syncRoot, onExcluded)
506
+ : ignoreFilter;
383
507
  const journalSlug = options.journalSlug ?? ctx.slug;
384
508
  const journal = readJournal(journalSlug);
385
509
 
@@ -387,6 +511,13 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
387
511
  let bytesUploaded = 0;
388
512
  let filesSkipped = 0;
389
513
  let filesDeleted = 0;
514
+ // Tombstone and refused-stale counts mirror the deletePlan buckets so the
515
+ // ShareResult can report them without the caller having to count events.
516
+ // Populated only after Stage 3 runs (deletePlan is computed first, then
517
+ // mutated through the execution loop) — initial zero handles the
518
+ // propagateDeletes=false path.
519
+ let filesTombstoned = 0;
520
+ let filesRefusedStale = 0;
390
521
  const conflictPaths: string[] = [];
391
522
 
392
523
  // Collect all files to share
@@ -406,15 +537,16 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
406
537
  const deleteScopeRoots = propagateDeletes === true
407
538
  ? resolveDeleteScopeRoots(paths, hqRoot, syncRoot)
408
539
  : [];
409
- const deletePlan = propagateDeletes === true
410
- ? computeDeletePlan(
540
+ const deletePlan: DeletePlan = propagateDeletes === true
541
+ ? await computeDeletePlan(
411
542
  journal,
412
543
  syncRoot,
413
544
  deleteScopeRoots,
414
545
  shouldSync,
415
546
  propagateDeletePolicy,
547
+ ctx,
416
548
  )
417
- : [];
549
+ : { toDelete: [], toTombstone: [], refusedStale: [] };
418
550
 
419
551
  emit({
420
552
  type: "plan",
@@ -427,7 +559,11 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
427
559
  // Push conflicts require a remote HEAD; we don't yet do that in Stage 1,
428
560
  // so this stays 0. V1.5 (single LIST) will let us classify them up-front.
429
561
  filesToConflict: 0,
430
- filesToDelete: deletePlan.length,
562
+ // Reported count is the deletes we're actually going to issue — does NOT
563
+ // include tombstones (no S3 call) or refused-stale (no journal change).
564
+ // Refusals surface as their own event stream so consumers that care can
565
+ // render a "kept on remote: N" line separately.
566
+ filesToDelete: deletePlan.toDelete.length,
431
567
  });
432
568
 
433
569
  // Stage 2: execute. Skip items pre-classified as no-ops, then for each
@@ -498,6 +634,17 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
498
634
  bytesUploaded,
499
635
  filesSkipped,
500
636
  filesDeleted,
637
+ // Abort path: delete stage never runs, so tombstone + refused-
638
+ // stale counts are necessarily zero. Explicit fields keep the
639
+ // ShareResult shape stable for consumers that destructure.
640
+ filesTombstoned,
641
+ filesRefusedStale,
642
+ // Exclusions are computed during the upload walk which has
643
+ // already completed by the time we hit a per-file conflict-
644
+ // abort, so the count is meaningful here. No event emit on
645
+ // abort (matches the existing convention: abort short-circuits
646
+ // before the end-of-run telemetry emits).
647
+ filesExcludedByPolicy: excludedSet.size,
501
648
  conflictPaths,
502
649
  aborted: true,
503
650
  };
@@ -554,12 +701,28 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
554
701
  }
555
702
  }
556
703
 
557
- // Stage 3: propagate deletes. Each call writes a delete-marker (versioning
558
- // is enabled on the bucket) and removes the corresponding journal entry so
559
- // the next sync sees the key as truly gone on this machine. A failed
560
- // DeleteObject leaves both the journal entry and the remote object intact —
561
- // the next run will retry.
562
- for (const relativePath of deletePlan) {
704
+ // Stage 3: propagate deletes. Three buckets, three actions:
705
+ //
706
+ // 1. `toDelete` — write a delete-marker (versioning is enabled on the
707
+ // bucket so the delete is soft and prior versions remain recoverable)
708
+ // and remove the journal entry so the next sync sees the key as
709
+ // truly gone on this machine. A failed DeleteObject leaves both
710
+ // the journal entry and remote object intact — the next run retries.
711
+ //
712
+ // 2. `toTombstone` — the remote was 404 at HEAD time (cleaned up out
713
+ // of band, e.g. someone hand-deleted via console). No DeleteObject
714
+ // needed; just drop the journal entry so the journal converges with
715
+ // reality. Emit a synthetic `progress` event with `deleted: true`
716
+ // and bytes=0 so consumers see the convergence.
717
+ //
718
+ // 3. `refusedStale` — under `currency-gated`, the remote's current
719
+ // ETag no longer matches the journal's recorded one. Some other
720
+ // device modified the file since this device last synced it. Keep
721
+ // the remote intact; keep the journal entry intact. The next pull
722
+ // leg of `sync now` re-pulls naturally via the existing
723
+ // `hasRemoteChanged` path. Emit a dedicated event so UIs can
724
+ // surface the refusal without inferring it from absence.
725
+ for (const relativePath of deletePlan.toDelete) {
563
726
  if (vaultConfig && isExpiringSoon(ctx.expiresAt)) {
564
727
  ctx = await refreshEntityContext(companyRef, vaultConfig);
565
728
  }
@@ -583,17 +746,59 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
583
746
  });
584
747
  }
585
748
  }
749
+ for (const relativePath of deletePlan.toTombstone) {
750
+ removeEntry(journal, relativePath);
751
+ filesTombstoned++;
752
+ emit({
753
+ type: "progress",
754
+ path: relativePath,
755
+ bytes: 0,
756
+ deleted: true,
757
+ message: "tombstone (remote already 404)",
758
+ });
759
+ }
760
+ for (const refused of deletePlan.refusedStale) {
761
+ filesRefusedStale++;
762
+ emit({
763
+ type: "delete-refused-stale-etag",
764
+ path: refused.key,
765
+ journalEtag: refused.journalEtag,
766
+ remoteEtag: refused.remoteEtag,
767
+ reason: refused.reason,
768
+ });
769
+ }
586
770
 
587
771
  // See cli/sync.ts: stamp lastSync on completion so a no-op share still
588
772
  // ticks the "Last sync" indicator.
589
773
  journal.lastSync = new Date().toISOString();
590
774
  writeJournal(journalSlug, journal);
591
775
 
776
+ // Personal-vault out-of-policy summary. Emit at most once, only when at
777
+ // least one path was excluded. Sample is capped at 10 to keep the event
778
+ // small (Set iteration order = insertion order, so samples are the first
779
+ // ten paths encountered during the walk — deterministic, not random).
780
+ if (excludedSet.size > 0) {
781
+ const samplePaths: string[] = [];
782
+ for (const p of excludedSet) {
783
+ samplePaths.push(p);
784
+ if (samplePaths.length >= 10) break;
785
+ }
786
+ emit({
787
+ type: "personal-vault-out-of-policy",
788
+ count: excludedSet.size,
789
+ samplePaths,
790
+ byId: { ...excludedById },
791
+ });
792
+ }
793
+
592
794
  return {
593
795
  filesUploaded,
594
796
  bytesUploaded,
595
797
  filesSkipped,
596
798
  filesDeleted,
799
+ filesTombstoned,
800
+ filesRefusedStale,
801
+ filesExcludedByPolicy: excludedSet.size,
597
802
  conflictPaths,
598
803
  aborted: false,
599
804
  };
@@ -612,7 +817,12 @@ function defaultConsoleLogger(event: SyncProgressEvent): void {
612
817
  }
613
818
  } else if (event.type === "progress") {
614
819
  if (event.deleted) {
615
- console.log(` ${event.path} (deleted)`);
820
+ // Append `message` when present (e.g. tombstone events carry
821
+ // "tombstone (remote already 404)"). Without this, tombstones and
822
+ // real deletes render byte-identically in the tty stream, and
823
+ // operators have no way to distinguish from logs alone.
824
+ const suffix = event.message ? ` — ${event.message}` : "";
825
+ console.log(` ✗ ${event.path} (deleted)${suffix}`);
616
826
  } else if (event.message) {
617
827
  console.log(` ✓ ${event.path} — "${event.message}"`);
618
828
  } else {
@@ -624,6 +834,19 @@ function defaultConsoleLogger(event: SyncProgressEvent): void {
624
834
  );
625
835
  } else if (event.type === "error") {
626
836
  console.error(` ✗ ${event.path} — ${event.message}`);
837
+ } else if (event.type === "delete-refused-stale-etag") {
838
+ // Branch on `reason`, not on the sentinel etag strings, so legacy
839
+ // entries render with a clear explanation instead of "<legacy-no-etag>"
840
+ // leaking into operator-visible output.
841
+ if (event.reason === "legacy-no-etag") {
842
+ console.error(
843
+ ` ⚠ no-etag-on-record, kept on remote: ${event.path} (journal entry predates etag tracking)`,
844
+ );
845
+ } else {
846
+ console.error(
847
+ ` ⚠ stale-etag, kept on remote: ${event.path} (journal=${event.journalEtag}, remote=${event.remoteEtag})`,
848
+ );
849
+ }
627
850
  }
628
851
  }
629
852
 
@@ -680,6 +903,12 @@ function collectFiles(
680
903
  for (const p of paths) {
681
904
  const absolutePath = path.isAbsolute(p) ? p : path.resolve(hqRoot, p);
682
905
 
906
+ // Ephemeral artifacts (conflict mirrors) — see EPHEMERAL_PATH_PATTERN doc.
907
+ // Caller may pass one explicitly; we still refuse to upload it. Basename
908
+ // check matches the walkDir gate so behavior is identical whether the
909
+ // mirror is the user-supplied path or found during directory recursion.
910
+ if (isEphemeralPath(path.basename(absolutePath))) continue;
911
+
683
912
  // existsSync follows symlinks: a dangling top-level link will report
684
913
  // not-existing and be skipped here. lstatSync below handles the
685
914
  // valid-link case directly without needing the existsSync gate.
@@ -758,6 +987,11 @@ function walkDir(
758
987
 
759
988
  const entries = fs.readdirSync(dir, { withFileTypes: true });
760
989
  for (const entry of entries) {
990
+ // Ephemeral artifacts (conflict mirrors) are local-only safety backups
991
+ // that MUST NEVER round-trip to S3. Check basename here so the filter
992
+ // applies regardless of which company root contains them. See
993
+ // EPHEMERAL_PATH_PATTERN doc for the full rationale.
994
+ if (isEphemeralPath(entry.name)) continue;
761
995
  const absolutePath = path.join(dir, entry.name);
762
996
  const isDir = entry.isDirectory();
763
997
 
@@ -898,38 +1132,144 @@ function resolveDeleteScopeRoots(
898
1132
  return Array.from(prefixes);
899
1133
  }
900
1134
 
1135
+ /**
1136
+ * Reason a candidate was bucketed into `refusedStale`. Discriminated so
1137
+ * consumers (UI, telemetry, the event logger) can branch on intent without
1138
+ * string-comparing the placeholder etag value.
1139
+ * - `"stale-etag"` → currency-gated saw a real etag mismatch (peer
1140
+ * drift). `journalEtag` and `remoteEtag` are both
1141
+ * real ETag values.
1142
+ * - `"legacy-no-etag"` → journal entry was written before remoteEtag was
1143
+ * tracked. `journalEtag` and `remoteEtag` are
1144
+ * placeholder sentinels — do not display as ETags.
1145
+ */
1146
+ type RefusedStaleReason = "stale-etag" | "legacy-no-etag";
1147
+
1148
+ /**
1149
+ * Three buckets returned by computeDeletePlan, exposed so the execution
1150
+ * loop can take a different action for each:
1151
+ * - `toDelete` → issue DeleteObject + drop journal entry.
1152
+ * - `toTombstone` → no DeleteObject (remote already 404), drop journal
1153
+ * entry. Lets the journal converge with reality even
1154
+ * when the remote was cleaned out-of-band.
1155
+ * - `refusedStale` → no DeleteObject, no journal change. Some other
1156
+ * device modified the remote object since this device
1157
+ * last synced it; the next pull leg re-pulls via the
1158
+ * same `hasRemoteChanged` path the conflict detector
1159
+ * uses. Emitted as `delete-refused-stale-etag` events.
1160
+ */
1161
+ interface DeletePlan {
1162
+ toDelete: string[];
1163
+ toTombstone: string[];
1164
+ refusedStale: Array<{
1165
+ key: string;
1166
+ journalEtag: string;
1167
+ remoteEtag: string;
1168
+ reason: RefusedStaleReason;
1169
+ }>;
1170
+ }
1171
+
1172
+ /**
1173
+ * Concurrency cap for the per-file HEAD-O-meter (currency-gated). Sequential
1174
+ * HEADs would add ~N×(50-200ms) to a sync — for the 261-mirror real-world
1175
+ * case that's 15-50s of latency. 16-way concurrency keeps S3 well within
1176
+ * per-prefix burst limits (~3,500 GET/HEAD/sec/prefix is the documented
1177
+ * floor) and bounded under the AWS-SDK default agent's max-sockets so we
1178
+ * don't compete with the in-flight upload pool.
1179
+ */
1180
+ const DELETE_PLAN_HEAD_CONCURRENCY = 16;
1181
+
901
1182
  /**
902
1183
  * Walk every journal key in `scopeRoots` whose local file is missing from
903
- * disk and return the keys eligible for a remote `DeleteObject`. An entry
904
- * is in the plan only when ALL of the following hold:
1184
+ * disk and bucket each candidate into the right action per `policy`. Hard
1185
+ * filters that drop a candidate entirely (no bucket) regardless of policy:
905
1186
  *
906
- * 1. Its key matches (or sits beneath) one of the `scopeRoots` prefixes.
907
- * 2. Its local file is missing from disk.
908
- * 3. The current ignore filter (`shouldSync`) accepts the key — so paths
1187
+ * 1. Its key must match (or sit beneath) one of the `scopeRoots` prefixes.
1188
+ * 2. Its local file must be missing from disk (lstat ENOENT). We use
1189
+ * `lstat` (not `existsSync`) so a dangling symlink — a link whose
1190
+ * target has been removed but whose link file is still on disk —
1191
+ * counts as "still present locally" and is NOT delete-propagated.
1192
+ * Pre-fix, existsSync followed the link, returned false, and the
1193
+ * entry was queued for remote DeleteObject in the same sync that
1194
+ * had just uploaded it via `uploadSymlink` — the link round-tripped
1195
+ * as "upload, then delete" in one cycle. ENOENT means truly absent
1196
+ * → eligible; other lstat errors propagate.
1197
+ * 3. The current ignore filter (`shouldSync`) accepts the key — paths
909
1198
  * filtered out by `.hqignore` / `.gitignore` / `DEFAULT_IGNORES` are
910
- * never delete-propagated. This blocks the failure mode where a path
911
- * lives in the vault (and the journal) but the local walk skips it
912
- * because of asymmetric ignore rules; without this guard the push
913
- * leg would erase it.
914
- * 4. When `policy === "owned-only"`: the journal entry's `direction`
915
- * is `"up"` (i.e. this machine previously uploaded the file). This
916
- * blocks the failure mode where a behind machine's first `sync now`
917
- * push leg would otherwise erase recent uploads from peers, since
918
- * those entries are recorded as `direction: "down"` (pulled) or
919
- * absent (never seen). Set `policy: "all"` to opt back into the
920
- * legacy any-missing-file-deletes behaviour.
1199
+ * never delete-propagated. Closes the failure mode where a path lives
1200
+ * in the vault (and journal) but the local walk skips it because of
1201
+ * asymmetric ignore rules.
1202
+ *
1203
+ * Dual-hint probe: by the time we're considering this entry for
1204
+ * remote deletion, the local file is already gone — we have no way to
1205
+ * know whether it was a regular file or a symlink record. A single
1206
+ * `isDir=false` probe would silently keep the remote record alive
1207
+ * whenever the only matching `.hqinclude` allowlist pattern is dir-
1208
+ * only (e.g. `companies/*\/knowledge/`), since gitignore's slash
1209
+ * semantics reject the slashless probe. The same dual-hint pattern in
1210
+ * `walkDir`/`collectFiles` (push) and `computePullPlan` (pull) applies
1211
+ * symmetrically here. Pure path lookup, no I/O.
1212
+ * 4. The key does NOT match `EPHEMERAL_PATH_PATTERN`. Conflict mirrors
1213
+ * are local-only artifacts that should never have been journaled in
1214
+ * the first place; the dedicated reconcile command sweeps already-
1215
+ * journaled mirrors. Excluding them here keeps a regular `sync now`
1216
+ * from accidentally deleting a mirror another device is still
1217
+ * reviewing.
1218
+ *
1219
+ * Then per-policy bucketing:
1220
+ *
1221
+ * - `"currency-gated"` (safest; opt-in — `share()` still defaults to `"owned-only"`): issue a HEAD against the remote.
1222
+ * 200 + `normalizeEtag(remote) === entry.remoteEtag` → `toDelete`.
1223
+ * 200 + mismatch → `refusedStale` (peer drift; let pull re-pull).
1224
+ * 404 → `toTombstone` (remote was cleaned out-of-band).
1225
+ * If the journal entry has no recorded `remoteEtag` (legacy entries
1226
+ * written before etag tracking), the candidate falls back to
1227
+ * `refusedStale` with `reason: "legacy-no-etag"` — we can't prove
1228
+ * currency without an etag, so refusal is the safe direction. The
1229
+ * journal entry survives so a future sync with a recorded etag can
1230
+ * re-evaluate.
1231
+ *
1232
+ * HEAD calls are batched at `DELETE_PLAN_HEAD_CONCURRENCY` so a large
1233
+ * candidate set (e.g. a one-shot reconcile sweep) doesn't serialize
1234
+ * into N×RTT latency. The candidate set is materialized into a list
1235
+ * first (synchronous filters above), then the HEAD pass runs in
1236
+ * bounded-parallel chunks.
1237
+ *
1238
+ * Note: there is a TOCTOU window between this HEAD and the eventual
1239
+ * `deleteRemoteFile` call in the share() execution loop. If a peer
1240
+ * overwrites the object in that window (~50-200ms), the resulting
1241
+ * delete-marker lands on a newer version than we verified. S3
1242
+ * versioning makes the worst case recoverable (prior versions are
1243
+ * retained), and the conditional-delete primitive does not exist on
1244
+ * S3 DeleteObject — only PutObject/CopyObject accept `IfMatch`. The
1245
+ * window is bounded, not zero. Realtime sync (separate work) reduces
1246
+ * it further by keeping the journal continuously fresh.
1247
+ * - `"owned-only"`: include only entries with `direction === "up"`. No
1248
+ * HEAD round-trip. Goes to `toDelete`. Legacy fallback.
1249
+ * - `"all"`: include every candidate. No HEAD, no direction check. Goes
1250
+ * to `toDelete`. Caller has explicitly opted out of safety gates.
921
1251
  *
922
1252
  * Empty `scopeRoots` ⇒ empty plan (caller didn't opt in).
923
1253
  */
924
- function computeDeletePlan(
1254
+ async function computeDeletePlan(
925
1255
  journal: SyncJournal,
926
1256
  syncRoot: string,
927
1257
  scopeRoots: string[],
928
1258
  shouldSync: (filePath: string, isDir?: boolean) => boolean,
929
- policy: "owned-only" | "all",
930
- ): string[] {
931
- if (scopeRoots.length === 0) return [];
932
- const out: string[] = [];
1259
+ policy: "currency-gated" | "owned-only" | "all",
1260
+ ctx: EntityContext,
1261
+ ): Promise<DeletePlan> {
1262
+ const plan: DeletePlan = { toDelete: [], toTombstone: [], refusedStale: [] };
1263
+ if (scopeRoots.length === 0) return plan;
1264
+
1265
+ // Stage 1: synchronous pre-filter. Walk every journal entry and either
1266
+ // drop it (hard filter), assign it directly to a bucket (owned-only /
1267
+ // all), or queue it for HEAD (currency-gated). Keeping this synchronous
1268
+ // means the HEAD pass below sees a single, deduplicated candidate list
1269
+ // and the journal-mutation buckets are already settled before any I/O.
1270
+ type HeadCandidate = { key: string; journalEtag: string };
1271
+ const headCandidates: HeadCandidate[] = [];
1272
+
933
1273
  for (const [relativeKey, entry] of Object.entries(journal.files)) {
934
1274
  const inScope = scopeRoots.some(
935
1275
  (root) =>
@@ -939,14 +1279,6 @@ function computeDeletePlan(
939
1279
  );
940
1280
  if (!inScope) continue;
941
1281
  const localPath = path.join(syncRoot, relativeKey);
942
- // lstat (not existsSync) so a dangling symlink — a link whose
943
- // target has been removed but whose link file is still on disk —
944
- // counts as "still present locally" and is NOT delete-propagated.
945
- // Pre-fix, existsSync followed the link, returned false, and the
946
- // entry was queued for remote DeleteObject in the same sync that
947
- // had just uploaded it via uploadSymlink. The link round-tripped
948
- // as "upload, then delete" in one cycle. ENOENT means truly
949
- // absent → eligible; other lstat errors propagate.
950
1282
  let presentLocally = true;
951
1283
  try {
952
1284
  fs.lstatSync(localPath);
@@ -963,24 +1295,67 @@ function computeDeletePlan(
963
1295
  }
964
1296
  }
965
1297
  if (presentLocally) continue;
966
- // (3) Symmetric filter guard. `shouldSync` is constructed from the same
967
- // hqRoot the pull leg uses, so a key the pull would have skipped
968
- // ("ignored") is also one we must not delete-propagate.
969
- //
970
- // Dual-hint probe: by the time we're considering this entry for
971
- // remote deletion, the local file is already gone — we have no
972
- // way to know whether it was a regular file or a symlink record.
973
- // A single isDir=false probe would silently keep the remote
974
- // record alive whenever the only matching .hqinclude allowlist
975
- // pattern is dir-only (e.g. `companies/*/knowledge/`), since
976
- // gitignore's slash semantics reject the slashless probe. The
977
- // same dual-hint pattern in walkDir/collectFiles (push) and
978
- // computePullPlan (pull) applies symmetrically here. Pure path
979
- // lookup, no I/O.
980
1298
  if (!shouldSync(localPath, false) && !shouldSync(localPath, true)) continue;
981
- // (4) Direction guard under "owned-only" policy.
982
- if (policy === "owned-only" && entry.direction !== "up") continue;
983
- out.push(relativeKey);
1299
+ // Ephemeral artifacts (conflict mirrors) never propagate-delete via the
1300
+ // normal path — see EPHEMERAL_PATH_PATTERN doc.
1301
+ if (isEphemeralPath(relativeKey)) continue;
1302
+
1303
+ if (policy === "all") {
1304
+ plan.toDelete.push(relativeKey);
1305
+ continue;
1306
+ }
1307
+ if (policy === "owned-only") {
1308
+ if (entry.direction !== "up") continue;
1309
+ plan.toDelete.push(relativeKey);
1310
+ continue;
1311
+ }
1312
+ // currency-gated: queue for HEAD unless the entry is legacy (no etag).
1313
+ const journalEtag = entry.remoteEtag;
1314
+ if (!journalEtag) {
1315
+ plan.refusedStale.push({
1316
+ key: relativeKey,
1317
+ journalEtag: "<legacy-no-etag>",
1318
+ remoteEtag: "<unknown>",
1319
+ reason: "legacy-no-etag",
1320
+ });
1321
+ continue;
1322
+ }
1323
+ headCandidates.push({ key: relativeKey, journalEtag });
984
1324
  }
985
- return out;
1325
+
1326
+ // Stage 2: bounded-parallel HEAD pass. Promise.all over chunks of size
1327
+ // `DELETE_PLAN_HEAD_CONCURRENCY` so a large candidate set doesn't
1328
+ // serialize into N round-trips, and so we don't burst past the AWS-SDK
1329
+ // default agent's per-host socket cap. Each result is bucketed
1330
+ // independently, but a HEAD that throws is not isolated: it rejects the
1331
+ // chunk's Promise.all, aborts the whole plan, and is surfaced by share()'s
1332
+ // outer try/catch, mirroring the existing pre-share error handling.
1333
+ for (let i = 0; i < headCandidates.length; i += DELETE_PLAN_HEAD_CONCURRENCY) {
1334
+ const chunk = headCandidates.slice(i, i + DELETE_PLAN_HEAD_CONCURRENCY);
1335
+ const results = await Promise.all(
1336
+ chunk.map(async (c) => ({
1337
+ candidate: c,
1338
+ remote: await headRemoteFile(ctx, c.key),
1339
+ })),
1340
+ );
1341
+ for (const { candidate, remote } of results) {
1342
+ if (remote === null) {
1343
+ plan.toTombstone.push(candidate.key);
1344
+ continue;
1345
+ }
1346
+ const currentEtag = normalizeEtag(remote.etag);
1347
+ if (currentEtag === candidate.journalEtag) {
1348
+ plan.toDelete.push(candidate.key);
1349
+ } else {
1350
+ plan.refusedStale.push({
1351
+ key: candidate.key,
1352
+ journalEtag: candidate.journalEtag,
1353
+ remoteEtag: currentEtag,
1354
+ reason: "stale-etag",
1355
+ });
1356
+ }
1357
+ }
1358
+ }
1359
+
1360
+ return plan;
986
1361
  }