@indigoai-us/hq-cloud 5.23.0 → 5.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli/share.ts CHANGED
@@ -25,6 +25,50 @@ import { resolveConflict } from "./conflict.js";
25
25
  import type { ConflictStrategy } from "./conflict.js";
26
26
  import type { SyncProgressEvent } from "./sync.js";
27
27
 
28
+ /**
29
+ * Local-only ephemeral artifacts: conflict-mirror files written by the pull
30
+ * leg whenever a 3-way merge keeps local AND wants to preserve the remote
31
+ * version for inspection. Format: `<orig>.conflict-<ISO-utc>-<machineHash>.<ext>`
32
+ * (e.g. `.claude/CLAUDE.md.conflict-2026-05-13T19-40-40Z-e5797a.md`).
33
+ *
34
+ * These files MUST never round-trip to S3 — they're local-only safety backups
35
+ * the user reviews and deletes once the merge is resolved. Pre-fix, the push
36
+ * walker happily uploaded them, the journal recorded them, and the
37
+ * `owned-only` delete policy then refused to clean them up when the user
38
+ * deleted them locally (because pull-confirmation had stamped them as
39
+ * `direction: "down"`). Net effect: a permanent litter ratchet on remote.
40
+ *
41
+ * Wire-points: (1) push walker — `collectFiles` / `walkDir` skip these so
42
+ * they never upload; (2) `computeDeletePlan` — skip these so an already-
43
+ * journaled mirror that's been deleted locally doesn't get included in the
44
+ * regular delete plan (the dedicated reconcile path handles existing litter).
45
+ */
46
+ const EPHEMERAL_PATH_PATTERN =
47
+ /\.conflict-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z-[a-f0-9]+\./;
48
+
49
+ /**
50
+ * Cheap pure check — pass the relative key OR a basename; either works. Used
51
+ * in both the file walker (basename matching) and the delete-plan walker
52
+ * (relative-key matching). The regex matches anywhere in the string, which is
53
+ * fine: the `.conflict-<ISO>-<hash>.` token is unambiguous.
54
+ */
55
+ function isEphemeralPath(p: string): boolean {
56
+ return EPHEMERAL_PATH_PATTERN.test(p);
57
+ }
58
+
59
+ /**
60
+ * Test-only export. Kept under a `_testing` namespace so the module's public
61
+ * surface stays focused on `share()` / `ShareOptions` / `ShareResult` while
62
+ * regression-critical regex contracts (the conflict-mirror pattern) can be
63
+ * pinned by direct unit tests without round-tripping through share().
64
+ *
65
+ * Do NOT import from `_testing` outside of tests in this package.
66
+ */
67
+ export const _testing = {
68
+ isEphemeralPath,
69
+ EPHEMERAL_PATH_PATTERN,
70
+ };
71
+
28
72
  /**
29
73
  * Stage-1 classification for a single local file in a push run. Pre-HEAD —
30
74
  * only inputs we can evaluate locally (size limit, journal hash, optional
@@ -245,29 +289,44 @@ export interface ShareOptions {
245
289
  * convert into remote `DeleteObject` calls. Only consulted when
246
290
  * `propagateDeletes === true`.
247
291
  *
248
- * - `"owned-only"` (default, safer): only entries whose journal
249
- * `direction === "up"` are eligible. That is, only files this
250
- * machine previously uploaded can be remotely deleted on its
251
- * behalf. Entries the journal records as pulled from elsewhere
252
- * (`direction === "down"`) are never delete-propagated the
253
- * local absence may just be an unpulled state or a filter
254
- * mismatch, both of which previously caused this machine to
255
- * erase other machines' uploads.
292
+ * - `"currency-gated"` (safest; default scheduled for 5.25 after soak):
293
+ * for each candidate, issue a remote HEAD and compare the current
294
+ * remote ETag against the journal's
295
+ * last-recorded `remoteEtag`. Match → safe-to-delete (this machine is
296
+ * current for the file, so the local deletion reflects an intentional
297
+ * removal AFTER seeing the latest remote version). Mismatch → refuse
298
+ * and emit `delete-refused-stale-etag`; the journal entry is left
299
+ * intact so the next pull leg re-pulls via the same hasRemoteChanged
300
+ * path. 404 → tombstone: drop the journal entry, no DeleteObject (the
301
+ * remote was already gone). Strictly safer than `owned-only` because
302
+ * it gates on per-file proof of currency rather than direction-of-
303
+ * origin — files that arrived via `/update-hq` (direction:"down") can
304
+ * legitimately be deleted by the device that pulled them, as long as
305
+ * no other device has touched them since.
306
+ * - `"owned-only"` (current default in 5.24): only entries whose journal
307
+ * `direction === "up"` are eligible. That is, only files this machine
308
+ * previously uploaded can be remotely deleted on its behalf. Entries
309
+ * recorded as pulled from elsewhere are never delete-propagated.
310
+ * Default in 5.24 while currency-gated soaks; scheduled to lose the
311
+ * default in 5.25. Downside: any file that arrived via `/update-hq`
312
+ * or another device's push is stuck on remote forever once locally
313
+ * removed, because no device "owns" it under this rule.
256
314
  * - `"all"`: legacy behaviour — every in-scope journal entry whose
257
- * local file is missing is eligible (regardless of direction). The
258
- * bidirectional runner's first-push and any tool that wants to
259
- * mirror a destructive local checkout opts in here explicitly.
315
+ * local file is missing is eligible (regardless of direction or
316
+ * currency). The bidirectional runner's first-push and any tool that
317
+ * wants to mirror a destructive local checkout opts in here
318
+ * explicitly. Use with care — a stale device can erase peer uploads.
260
319
  *
261
- * Independently of this policy, an entry is also dropped from the
262
- * plan when neither the file-shape nor the directory-shape probe of
263
- * `shouldSync` accepts the path i.e. the current ignore filter
264
- * would have skipped the path on pull (whether classified as a
265
- * regular file or a symlink record / directory). That symmetry
266
- * blocks the failure mode where a path was filtered locally but
267
- * lived in the vault (and the journal) from an older HQ layout or
268
- * a different machine, causing the next push to erase it.
320
+ * Independently of this policy, an entry is also dropped from the plan
321
+ * when (a) it matches `EPHEMERAL_PATH_PATTERN` (conflict mirrors never
322
+ * propagate), or (b) neither the file-shape nor the directory-shape probe
323
+ * of `shouldSync` accepts the path — i.e. the current ignore filter would
324
+ * have skipped the path on pull. That symmetry blocks the failure mode
325
+ * where a path was filtered locally but lived in the vault (and the
326
+ * journal) from an older HQ layout or a different machine, causing the
327
+ * next push to erase it.
269
328
  */
270
- propagateDeletePolicy?: "owned-only" | "all";
329
+ propagateDeletePolicy?: "currency-gated" | "owned-only" | "all";
271
330
  /**
272
331
  * Identity stamped onto each uploaded object's S3 user metadata
273
332
  * (`created-by`, `created-by-sub`, `created-at`). The hq-console vault UI
@@ -303,8 +362,30 @@ export interface ShareResult {
303
362
  * Number of remote `DeleteObject` calls that succeeded this run. Always 0
304
363
  * when `propagateDeletes` is false. The corresponding journal entries are
305
364
  * removed in the same pass so the next sync sees the key as truly gone.
365
+ * Does NOT include tombstones (remote was already 404; no DELETE was
366
+ * issued — see `filesTombstoned`) or refused-stale entries (currency-
367
+ * gated refused because remote etag drifted — see `filesRefusedStale`).
306
368
  */
307
369
  filesDeleted: number;
370
+ /**
371
+ * Number of journal entries dropped because the remote was already 404 at
372
+ * HEAD time (cleaned out-of-band — e.g. someone hand-deleted via the S3
373
+ * console, or another tool ran a destructive operation). No `DeleteObject`
374
+ * was issued for these; the journal converges with reality. Always 0 when
375
+ * `propagateDeletes` is false or `propagateDeletePolicy !== "currency-gated"`.
376
+ */
377
+ filesTombstoned: number;
378
+ /**
379
+ * Number of delete candidates refused by the `currency-gated` policy
380
+ * because the remote object's current ETag no longer matches the journal's
381
+ * recorded one (some other device modified the file since this device last
382
+ * synced it) — OR because the journal entry is a legacy record with no
383
+ * `remoteEtag` to compare against. Neither S3 nor the journal is mutated
384
+ * for these; the next pull leg re-pulls naturally via `hasRemoteChanged`.
385
+ * Always 0 when `propagateDeletes` is false or policy is not
386
+ * `currency-gated`.
387
+ */
388
+ filesRefusedStale: number;
308
389
  /**
309
390
  * Paths (company-relative) that were detected as push conflicts. Mirrors
310
391
  * `SyncResult.conflictPaths` so push and pull surface conflicts the same
@@ -319,13 +400,16 @@ export interface ShareResult {
319
400
  */
320
401
  export async function share(options: ShareOptions): Promise<ShareResult> {
321
402
  const { paths, company, message, onConflict, vaultConfig, entityContext, hqRoot, skipUnchanged, propagateDeletes } = options;
322
- // Default to the safer "owned-only" policy when delete-propagation is on
323
- // but the caller hasn't pinned a policy. Pre-existing callers that passed
324
- // `propagateDeletes: true` (the `sync now` push leg, the runner's
325
- // bidirectional sync, the `--all` fanout) thereby flip to the safer
326
- // semantics automatically. Set `propagateDeletePolicy: "all"` explicitly
327
- // to opt back into the legacy any-missing-file-deletes behaviour.
328
- const propagateDeletePolicy: "owned-only" | "all" =
403
+ // Default to "owned-only" — the pre-5.24 behavior — when delete-propagation
404
+ // is on but the caller hasn't pinned a policy. Staged-default rollout
405
+ // (see CHANGELOG / PR for hq-cloud 5.24.0): 5.24 ships the currency-gated
406
+ // CODE PATH plus the conflict-mirror exclusion (which is policy-
407
+ // independent and immediately stops new litter), but holds the default
408
+ // flip to a later release after soak. Opt into the safer policy now via
409
+ // `propagateDeletePolicy: "currency-gated"` (explicit) or
410
+ // `HQ_SYNC_DELETE_POLICY=currency-gated` (env, honored by sync-runner).
411
+ // The default flip to `"currency-gated"` is scheduled for 5.25.0.
412
+ const propagateDeletePolicy: "currency-gated" | "owned-only" | "all" =
329
413
  options.propagateDeletePolicy ?? "owned-only";
330
414
  const emit = options.onEvent ?? defaultConsoleLogger;
331
415
 
@@ -387,6 +471,13 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
387
471
  let bytesUploaded = 0;
388
472
  let filesSkipped = 0;
389
473
  let filesDeleted = 0;
474
+ // Tombstone and refused-stale counts mirror the deletePlan buckets so the
475
+ // ShareResult can report them without the caller having to count events.
476
+ // Populated only after Stage 3 runs (deletePlan is computed first, then
477
+ // mutated through the execution loop) — initial zero handles the
478
+ // propagateDeletes=false path.
479
+ let filesTombstoned = 0;
480
+ let filesRefusedStale = 0;
390
481
  const conflictPaths: string[] = [];
391
482
 
392
483
  // Collect all files to share
@@ -406,15 +497,16 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
406
497
  const deleteScopeRoots = propagateDeletes === true
407
498
  ? resolveDeleteScopeRoots(paths, hqRoot, syncRoot)
408
499
  : [];
409
- const deletePlan = propagateDeletes === true
410
- ? computeDeletePlan(
500
+ const deletePlan: DeletePlan = propagateDeletes === true
501
+ ? await computeDeletePlan(
411
502
  journal,
412
503
  syncRoot,
413
504
  deleteScopeRoots,
414
505
  shouldSync,
415
506
  propagateDeletePolicy,
507
+ ctx,
416
508
  )
417
- : [];
509
+ : { toDelete: [], toTombstone: [], refusedStale: [] };
418
510
 
419
511
  emit({
420
512
  type: "plan",
@@ -427,7 +519,11 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
427
519
  // Push conflicts require a remote HEAD; we don't yet do that in Stage 1,
428
520
  // so this stays 0. V1.5 (single LIST) will let us classify them up-front.
429
521
  filesToConflict: 0,
430
- filesToDelete: deletePlan.length,
522
+ // Reported count is the deletes we're actually going to issue — does NOT
523
+ // include tombstones (no S3 call) or refused-stale (no journal change).
524
+ // Refusals surface as their own event stream so consumers that care can
525
+ // render a "kept on remote: N" line separately.
526
+ filesToDelete: deletePlan.toDelete.length,
431
527
  });
432
528
 
433
529
  // Stage 2: execute. Skip items pre-classified as no-ops, then for each
@@ -498,6 +594,11 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
498
594
  bytesUploaded,
499
595
  filesSkipped,
500
596
  filesDeleted,
597
+ // Abort path: delete stage never runs, so tombstone + refused-
598
+ // stale counts are necessarily zero. Explicit fields keep the
599
+ // ShareResult shape stable for consumers that destructure.
600
+ filesTombstoned,
601
+ filesRefusedStale,
501
602
  conflictPaths,
502
603
  aborted: true,
503
604
  };
@@ -554,12 +655,28 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
554
655
  }
555
656
  }
556
657
 
557
- // Stage 3: propagate deletes. Each call writes a delete-marker (versioning
558
- // is enabled on the bucket) and removes the corresponding journal entry so
559
- // the next sync sees the key as truly gone on this machine. A failed
560
- // DeleteObject leaves both the journal entry and the remote object intact
561
- // the next run will retry.
562
- for (const relativePath of deletePlan) {
658
+ // Stage 3: propagate deletes. Three buckets, three actions:
659
+ //
660
+ // 1. `toDelete` — write a delete-marker (versioning is enabled on the
661
+ // bucket so the delete is soft and prior versions remain recoverable)
662
+ // and remove the journal entry so the next sync sees the key as
663
+ // truly gone on this machine. A failed DeleteObject leaves both
664
+ // the journal entry and remote object intact — the next run retries.
665
+ //
666
+ // 2. `toTombstone` — the remote was 404 at HEAD time (cleaned up out
667
+ // of band, e.g. someone hand-deleted via console). No DeleteObject
668
+ // needed; just drop the journal entry so the journal converges with
669
+ // reality. Emit a synthetic `progress` event with `deleted: true`
670
+ // and bytes=0 so consumers see the convergence.
671
+ //
672
+ // 3. `refusedStale` — under `currency-gated`, the remote's current
673
+ // ETag no longer matches the journal's recorded one. Some other
674
+ // device modified the file since this device last synced it. Keep
675
+ // the remote intact; keep the journal entry intact. The next pull
676
+ // leg of `sync now` re-pulls naturally via the existing
677
+ // `hasRemoteChanged` path. Emit a dedicated event so UIs can
678
+ // surface the refusal without inferring it from absence.
679
+ for (const relativePath of deletePlan.toDelete) {
563
680
  if (vaultConfig && isExpiringSoon(ctx.expiresAt)) {
564
681
  ctx = await refreshEntityContext(companyRef, vaultConfig);
565
682
  }
@@ -583,6 +700,27 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
583
700
  });
584
701
  }
585
702
  }
703
+ for (const relativePath of deletePlan.toTombstone) {
704
+ removeEntry(journal, relativePath);
705
+ filesTombstoned++;
706
+ emit({
707
+ type: "progress",
708
+ path: relativePath,
709
+ bytes: 0,
710
+ deleted: true,
711
+ message: "tombstone (remote already 404)",
712
+ });
713
+ }
714
+ for (const refused of deletePlan.refusedStale) {
715
+ filesRefusedStale++;
716
+ emit({
717
+ type: "delete-refused-stale-etag",
718
+ path: refused.key,
719
+ journalEtag: refused.journalEtag,
720
+ remoteEtag: refused.remoteEtag,
721
+ reason: refused.reason,
722
+ });
723
+ }
586
724
 
587
725
  // See cli/sync.ts: stamp lastSync on completion so a no-op share still
588
726
  // ticks the "Last sync" indicator.
@@ -594,6 +732,8 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
594
732
  bytesUploaded,
595
733
  filesSkipped,
596
734
  filesDeleted,
735
+ filesTombstoned,
736
+ filesRefusedStale,
597
737
  conflictPaths,
598
738
  aborted: false,
599
739
  };
@@ -612,7 +752,12 @@ function defaultConsoleLogger(event: SyncProgressEvent): void {
612
752
  }
613
753
  } else if (event.type === "progress") {
614
754
  if (event.deleted) {
615
- console.log(` ${event.path} (deleted)`);
755
+ // Append `message` when present (e.g. tombstone events carry
756
+ // "tombstone (remote already 404)"). Without this, tombstones and
757
+ // real deletes render byte-identically in the tty stream, and
758
+ // operators have no way to distinguish from logs alone.
759
+ const suffix = event.message ? ` — ${event.message}` : "";
760
+ console.log(` ✗ ${event.path} (deleted)${suffix}`);
616
761
  } else if (event.message) {
617
762
  console.log(` ✓ ${event.path} — "${event.message}"`);
618
763
  } else {
@@ -624,6 +769,19 @@ function defaultConsoleLogger(event: SyncProgressEvent): void {
624
769
  );
625
770
  } else if (event.type === "error") {
626
771
  console.error(` ✗ ${event.path} — ${event.message}`);
772
+ } else if (event.type === "delete-refused-stale-etag") {
773
+ // Branch on `reason`, not on the sentinel etag strings, so legacy
774
+ // entries render with a clear explanation instead of "<legacy-no-etag>"
775
+ // leaking into operator-visible output.
776
+ if (event.reason === "legacy-no-etag") {
777
+ console.error(
778
+ ` ⚠ no-etag-on-record, kept on remote: ${event.path} (journal entry predates etag tracking)`,
779
+ );
780
+ } else {
781
+ console.error(
782
+ ` ⚠ stale-etag, kept on remote: ${event.path} (journal=${event.journalEtag}, remote=${event.remoteEtag})`,
783
+ );
784
+ }
627
785
  }
628
786
  }
629
787
 
@@ -680,6 +838,12 @@ function collectFiles(
680
838
  for (const p of paths) {
681
839
  const absolutePath = path.isAbsolute(p) ? p : path.resolve(hqRoot, p);
682
840
 
841
+ // Ephemeral artifacts (conflict mirrors) — see EPHEMERAL_PATH_PATTERN doc.
842
+ // Caller may pass one explicitly; we still refuse to upload it. Basename
843
+ // check matches the walkDir gate so behavior is identical whether the
844
+ // mirror is the user-supplied path or found during directory recursion.
845
+ if (isEphemeralPath(path.basename(absolutePath))) continue;
846
+
683
847
  // existsSync follows symlinks: a dangling top-level link will report
684
848
  // not-existing and be skipped here. lstatSync below handles the
685
849
  // valid-link case directly without needing the existsSync gate.
@@ -758,6 +922,11 @@ function walkDir(
758
922
 
759
923
  const entries = fs.readdirSync(dir, { withFileTypes: true });
760
924
  for (const entry of entries) {
925
+ // Ephemeral artifacts (conflict mirrors) are local-only safety backups
926
+ // that MUST NEVER round-trip to S3. Check basename here so the filter
927
+ // applies regardless of which company root contains them. See
928
+ // EPHEMERAL_PATH_PATTERN doc for the full rationale.
929
+ if (isEphemeralPath(entry.name)) continue;
761
930
  const absolutePath = path.join(dir, entry.name);
762
931
  const isDir = entry.isDirectory();
763
932
 
@@ -898,38 +1067,144 @@ function resolveDeleteScopeRoots(
898
1067
  return Array.from(prefixes);
899
1068
  }
900
1069
 
1070
+ /**
1071
+ * Reason a candidate was bucketed into `refusedStale`. Discriminated so
1072
+ * consumers (UI, telemetry, the event logger) can branch on intent without
1073
+ * string-comparing the placeholder etag value.
1074
+ * - `"stale-etag"` → currency-gated saw a real etag mismatch (peer
1075
+ * drift). `journalEtag` and `remoteEtag` are both
1076
+ * real ETag values.
1077
+ * - `"legacy-no-etag"` → journal entry was written before remoteEtag was
1078
+ * tracked. `journalEtag` and `remoteEtag` are
1079
+ * placeholder sentinels — do not display as ETags.
1080
+ */
1081
+ type RefusedStaleReason = "stale-etag" | "legacy-no-etag";
1082
+
1083
+ /**
1084
+ * Three buckets returned by computeDeletePlan, exposed so the execution
1085
+ * loop can take a different action for each:
1086
+ * - `toDelete` → issue DeleteObject + drop journal entry.
1087
+ * - `toTombstone` → no DeleteObject (remote already 404), drop journal
1088
+ * entry. Lets the journal converge with reality even
1089
+ * when the remote was cleaned out-of-band.
1090
+ * - `refusedStale` → no DeleteObject, no journal change. Some other
1091
+ * device modified the remote object since this device
1092
+ * last synced it; the next pull leg re-pulls via the
1093
+ * same `hasRemoteChanged` path the conflict detector
1094
+ * uses. Emitted as `delete-refused-stale-etag` events.
1095
+ */
1096
+ interface DeletePlan {
1097
+ toDelete: string[];
1098
+ toTombstone: string[];
1099
+ refusedStale: Array<{
1100
+ key: string;
1101
+ journalEtag: string;
1102
+ remoteEtag: string;
1103
+ reason: RefusedStaleReason;
1104
+ }>;
1105
+ }
1106
+
1107
+ /**
1108
+ * Concurrency cap for the per-file HEAD-O-meter (currency-gated). Sequential
1109
+ * HEADs would add ~N×(50-200ms) to a sync — for the 261-mirror real-world
1110
+ * case that's 15-50s of latency. 16-way concurrency keeps S3 well within
1111
+ * per-prefix burst limits (~3,500 GET/HEAD/sec/prefix is the documented
1112
+ * floor) and bounded under the AWS-SDK default agent's max-sockets so we
1113
+ * don't compete with the in-flight upload pool.
1114
+ */
1115
+ const DELETE_PLAN_HEAD_CONCURRENCY = 16;
1116
+
901
1117
  /**
902
1118
  * Walk every journal key in `scopeRoots` whose local file is missing from
903
- * disk and return the keys eligible for a remote `DeleteObject`. An entry
904
- * is in the plan only when ALL of the following hold:
1119
+ * disk and bucket each candidate into the right action per `policy`. Hard
1120
+ * filters that drop a candidate entirely (no bucket) regardless of policy:
905
1121
  *
906
- * 1. Its key matches (or sits beneath) one of the `scopeRoots` prefixes.
907
- * 2. Its local file is missing from disk.
908
- * 3. The current ignore filter (`shouldSync`) accepts the key — so paths
1122
+ * 1. Its key must match (or sit beneath) one of the `scopeRoots` prefixes.
1123
+ * 2. Its local file must be missing from disk (lstat ENOENT). We use
1124
+ * `lstat` (not `existsSync`) so a dangling symlink — a link whose
1125
+ * target has been removed but whose link file is still on disk —
1126
+ * counts as "still present locally" and is NOT delete-propagated.
1127
+ * Pre-fix, existsSync followed the link, returned false, and the
1128
+ * entry was queued for remote DeleteObject in the same sync that
1129
+ * had just uploaded it via `uploadSymlink` — the link round-tripped
1130
+ * as "upload, then delete" in one cycle. ENOENT means truly absent
1131
+ * → eligible; other lstat errors propagate.
1132
+ * 3. The current ignore filter (`shouldSync`) accepts the key — paths
909
1133
  * filtered out by `.hqignore` / `.gitignore` / `DEFAULT_IGNORES` are
910
- * never delete-propagated. This blocks the failure mode where a path
911
- * lives in the vault (and the journal) but the local walk skips it
912
- * because of asymmetric ignore rules; without this guard the push
913
- * leg would erase it.
914
- * 4. When `policy === "owned-only"`: the journal entry's `direction`
915
- * is `"up"` (i.e. this machine previously uploaded the file). This
916
- * blocks the failure mode where a behind machine's first `sync now`
917
- * push leg would otherwise erase recent uploads from peers, since
918
- * those entries are recorded as `direction: "down"` (pulled) or
919
- * absent (never seen). Set `policy: "all"` to opt back into the
920
- * legacy any-missing-file-deletes behaviour.
1134
+ * never delete-propagated. Closes the failure mode where a path lives
1135
+ * in the vault (and journal) but the local walk skips it because of
1136
+ * asymmetric ignore rules.
1137
+ *
1138
+ * Dual-hint probe: by the time we're considering this entry for
1139
+ * remote deletion, the local file is already gone — we have no way to
1140
+ * know whether it was a regular file or a symlink record. A single
1141
+ * `isDir=false` probe would silently keep the remote record alive
1142
+ * whenever the only matching `.hqinclude` allowlist pattern is dir-
1143
+ * only (e.g. `companies/*\/knowledge/`), since gitignore's slash
1144
+ * semantics reject the slashless probe. The same dual-hint pattern in
1145
+ * `walkDir`/`collectFiles` (push) and `computePullPlan` (pull) applies
1146
+ * symmetrically here. Pure path lookup, no I/O.
1147
+ * 4. The key does NOT match `EPHEMERAL_PATH_PATTERN`. Conflict mirrors
1148
+ * are local-only artifacts that should never have been journaled in
1149
+ * the first place; the dedicated reconcile command sweeps already-
1150
+ * journaled mirrors. Excluding them here keeps a regular `sync now`
1151
+ * from accidentally deleting a mirror another device is still
1152
+ * reviewing.
1153
+ *
1154
+ * Then per-policy bucketing:
1155
+ *
1156
+ * - `"currency-gated"` (safest; NOT the default in 5.24 — default flip scheduled for 5.25): issue a HEAD against the remote.
1157
+ * 200 + `normalizeEtag(remote) === entry.remoteEtag` → `toDelete`.
1158
+ * 200 + mismatch → `refusedStale` (peer drift; let pull re-pull).
1159
+ * 404 → `toTombstone` (remote was cleaned out-of-band).
1160
+ * If the journal entry has no recorded `remoteEtag` (legacy entries
1161
+ * written before etag tracking), the candidate falls back to
1162
+ * `refusedStale` with `reason: "legacy-no-etag"` — we can't prove
1163
+ * currency without an etag, so refusal is the safe direction. The
1164
+ * journal entry survives so a future sync with a recorded etag can
1165
+ * re-evaluate.
1166
+ *
1167
+ * HEAD calls are batched at `DELETE_PLAN_HEAD_CONCURRENCY` so a large
1168
+ * candidate set (e.g. a one-shot reconcile sweep) doesn't serialize
1169
+ * into N×RTT latency. The candidate set is materialized into a list
1170
+ * first (synchronous filters above), then the HEAD pass runs in
1171
+ * bounded-parallel chunks.
1172
+ *
1173
+ * Note: there is a TOCTOU window between this HEAD and the eventual
1174
+ * `deleteRemoteFile` call in the share() execution loop. If a peer
1175
+ * overwrites the object in that window (at least ~50-200ms; longer when the Stage 2 upload pass runs in between), the resulting
1176
+ * delete-marker lands on a newer version than we verified. S3
1177
+ * versioning makes the worst case recoverable (prior versions are
1178
+ * retained), and the conditional-delete primitive does not exist on
1179
+ * S3 DeleteObject — only PutObject/CopyObject accept `IfMatch`. The
1180
+ * window is bounded, not zero. Realtime sync (separate work) reduces
1181
+ * it further by keeping the journal continuously fresh.
1182
+ * - `"owned-only"`: include only entries with `direction === "up"`. No
1183
+ * HEAD round-trip. Goes to `toDelete`. Legacy fallback.
1184
+ * - `"all"`: include every candidate. No HEAD, no direction check. Goes
1185
+ * to `toDelete`. Caller has explicitly opted out of safety gates.
921
1186
  *
922
1187
  * Empty `scopeRoots` ⇒ empty plan (caller didn't opt in).
923
1188
  */
924
- function computeDeletePlan(
1189
+ async function computeDeletePlan(
925
1190
  journal: SyncJournal,
926
1191
  syncRoot: string,
927
1192
  scopeRoots: string[],
928
1193
  shouldSync: (filePath: string, isDir?: boolean) => boolean,
929
- policy: "owned-only" | "all",
930
- ): string[] {
931
- if (scopeRoots.length === 0) return [];
932
- const out: string[] = [];
1194
+ policy: "currency-gated" | "owned-only" | "all",
1195
+ ctx: EntityContext,
1196
+ ): Promise<DeletePlan> {
1197
+ const plan: DeletePlan = { toDelete: [], toTombstone: [], refusedStale: [] };
1198
+ if (scopeRoots.length === 0) return plan;
1199
+
1200
+ // Stage 1: synchronous pre-filter. Walk every journal entry and either
1201
+ // drop it (hard filter), assign it directly to a bucket (owned-only /
1202
+ // all), or queue it for HEAD (currency-gated). Keeping this synchronous
1203
+ // means the HEAD pass below sees a single, deduplicated candidate list
1204
+ // and the journal-mutation buckets are already settled before any I/O.
1205
+ type HeadCandidate = { key: string; journalEtag: string };
1206
+ const headCandidates: HeadCandidate[] = [];
1207
+
933
1208
  for (const [relativeKey, entry] of Object.entries(journal.files)) {
934
1209
  const inScope = scopeRoots.some(
935
1210
  (root) =>
@@ -939,14 +1214,6 @@ function computeDeletePlan(
939
1214
  );
940
1215
  if (!inScope) continue;
941
1216
  const localPath = path.join(syncRoot, relativeKey);
942
- // lstat (not existsSync) so a dangling symlink — a link whose
943
- // target has been removed but whose link file is still on disk —
944
- // counts as "still present locally" and is NOT delete-propagated.
945
- // Pre-fix, existsSync followed the link, returned false, and the
946
- // entry was queued for remote DeleteObject in the same sync that
947
- // had just uploaded it via uploadSymlink. The link round-tripped
948
- // as "upload, then delete" in one cycle. ENOENT means truly
949
- // absent → eligible; other lstat errors propagate.
950
1217
  let presentLocally = true;
951
1218
  try {
952
1219
  fs.lstatSync(localPath);
@@ -963,24 +1230,67 @@ function computeDeletePlan(
963
1230
  }
964
1231
  }
965
1232
  if (presentLocally) continue;
966
- // (3) Symmetric filter guard. `shouldSync` is constructed from the same
967
- // hqRoot the pull leg uses, so a key the pull would have skipped
968
- // ("ignored") is also one we must not delete-propagate.
969
- //
970
- // Dual-hint probe: by the time we're considering this entry for
971
- // remote deletion, the local file is already gone — we have no
972
- // way to know whether it was a regular file or a symlink record.
973
- // A single isDir=false probe would silently keep the remote
974
- // record alive whenever the only matching .hqinclude allowlist
975
- // pattern is dir-only (e.g. `companies/*/knowledge/`), since
976
- // gitignore's slash semantics reject the slashless probe. The
977
- // same dual-hint pattern in walkDir/collectFiles (push) and
978
- // computePullPlan (pull) applies symmetrically here. Pure path
979
- // lookup, no I/O.
980
1233
  if (!shouldSync(localPath, false) && !shouldSync(localPath, true)) continue;
981
- // (4) Direction guard under "owned-only" policy.
982
- if (policy === "owned-only" && entry.direction !== "up") continue;
983
- out.push(relativeKey);
1234
+ // Ephemeral artifacts (conflict mirrors) never propagate-delete via the
1235
+ // normal path — see EPHEMERAL_PATH_PATTERN doc.
1236
+ if (isEphemeralPath(relativeKey)) continue;
1237
+
1238
+ if (policy === "all") {
1239
+ plan.toDelete.push(relativeKey);
1240
+ continue;
1241
+ }
1242
+ if (policy === "owned-only") {
1243
+ if (entry.direction !== "up") continue;
1244
+ plan.toDelete.push(relativeKey);
1245
+ continue;
1246
+ }
1247
+ // currency-gated: queue for HEAD unless the entry is legacy (no etag).
1248
+ const journalEtag = entry.remoteEtag;
1249
+ if (!journalEtag) {
1250
+ plan.refusedStale.push({
1251
+ key: relativeKey,
1252
+ journalEtag: "<legacy-no-etag>",
1253
+ remoteEtag: "<unknown>",
1254
+ reason: "legacy-no-etag",
1255
+ });
1256
+ continue;
1257
+ }
1258
+ headCandidates.push({ key: relativeKey, journalEtag });
1259
+ }
1260
+
1261
+ // Stage 2: bounded-parallel HEAD pass. Promise.all over chunks of size
1262
+ // `DELETE_PLAN_HEAD_CONCURRENCY` so a large candidate set doesn't
1263
+ // serialize into N round-trips, and so we don't burst past the AWS-SDK
1264
+ // default agent's per-host socket cap. Each result is bucketed
1265
+ // independently; note that a HEAD that throws rejects its whole chunk (errors
1266
+ // propagate from the chunk's Promise.all and are surfaced by share()'s
1267
+ // outer try/catch, mirroring the existing pre-share error handling).
1268
+ for (let i = 0; i < headCandidates.length; i += DELETE_PLAN_HEAD_CONCURRENCY) {
1269
+ const chunk = headCandidates.slice(i, i + DELETE_PLAN_HEAD_CONCURRENCY);
1270
+ const results = await Promise.all(
1271
+ chunk.map(async (c) => ({
1272
+ candidate: c,
1273
+ remote: await headRemoteFile(ctx, c.key),
1274
+ })),
1275
+ );
1276
+ for (const { candidate, remote } of results) {
1277
+ if (remote === null) {
1278
+ plan.toTombstone.push(candidate.key);
1279
+ continue;
1280
+ }
1281
+ const currentEtag = normalizeEtag(remote.etag);
1282
+ if (currentEtag === candidate.journalEtag) {
1283
+ plan.toDelete.push(candidate.key);
1284
+ } else {
1285
+ plan.refusedStale.push({
1286
+ key: candidate.key,
1287
+ journalEtag: candidate.journalEtag,
1288
+ remoteEtag: currentEtag,
1289
+ reason: "stale-etag",
1290
+ });
1291
+ }
1292
+ }
984
1293
  }
985
- return out;
1294
+
1295
+ return plan;
986
1296
  }
package/src/cli/sync.ts CHANGED
@@ -92,6 +92,34 @@ export type SyncProgressEvent =
92
92
  | {
93
93
  type: "new-files";
94
94
  files: Array<{ path: string; bytes: number; addedBy: string | null }>;
95
+ }
96
+ | {
97
+ /**
98
+ * Emitted by the `currency-gated` delete policy when a delete candidate
99
+ * is refused. Two reasons, discriminated by `reason`:
100
+ *
101
+ * - `"stale-etag"`: the local file is missing but the remote object's
102
+ * current ETag no longer matches the journal's last-recorded etag.
103
+ * Some other device (or another out-of-band write) modified the
104
+ * remote object since this machine last synced it. The pull leg of
105
+ * `sync now` will re-pull naturally via the same `hasRemoteChanged`
106
+ * path that powers conflict detection. Journal entry is left intact
107
+ * so the pull can use it as the baseline for the 3-way merge.
108
+ * `journalEtag` and `remoteEtag` are real ETag strings.
109
+ *
110
+ * - `"legacy-no-etag"`: the journal entry predates remoteEtag tracking
111
+ * (no `remoteEtag` recorded). We can't prove currency without an
112
+ * etag, so the delete is refused in the safe direction. A future
113
+ * sync that picks up an etag for this entry can re-evaluate.
114
+ * `journalEtag` and `remoteEtag` are sentinel strings
115
+ * (`<legacy-no-etag>` / `<unknown>`) — do not render as ETags.
116
+ * Consumers should branch on `reason`, not on the etag values.
117
+ */
118
+ type: "delete-refused-stale-etag";
119
+ path: string;
120
+ journalEtag: string;
121
+ remoteEtag: string;
122
+ reason: "stale-etag" | "legacy-no-etag";
95
123
  };
96
124
 
97
125
  export interface SyncOptions {