@indigoai-us/hq-cloud 6.2.7 → 6.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/bin/sync-runner.d.ts +22 -2
  2. package/dist/bin/sync-runner.d.ts.map +1 -1
  3. package/dist/bin/sync-runner.js +105 -3
  4. package/dist/bin/sync-runner.js.map +1 -1
  5. package/dist/bin/sync-runner.test.js +262 -0
  6. package/dist/bin/sync-runner.test.js.map +1 -1
  7. package/dist/cli/reindex.d.ts +8 -0
  8. package/dist/cli/reindex.d.ts.map +1 -1
  9. package/dist/cli/reindex.js +222 -198
  10. package/dist/cli/reindex.js.map +1 -1
  11. package/dist/cli/reindex.test.js +35 -0
  12. package/dist/cli/reindex.test.js.map +1 -1
  13. package/dist/cli/rescue-core.js +14 -2
  14. package/dist/cli/rescue-core.js.map +1 -1
  15. package/dist/cli/rescue-hq-root-guard.test.d.ts +2 -0
  16. package/dist/cli/rescue-hq-root-guard.test.d.ts.map +1 -0
  17. package/dist/cli/rescue-hq-root-guard.test.js +176 -0
  18. package/dist/cli/rescue-hq-root-guard.test.js.map +1 -0
  19. package/dist/cli/rescue.d.ts.map +1 -1
  20. package/dist/cli/rescue.js +39 -16
  21. package/dist/cli/rescue.js.map +1 -1
  22. package/dist/cli/rescue.reindex.test.js +15 -2
  23. package/dist/cli/rescue.reindex.test.js.map +1 -1
  24. package/dist/cli/sync.d.ts.map +1 -1
  25. package/dist/cli/sync.js +3 -1
  26. package/dist/cli/sync.js.map +1 -1
  27. package/dist/cli/sync.test.js +2 -1
  28. package/dist/cli/sync.test.js.map +1 -1
  29. package/dist/operation-lock.d.ts +100 -0
  30. package/dist/operation-lock.d.ts.map +1 -0
  31. package/dist/operation-lock.js +256 -0
  32. package/dist/operation-lock.js.map +1 -0
  33. package/dist/operation-lock.test.d.ts +5 -0
  34. package/dist/operation-lock.test.d.ts.map +1 -0
  35. package/dist/operation-lock.test.js +140 -0
  36. package/dist/operation-lock.test.js.map +1 -0
  37. package/dist/sync/event-sync.d.ts +181 -0
  38. package/dist/sync/event-sync.d.ts.map +1 -0
  39. package/dist/sync/event-sync.js +316 -0
  40. package/dist/sync/event-sync.js.map +1 -0
  41. package/dist/sync/event-sync.test.d.ts +14 -0
  42. package/dist/sync/event-sync.test.d.ts.map +1 -0
  43. package/dist/sync/event-sync.test.js +440 -0
  44. package/dist/sync/event-sync.test.js.map +1 -0
  45. package/package.json +1 -1
  46. package/src/bin/sync-runner.test.ts +323 -0
  47. package/src/bin/sync-runner.ts +139 -4
  48. package/src/cli/reindex.test.ts +45 -0
  49. package/src/cli/reindex.ts +36 -0
  50. package/src/cli/rescue-core.ts +15 -2
  51. package/src/cli/rescue-hq-root-guard.test.ts +193 -0
  52. package/src/cli/rescue.reindex.test.ts +17 -2
  53. package/src/cli/rescue.ts +40 -15
  54. package/src/cli/sync.test.ts +2 -1
  55. package/src/cli/sync.ts +3 -1
  56. package/src/operation-lock.test.ts +162 -0
  57. package/src/operation-lock.ts +293 -0
  58. package/src/sync/event-sync.test.ts +533 -0
  59. package/src/sync/event-sync.ts +481 -0
  60. package/test/e2e/sync/cross-tenant-isolation.test.ts +126 -0
@@ -30,6 +30,7 @@ import type {
30
30
  } from "./sync-runner.js";
31
31
  import { FakeClock } from "../watcher.js";
32
32
  import { PERSONAL_VAULT_JOURNAL_SLUG } from "../journal.js";
33
+ import { lockPathFor, OPERATION_LOCKED_EXIT } from "../operation-lock.js";
33
34
  import type { SyncResult, SyncOptions } from "../cli/sync.js";
34
35
  import type { ShareResult, ShareOptions } from "../cli/share.js";
35
36
  import type {
@@ -3566,3 +3567,325 @@ describe("readPinnedPrefixes", () => {
3566
3567
  expect(readPinnedPrefixes(root, "acme")).toEqual([]);
3567
3568
  });
3568
3569
  });
3570
+
3571
+ // ---------------------------------------------------------------------------
3572
+ // Operation lock — one-shot sync takes it; the watch runner is exempt.
3573
+ // ---------------------------------------------------------------------------
3574
+ describe("runRunnerWithLoop — operation lock", () => {
3575
+ const HQ = "/tmp/hq-oplock";
3576
+
3577
+ /** Write a live-holder lock (pid 1) for the HQ root into the test state dir. */
3578
+ function writeLiveHolder(command: string): string {
3579
+ const p = lockPathFor(HQ);
3580
+ fs.mkdirSync(path.dirname(p), { recursive: true });
3581
+ fs.writeFileSync(
3582
+ p,
3583
+ JSON.stringify({ pid: 1, command, startedAt: new Date(0).toISOString(), hqRoot: HQ }),
3584
+ );
3585
+ return p;
3586
+ }
3587
+
3588
+ it("one-shot sync refuses fast (exit 17) when another op holds the root", async () => {
3589
+ const lp = writeLiveHolder("rescue");
3590
+ const errs: string[] = [];
3591
+ const spy = vi
3592
+ .spyOn(process.stderr, "write")
3593
+ .mockImplementation((chunk: string | Uint8Array) => {
3594
+ errs.push(String(chunk));
3595
+ return true;
3596
+ });
3597
+
3598
+ // No --watch → one-shot. Refusal short-circuits BEFORE runRunner (so no
3599
+ // network / auth is touched).
3600
+ const code = await runRunnerWithLoop(["--companies", "--hq-root", HQ]);
3601
+
3602
+ spy.mockRestore();
3603
+ expect(code).toBe(OPERATION_LOCKED_EXIT);
3604
+ expect(errs.join("")).toContain("rescue"); // names the holder
3605
+ // The holder's lock is left intact — we refused, we didn't take it over.
3606
+ const held = JSON.parse(fs.readFileSync(lp, "utf8"));
3607
+ expect(held.pid).toBe(1);
3608
+ expect(held.command).toBe("rescue");
3609
+ });
3610
+
3611
+ it("the watch runner is EXEMPT — runs despite a held lock and never takes it", async () => {
3612
+ const lp = writeLiveHolder("sync");
3613
+ const watcher = makeWatcherStub();
3614
+ let triggerShutdown = () => {};
3615
+ const runPass = vi.fn().mockResolvedValue(0);
3616
+
3617
+ const loop = runRunnerWithLoop(
3618
+ ["--companies", "--watch", "--event-push", "--hq-root", HQ],
3619
+ {
3620
+ runPass,
3621
+ clock: new FakeClock(),
3622
+ createWatcher: () => watcher,
3623
+ sleep: () => new Promise<void>(() => {}),
3624
+ onShutdownSignal: (handler) => {
3625
+ triggerShutdown = handler;
3626
+ return () => {};
3627
+ },
3628
+ },
3629
+ );
3630
+
3631
+ await Promise.resolve();
3632
+ await Promise.resolve();
3633
+ // It started and ran a pass even though the lock is held → not blocked.
3634
+ expect(watcher.started).toBe(true);
3635
+ expect(runPass).toHaveBeenCalled();
3636
+
3637
+ triggerShutdown();
3638
+ await loop;
3639
+
3640
+ // The pre-existing holder lock is untouched → the watcher never took it.
3641
+ const held = JSON.parse(fs.readFileSync(lp, "utf8"));
3642
+ expect(held.pid).toBe(1);
3643
+ expect(held.command).toBe("sync");
3644
+ });
3645
+ });
3646
+
3647
+ // ---------------------------------------------------------------------------
3648
+ // Phase 3 — event-driven publish + pull wiring (US-017/018/019)
3649
+ // ---------------------------------------------------------------------------
3650
+ //
3651
+ // Exercises the rollout-gated bring-up through the `startEventSync` /
3652
+ // `getIdTokenClaims` seams: gate-off is byte-identical to today (the seam is
3653
+ // never consulted), gate-on passes the wiring the right identity + sync
3654
+ // bridge, and the publish leg fires only AFTER a SUCCESSFUL targeted push
3655
+ // pass (events must never announce bytes that are not in S3 yet).
3656
+
3657
+ interface BatchWatcherStub extends WatcherSurface {
3658
+ emit(changedRelPath?: string, batch?: { paths: Map<string, string> }): void;
3659
+ disposed: boolean;
3660
+ }
3661
+
3662
+ function makeBatchWatcherStub(): BatchWatcherStub {
3663
+ const listeners = new Set<
3664
+ (p?: string, b?: { paths: Map<string, string> }) => void
3665
+ >();
3666
+ return {
3667
+ disposed: false,
3668
+ onChange(listener) {
3669
+ listeners.add(listener);
3670
+ return () => listeners.delete(listener);
3671
+ },
3672
+ start() {},
3673
+ stop() {},
3674
+ dispose() {
3675
+ this.disposed = true;
3676
+ listeners.clear();
3677
+ },
3678
+ emit(changedRelPath?: string, batch?: { paths: Map<string, string> }) {
3679
+ for (const l of [...listeners]) l(changedRelPath, batch);
3680
+ },
3681
+ };
3682
+ }
3683
+
3684
+ describe("runRunnerWithLoop — Phase 3 event-sync wiring (US-017/018/019)", () => {
3685
+ const ENROLLED_CLAIMS = { email: "hassaan@getindigo.ai" };
3686
+ const UNENROLLED_CLAIMS = { email: "someone@getindigo.ai" };
3687
+
3688
+ function runLoop(opts: {
3689
+ claims: { email?: string } | null;
3690
+ startEventSync?: ReturnType<typeof vi.fn>;
3691
+ watcher?: BatchWatcherStub;
3692
+ runPass?: ReturnType<typeof vi.fn>;
3693
+ }) {
3694
+ const watcher = opts.watcher ?? makeBatchWatcherStub();
3695
+ const runPass = opts.runPass ?? vi.fn().mockResolvedValue(0);
3696
+ const clock = new FakeClock();
3697
+ let triggerShutdown = () => {};
3698
+ const loop = runRunnerWithLoop(
3699
+ ["--companies", "--watch", "--event-push", "--hq-root", "/tmp/hq"],
3700
+ {
3701
+ runPass,
3702
+ clock,
3703
+ createWatcher: () => watcher,
3704
+ sleep: () => new Promise<void>(() => {}),
3705
+ onShutdownSignal: (handler) => {
3706
+ triggerShutdown = handler;
3707
+ return () => {};
3708
+ },
3709
+ getIdTokenClaims: () => opts.claims as never,
3710
+ getAccessToken: async () => "jwt-test",
3711
+ startEventSync: (opts.startEventSync ??
3712
+ vi.fn().mockResolvedValue(null)) as never,
3713
+ },
3714
+ );
3715
+ return { loop, watcher, runPass, clock, shutdown: () => triggerShutdown() };
3716
+ }
3717
+
3718
+ async function microtasks(n = 6) {
3719
+ for (let i = 0; i < n; i++) await Promise.resolve();
3720
+ }
3721
+
3722
+ it("gate OFF (unenrolled email): the event-sync seam is never consulted", async () => {
3723
+ const startEventSync = vi.fn().mockResolvedValue(null);
3724
+ const { loop, shutdown } = runLoop({
3725
+ claims: UNENROLLED_CLAIMS,
3726
+ startEventSync,
3727
+ });
3728
+ await microtasks();
3729
+ shutdown();
3730
+ await loop;
3731
+ expect(startEventSync).not.toHaveBeenCalled();
3732
+ });
3733
+
3734
+ it("gate OFF (no cached claims): the event-sync seam is never consulted", async () => {
3735
+ const startEventSync = vi.fn().mockResolvedValue(null);
3736
+ const { loop, shutdown } = runLoop({ claims: null, startEventSync });
3737
+ await microtasks();
3738
+ shutdown();
3739
+ await loop;
3740
+ expect(startEventSync).not.toHaveBeenCalled();
3741
+ });
3742
+
3743
+ it("gate ON (enrolled email): wiring receives root, api url, auth + a sync bridge", async () => {
3744
+ const startEventSync = vi.fn().mockResolvedValue(null);
3745
+ const { loop, shutdown } = runLoop({
3746
+ claims: ENROLLED_CLAIMS,
3747
+ startEventSync,
3748
+ });
3749
+ await microtasks();
3750
+ shutdown();
3751
+ await loop;
3752
+ expect(startEventSync).toHaveBeenCalledTimes(1);
3753
+ const arg = startEventSync.mock.calls[0][0];
3754
+ expect(arg.hqRoot).toBe("/tmp/hq");
3755
+ expect(arg.apiUrl).toContain("https://");
3756
+ expect(typeof arg.deviceId).toBe("string");
3757
+ expect(arg.deviceId.length).toBeGreaterThan(0);
3758
+ expect(typeof arg.resolveTenantId).toBe("function");
3759
+ expect(typeof arg.syncFn).toBe("function");
3760
+ });
3761
+
3762
+ it("publishes the batch ONLY after a successful targeted push pass", async () => {
3763
+ const publishBatch = vi.fn();
3764
+ const startEventSync = vi.fn().mockResolvedValue({
3765
+ publishBatch,
3766
+ receiver: { start: async () => {}, dispose: async () => {}, connected: true },
3767
+ ownDeviceId: "dev-test",
3768
+ dispose: vi.fn().mockResolvedValue(undefined),
3769
+ });
3770
+ const runPass = vi.fn().mockResolvedValue(0);
3771
+ const { loop, watcher, clock, shutdown } = runLoop({
3772
+ claims: ENROLLED_CLAIMS,
3773
+ startEventSync,
3774
+ runPass,
3775
+ });
3776
+ await microtasks(); // initial poll pass + async event-sync bring-up
3777
+ runPass.mockClear();
3778
+
3779
+ const batch = {
3780
+ paths: new Map([["/tmp/hq/companies/indigo/a.md", "companies/indigo/a.md"]]),
3781
+ };
3782
+ watcher.emit("companies/indigo/a.md", batch);
3783
+ clock.advance(0);
3784
+ await microtasks();
3785
+
3786
+ // Targeted push ran and succeeded → batch published.
3787
+ expect(runPass).toHaveBeenCalled();
3788
+ expect(publishBatch).toHaveBeenCalledTimes(1);
3789
+ expect(publishBatch.mock.calls[0][0]).toBe(batch);
3790
+
3791
+ shutdown();
3792
+ await loop;
3793
+ });
3794
+
3795
+ it("publishes NOTHING when the targeted push pass fails", async () => {
3796
+ const publishBatch = vi.fn();
3797
+ const startEventSync = vi.fn().mockResolvedValue({
3798
+ publishBatch,
3799
+ receiver: { start: async () => {}, dispose: async () => {}, connected: true },
3800
+ ownDeviceId: "dev-test",
3801
+ dispose: vi.fn().mockResolvedValue(undefined),
3802
+ });
3803
+ // Initial poll pass succeeds; the targeted pass fails.
3804
+ const runPass = vi
3805
+ .fn()
3806
+ .mockResolvedValueOnce(0)
3807
+ .mockResolvedValue(1);
3808
+ const { loop, watcher, clock, shutdown } = runLoop({
3809
+ claims: ENROLLED_CLAIMS,
3810
+ startEventSync,
3811
+ runPass,
3812
+ });
3813
+ await microtasks();
3814
+
3815
+ watcher.emit(
3816
+ "companies/indigo/a.md",
3817
+ { paths: new Map([["/tmp/hq/companies/indigo/a.md", "companies/indigo/a.md"]]) },
3818
+ );
3819
+ clock.advance(0);
3820
+ await microtasks();
3821
+
3822
+ expect(publishBatch).not.toHaveBeenCalled();
3823
+ shutdown();
3824
+ await loop;
3825
+ });
3826
+
3827
+ it("falls back to a synthesized single-path batch when the watcher emits a bare path", async () => {
3828
+ const publishBatch = vi.fn();
3829
+ const startEventSync = vi.fn().mockResolvedValue({
3830
+ publishBatch,
3831
+ receiver: { start: async () => {}, dispose: async () => {}, connected: true },
3832
+ ownDeviceId: "dev-test",
3833
+ dispose: vi.fn().mockResolvedValue(undefined),
3834
+ });
3835
+ const { loop, watcher, clock, shutdown } = runLoop({
3836
+ claims: ENROLLED_CLAIMS,
3837
+ startEventSync,
3838
+ });
3839
+ await microtasks();
3840
+
3841
+ watcher.emit("companies/indigo/b.md"); // no batch
3842
+ clock.advance(0);
3843
+ await microtasks();
3844
+
3845
+ expect(publishBatch).toHaveBeenCalledTimes(1);
3846
+ const synthesized = publishBatch.mock.calls[0][0] as {
3847
+ paths: Map<string, string>;
3848
+ };
3849
+ expect([...synthesized.paths.values()]).toEqual(["companies/indigo/b.md"]);
3850
+
3851
+ shutdown();
3852
+ await loop;
3853
+ });
3854
+
3855
+ it("disposes the event-sync handles on shutdown", async () => {
3856
+ const dispose = vi.fn().mockResolvedValue(undefined);
3857
+ const startEventSync = vi.fn().mockResolvedValue({
3858
+ publishBatch: vi.fn(),
3859
+ receiver: { start: async () => {}, dispose: async () => {}, connected: true },
3860
+ ownDeviceId: "dev-test",
3861
+ dispose,
3862
+ });
3863
+ const { loop, shutdown } = runLoop({
3864
+ claims: ENROLLED_CLAIMS,
3865
+ startEventSync,
3866
+ });
3867
+ await microtasks();
3868
+ shutdown();
3869
+ await loop;
3870
+ expect(dispose).toHaveBeenCalled();
3871
+ });
3872
+
3873
+ it("env override HQ_SYNC_EVENT_SYNC=0 keeps the seam dormant for the enrolled account", async () => {
3874
+ const prev = process.env.HQ_SYNC_EVENT_SYNC;
3875
+ process.env.HQ_SYNC_EVENT_SYNC = "0";
3876
+ try {
3877
+ const startEventSync = vi.fn().mockResolvedValue(null);
3878
+ const { loop, shutdown } = runLoop({
3879
+ claims: ENROLLED_CLAIMS,
3880
+ startEventSync,
3881
+ });
3882
+ await microtasks();
3883
+ shutdown();
3884
+ await loop;
3885
+ expect(startEventSync).not.toHaveBeenCalled();
3886
+ } finally {
3887
+ if (prev === undefined) delete process.env.HQ_SYNC_EVENT_SYNC;
3888
+ else process.env.HQ_SYNC_EVENT_SYNC = prev;
3889
+ }
3890
+ });
3891
+ });
@@ -100,6 +100,11 @@ import { collectAndSendTelemetry } from "../telemetry.js";
100
100
  import { collectAndSendSkillTelemetry } from "../skill-telemetry.js";
101
101
  import { reindexAfterSync } from "../qmd-reindex.js";
102
102
  import { pruneConflictIndex } from "../lib/conflict-index.js";
103
+ import {
104
+ withOperationLock,
105
+ OperationLockedError,
106
+ OPERATION_LOCKED_EXIT,
107
+ } from "../operation-lock.js";
103
108
  import { describeError } from "../lib/describe-error.js";
104
109
  import { getOrCreateMachineId } from "../lib/machine-id.js";
105
110
  import {
@@ -107,12 +112,19 @@ import {
107
112
  WatchPushDriver,
108
113
  systemClock,
109
114
  type Clock,
115
+ type TreeChangeBatch,
110
116
  } from "../watcher.js";
111
117
  import {
112
118
  NoopPushReceiver,
113
119
  type PushReceiver,
114
120
  type SyncEngineFn,
115
121
  } from "../sync/push-receiver.js";
122
+ import {
123
+ resolveEventSync,
124
+ startEventSync as defaultStartEventSync,
125
+ type EventSyncHandles,
126
+ type StartEventSyncOptions,
127
+ } from "../sync/event-sync.js";
116
128
  import {
117
129
  PERSONAL_VAULT_JOURNAL_SLUG,
118
130
  migratePersonalVaultJournal,
@@ -1824,6 +1836,27 @@ export interface RunnerLoopDeps {
1824
1836
  syncFn: SyncEngineFn;
1825
1837
  hqRoot: string;
1826
1838
  }) => PushReceiver;
1839
+ /**
1840
+ * Phase 3 (US-017/US-018/US-019): factory for the event-driven publish +
1841
+ * pull wiring, consulted only when `--event-push` is on AND the rollout
1842
+ * gate ({@link resolveEventSync}) passes for the signed-in account.
1843
+ * Defaults to the real {@link defaultStartEventSync}. Tests inject a stub
1844
+ * to assert gate behavior without network/AWS.
1845
+ */
1846
+ startEventSync?: (
1847
+ opts: StartEventSyncOptions,
1848
+ ) => Promise<EventSyncHandles | null>;
1849
+ /**
1850
+ * Identity-claims source for the Phase 3 rollout gate (the loop has no
1851
+ * RunnerDeps; mirror of RunnerDeps.getIdTokenClaims). Defaults to reading
1852
+ * the cached Cognito idToken.
1853
+ */
1854
+ getIdTokenClaims?: () => IdTokenClaims | null;
1855
+ /**
1856
+ * Access-token source for the Phase 3 vault API calls (publish transport +
1857
+ * subscribe). Defaults to {@link getValidAccessToken} non-interactive.
1858
+ */
1859
+ getAccessToken?: () => Promise<string>;
1827
1860
  }
1828
1861
 
1829
1862
  /**
@@ -1838,7 +1871,9 @@ export interface RunnerLoopDeps {
1838
1871
  * relative path so the loop targets just that company.
1839
1872
  */
1840
1873
  export interface WatcherSurface {
1841
- onChange(listener: (changedRelPath?: string) => void): () => void;
1874
+ onChange(
1875
+ listener: (changedRelPath?: string, batch?: TreeChangeBatch) => void,
1876
+ ): () => void;
1842
1877
  start(): void;
1843
1878
  stop(): void;
1844
1879
  dispose(): void;
@@ -1920,7 +1955,23 @@ export async function runRunnerWithLoop(
1920
1955
  deps: RunnerLoopDeps = {},
1921
1956
  ): Promise<number> {
1922
1957
  if (!argv.includes("--watch")) {
1923
- return runRunner(argv);
1958
+ // One-shot cloud sync — take the per-root operation lock so it is mutually
1959
+ // exclusive with rescue/reindex. The `--watch` path below is the push
1960
+ // watcher and is intentionally EXEMPT (it neither takes nor is blocked by
1961
+ // the lock; its in-process targeted passes call `runRunner` directly, not
1962
+ // through here). If args don't parse, fall through to `runRunner` so it
1963
+ // surfaces the parse error rather than us masking it with a lock failure.
1964
+ const parsed = parseArgs(argv);
1965
+ if ("error" in parsed) return runRunner(argv);
1966
+ try {
1967
+ return await withOperationLock(parsed.hqRoot, "sync", () => runRunner(argv));
1968
+ } catch (err) {
1969
+ if (err instanceof OperationLockedError) {
1970
+ process.stderr.write(err.message + "\n");
1971
+ return OPERATION_LOCKED_EXIT;
1972
+ }
1973
+ throw err;
1974
+ }
1924
1975
  }
1925
1976
  const sleep =
1926
1977
  deps.sleep ??
@@ -1978,12 +2029,17 @@ export async function runRunnerWithLoop(
1978
2029
  let driver: WatchPushDriver | null = null;
1979
2030
  let detachSignal: (() => void) | null = null;
1980
2031
  let lastChangedRel: string | null = null;
2032
+ let lastBatch: TreeChangeBatch | null = null;
1981
2033
  // ---- pull-on-event receiver (Phase 2, US-009) ------------------------
1982
2034
  // Started after the watcher, disposed before the watcher (mirror of the
1983
2035
  // PushTransport ordering). Dormant by default: the default factory returns
1984
2036
  // a NoopPushReceiver, and even a real receiver stays dormant unless the
1985
2037
  // per-tenant feature flag is on AND a queue URL is provisioned server-side.
1986
2038
  let receiver: PushReceiver | null = null;
2039
+ // ---- event-driven publish + pull (Phase 3, US-017/018/019) ------------
2040
+ // Brought up asynchronously after the watcher when the rollout gate
2041
+ // passes; null until ready (and stays null on startup failure → poll-only).
2042
+ let eventSync: EventSyncHandles | null = null;
1987
2043
 
1988
2044
  if (eventPush) {
1989
2045
  const clock = deps.clock ?? systemClock;
@@ -2011,12 +2067,28 @@ export async function runRunnerWithLoop(
2011
2067
  push: async () => {
2012
2068
  if (stopped) return;
2013
2069
  const rel = lastChangedRel;
2070
+ // Snapshot the settled batch BEFORE the await: a change landing
2071
+ // mid-pass overwrites lastBatch for the NEXT pass, and this pass
2072
+ // must only announce what it actually pushed.
2073
+ const batchForPublish = lastBatch;
2074
+ lastBatch = null;
2014
2075
  const route = rel
2015
2076
  ? routeChangeToTarget(rel)
2016
2077
  : { kind: "personal" as const };
2017
2078
  if (!route) return;
2018
2079
  const targetedArgv = buildTargetedPushArgv(route, passArgv);
2019
- await runGuarded(() => runPass(targetedArgv));
2080
+ const result = await runGuarded(() => runPass(targetedArgv));
2081
+ // Phase 3 (US-017): publish PushEvents only AFTER the targeted push
2082
+ // pass succeeded — an event must never announce bytes that are not
2083
+ // in S3 yet. A skipped pass (guard held) or a failed pass publishes
2084
+ // nothing; the cadence poll covers the miss. Fall back to a
2085
+ // single-path batch when the watcher emitted a bare path signal.
2086
+ if (result === 0 && eventSync) {
2087
+ const batch: TreeChangeBatch | null =
2088
+ batchForPublish ??
2089
+ (rel ? { paths: new Map([[path.join(hqRoot, rel), rel]]) } : null);
2090
+ if (batch) eventSync.publishBatch(batch);
2091
+ }
2020
2092
  },
2021
2093
  });
2022
2094
 
@@ -2025,9 +2097,10 @@ export async function runRunnerWithLoop(
2025
2097
  // still serialized behind any in-flight pass. A path-aware watcher passes
2026
2098
  // the changed relative path so the push targets just its owning company;
2027
2099
  // the bare-signal TreeWatcher leaves it null → personal-vault route.
2028
- watcher.onChange((changedRelPath) => {
2100
+ watcher.onChange((changedRelPath, batch) => {
2029
2101
  if (stopped) return;
2030
2102
  lastChangedRel = changedRelPath ?? null;
2103
+ lastBatch = batch ?? null;
2031
2104
  driver?.notifyChange();
2032
2105
  });
2033
2106
  watcher.start();
@@ -2056,6 +2129,60 @@ export async function runRunnerWithLoop(
2056
2129
  // start also keeps the poll loop's microtask timing identical to the
2057
2130
  // pre-US-009 wiring.)
2058
2131
  void Promise.resolve(receiver.start()).catch(() => undefined);
2132
+
2133
+ // ---- Phase 3: event-driven publish + pull (US-017/018/019) ----------
2134
+ // Gated to enrolled accounts (resolveEventSync — exact-email allowlist +
2135
+ // HQ_SYNC_EVENT_SYNC override). Brought up asynchronously so a slow
2136
+ // subscribe/vend can't delay the first poll pass; until (and unless) the
2137
+ // handles resolve, behavior is byte-identical to the gate-off path.
2138
+ const getClaims = deps.getIdTokenClaims ?? defaultGetIdTokenClaims;
2139
+ const email = getClaims()?.email;
2140
+ if (resolveEventSync(email, process.env.HQ_SYNC_EVENT_SYNC)) {
2141
+ const getAccessToken =
2142
+ deps.getAccessToken ??
2143
+ (() => getValidAccessToken(DEFAULT_COGNITO, { interactive: false }));
2144
+ const startES = deps.startEventSync ?? defaultStartEventSync;
2145
+ // Entirely async + caught: NOTHING in the Phase 3 bring-up (device-id
2146
+ // persistence, tenant resolution, subscribe) may crash or delay the
2147
+ // daemon — any failure degrades to poll-only.
2148
+ void (async () => {
2149
+ const handles = await startES({
2150
+ hqRoot,
2151
+ apiUrl: DEFAULT_VAULT_API_URL,
2152
+ authToken: getAccessToken,
2153
+ deviceId: getOrCreateMachineId(hqRoot),
2154
+ // The server rejects publishes whose originTenantId mismatches the
2155
+ // JWT principal, so resolve the SAME canonical person uid the vault
2156
+ // API derives from this token.
2157
+ resolveTenantId: async () => {
2158
+ const client = new VaultClient({
2159
+ apiUrl: DEFAULT_VAULT_API_URL,
2160
+ authToken: getAccessToken,
2161
+ region: DEFAULT_COGNITO.region,
2162
+ });
2163
+ const persons = await client.entity.listByType("person");
2164
+ const pick = pickCanonicalPersonEntity(persons);
2165
+ if (!pick?.uid) {
2166
+ throw new Error("no canonical person entity for this account");
2167
+ }
2168
+ return pick.uid;
2169
+ },
2170
+ syncFn: receiverSyncFn,
2171
+ log: (m) => process.stderr.write(`${m}\n`),
2172
+ });
2173
+ if (!handles) return;
2174
+ if (stopped) {
2175
+ // Shutdown raced the async bring-up — tear straight down.
2176
+ void handles.dispose();
2177
+ return;
2178
+ }
2179
+ eventSync = handles;
2180
+ })().catch((err) => {
2181
+ process.stderr.write(
2182
+ `event-sync: wiring failed, continuing poll-only: ${describeError(err)}\n`,
2183
+ );
2184
+ });
2185
+ }
2059
2186
  }
2060
2187
 
2061
2188
  // ---- clean shutdown --------------------------------------------------
@@ -2080,6 +2207,14 @@ export async function runRunnerWithLoop(
2080
2207
  } catch {
2081
2208
  /* ignore */
2082
2209
  }
2210
+ // Phase 3 wiring (publish transport + live receiver) — torn down with
2211
+ // the same fire-and-forget posture as the Phase 2 receiver above.
2212
+ try {
2213
+ void eventSync?.dispose();
2214
+ } catch {
2215
+ /* ignore */
2216
+ }
2217
+ eventSync = null;
2083
2218
  try {
2084
2219
  driver?.dispose();
2085
2220
  } catch {
@@ -13,16 +13,24 @@ import * as fs from "fs";
13
13
  import * as path from "path";
14
14
  import * as os from "os";
15
15
  import { reindex } from "./reindex.js";
16
+ import { lockPathFor, OPERATION_LOCKED_EXIT } from "../operation-lock.js";
16
17
 
17
18
  describe("reindex", () => {
18
19
  let root: string;
20
+ let stateDir: string;
19
21
 
20
22
  beforeEach(() => {
21
23
  root = fs.mkdtempSync(path.join(os.tmpdir(), "ms-test-"));
24
+ // Redirect the operation lock into a throwaway dir so reindex's default
25
+ // locking never touches the real ~/.hq during tests.
26
+ stateDir = fs.mkdtempSync(path.join(os.tmpdir(), "ms-state-"));
27
+ process.env.HQ_STATE_DIR = stateDir;
22
28
  });
23
29
 
24
30
  afterEach(() => {
25
31
  fs.rmSync(root, { recursive: true, force: true });
32
+ fs.rmSync(stateDir, { recursive: true, force: true });
33
+ delete process.env.HQ_STATE_DIR;
26
34
  });
27
35
 
28
36
  function writeSkill(rel: string): void {
@@ -124,4 +132,41 @@ describe("reindex", () => {
124
132
  expect(fs.existsSync(wrapper)).toBe(true);
125
133
  expect(fs.lstatSync(path.join(wrapper, "SKILL.md")).isSymbolicLink()).toBe(true);
126
134
  });
135
+
136
+ // ── operation lock (mutual exclusion with sync/rescue) ──────────────────
137
+
138
+ it("refuses (OPERATION_LOCKED_EXIT) when another op holds this root's lock", () => {
139
+ // A live holder in another process (pid 1) → reindex must refuse fast and
140
+ // do no work.
141
+ const lp = lockPathFor(root);
142
+ fs.mkdirSync(path.dirname(lp), { recursive: true });
143
+ fs.writeFileSync(
144
+ lp,
145
+ JSON.stringify({ pid: 1, command: "sync", startedAt: new Date(0).toISOString(), hqRoot: root }),
146
+ );
147
+ const before = fs.existsSync(path.join(root, ".claude/skills"));
148
+ const { status } = reindex({ repoRoot: root });
149
+ expect(status).toBe(OPERATION_LOCKED_EXIT);
150
+ // It refused before doing any work (didn't create .claude/skills).
151
+ expect(fs.existsSync(path.join(root, ".claude/skills"))).toBe(before);
152
+ });
153
+
154
+ it("skipLock bypasses the lock (internal sync/rescue caller path)", () => {
155
+ const lp = lockPathFor(root);
156
+ fs.mkdirSync(path.dirname(lp), { recursive: true });
157
+ fs.writeFileSync(
158
+ lp,
159
+ JSON.stringify({ pid: 1, command: "sync", startedAt: new Date(0).toISOString(), hqRoot: root }),
160
+ );
161
+ // Even with a live holder on record, the internal caller (which already
162
+ // holds the lock) runs to completion.
163
+ const { status } = reindex({ repoRoot: root, skipLock: true });
164
+ expect(status).toBe(0);
165
+ expect(fs.existsSync(path.join(root, ".claude/skills"))).toBe(true);
166
+ });
167
+
168
+ it("releases the lock after a normal run (no leftover lock file)", () => {
169
+ reindex({ repoRoot: root });
170
+ expect(fs.existsSync(lockPathFor(root))).toBe(false);
171
+ });
127
172
  });
@@ -17,10 +17,24 @@
17
17
  import { spawnSync } from "child_process";
18
18
  import * as fs from "fs";
19
19
  import * as path from "path";
20
+ import {
21
+ acquireOperationLock,
22
+ OperationLockedError,
23
+ OPERATION_LOCKED_EXIT,
24
+ type LockHandle,
25
+ } from "../operation-lock.js";
20
26
 
21
27
  export interface ReindexOptions {
22
28
  /** HQ root to operate on. Defaults to process.cwd(). */
23
29
  repoRoot?: string;
30
+ /**
31
+ * Skip the per-root operation lock. Internal callers (`sync()` / `rescue()`)
32
+ * already hold the lock for this root and pass `true` so reindex doesn't try
33
+ * to re-acquire it and refuse against their own live PID. Standalone callers
34
+ * (`hq reindex`, the reindex hook) leave it falsy so reindex is mutually
35
+ * exclusive with a running sync/rescue.
36
+ */
37
+ skipLock?: boolean;
24
38
  }
25
39
 
26
40
  export interface ReindexResult {
@@ -129,6 +143,25 @@ export function reindex(opts: ReindexOptions = {}): ReindexResult {
129
143
  }
130
144
  const root = path.resolve(rawRoot);
131
145
 
146
+ // Acquire the per-root operation lock unless an internal caller (sync/rescue,
147
+ // which already hold it) opted out. A live holder → refuse fast with the
148
+ // holder's command + PID. The whole body runs inside the try so the lock is
149
+ // released on every exit path (the process-level signal/exit hooks are the
150
+ // crash backstop).
151
+ let opLock: LockHandle | null = null;
152
+ if (!opts.skipLock) {
153
+ try {
154
+ opLock = acquireOperationLock(root, "reindex");
155
+ } catch (err) {
156
+ if (err instanceof OperationLockedError) {
157
+ warn(err.message);
158
+ return { status: OPERATION_LOCKED_EXIT };
159
+ }
160
+ throw err;
161
+ }
162
+ }
163
+ try {
164
+
132
165
  fs.mkdirSync(path.join(root, ".claude", "skills"), { recursive: true });
133
166
 
134
167
  // --- Build (namespace, src_rel) pairs -------------------------------------
@@ -363,4 +396,7 @@ export function reindex(opts: ReindexOptions = {}): ReindexResult {
363
396
  }
364
397
 
365
398
  return { status: 0 };
399
+ } finally {
400
+ opLock?.release();
401
+ }
366
402
  }