@indigoai-us/hq-cloud 6.7.1 → 6.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/dist/bin/sync-runner.d.ts.map +1 -1
  2. package/dist/bin/sync-runner.js +33 -1
  3. package/dist/bin/sync-runner.js.map +1 -1
  4. package/dist/bin/sync-runner.test.js +73 -4
  5. package/dist/bin/sync-runner.test.js.map +1 -1
  6. package/dist/cli/reindex.d.ts +11 -0
  7. package/dist/cli/reindex.d.ts.map +1 -1
  8. package/dist/cli/reindex.js +1 -1
  9. package/dist/cli/reindex.js.map +1 -1
  10. package/dist/cli/reindex.test.js +5 -4
  11. package/dist/cli/reindex.test.js.map +1 -1
  12. package/dist/cli/rescue.d.ts +20 -0
  13. package/dist/cli/rescue.d.ts.map +1 -1
  14. package/dist/cli/rescue.js +36 -2
  15. package/dist/cli/rescue.js.map +1 -1
  16. package/dist/cli/rescue.test.js +38 -1
  17. package/dist/cli/rescue.test.js.map +1 -1
  18. package/dist/cli/share.d.ts.map +1 -1
  19. package/dist/cli/share.js +104 -8
  20. package/dist/cli/share.js.map +1 -1
  21. package/dist/cli/share.test.js +190 -20
  22. package/dist/cli/share.test.js.map +1 -1
  23. package/dist/cognito-auth.d.ts.map +1 -1
  24. package/dist/cognito-auth.js +9 -1
  25. package/dist/cognito-auth.js.map +1 -1
  26. package/dist/machine-auth.test.js +4 -2
  27. package/dist/machine-auth.test.js.map +1 -1
  28. package/dist/object-io.d.ts +28 -2
  29. package/dist/object-io.d.ts.map +1 -1
  30. package/dist/object-io.js +76 -5
  31. package/dist/object-io.js.map +1 -1
  32. package/dist/object-io.test.js +93 -2
  33. package/dist/object-io.test.js.map +1 -1
  34. package/dist/operation-lock.d.ts +81 -10
  35. package/dist/operation-lock.d.ts.map +1 -1
  36. package/dist/operation-lock.js +177 -27
  37. package/dist/operation-lock.js.map +1 -1
  38. package/dist/operation-lock.test.js +122 -11
  39. package/dist/operation-lock.test.js.map +1 -1
  40. package/dist/s3.d.ts +3 -2
  41. package/dist/s3.d.ts.map +1 -1
  42. package/dist/s3.js +10 -5
  43. package/dist/s3.js.map +1 -1
  44. package/dist/vault-client.d.ts +9 -0
  45. package/dist/vault-client.d.ts.map +1 -1
  46. package/dist/vault-client.js.map +1 -1
  47. package/package.json +1 -1
  48. package/src/bin/sync-runner.test.ts +83 -4
  49. package/src/bin/sync-runner.ts +39 -1
  50. package/src/cli/reindex.test.ts +5 -4
  51. package/src/cli/reindex.ts +12 -1
  52. package/src/cli/rescue.test.ts +43 -1
  53. package/src/cli/rescue.ts +48 -2
  54. package/src/cli/share.test.ts +245 -9
  55. package/src/cli/share.ts +116 -8
  56. package/src/cognito-auth.ts +9 -1
  57. package/src/machine-auth.test.ts +4 -2
  58. package/src/object-io.test.ts +105 -2
  59. package/src/object-io.ts +121 -8
  60. package/src/operation-lock.test.ts +147 -10
  61. package/src/operation-lock.ts +234 -26
  62. package/src/s3.ts +11 -4
  63. package/src/vault-client.ts +9 -0
@@ -109,6 +109,30 @@ describe("factory selection", () => {
109
109
  });
110
110
  });
111
111
 
112
+ describe("S3SdkObjectIO.putObject — conditional-write fence", () => {
113
+ it("maps ifMatch (re-quoted) and ifNoneMatch onto the PutObjectCommand", async () => {
114
+ const io = new S3SdkObjectIO(ctx());
115
+ const sent: Array<Record<string, unknown>> = [];
116
+ // Swap the private client for a recorder — the fence mapping is pure
117
+ // input-shaping, no wire needed.
118
+ (io as unknown as { client: { send: (c: { input: Record<string, unknown> }) => Promise<unknown> } }).client = {
119
+ send: async (cmd) => {
120
+ sent.push(cmd.input);
121
+ return { ETag: '"landed"' };
122
+ },
123
+ };
124
+ await io.putObject({ key: "a", body: Buffer.from("x"), contentType: "t", ifMatch: "abc" });
125
+ expect(sent[0].IfMatch).toBe('"abc"'); // stripped etags re-quoted for If-Match
126
+ await io.putObject({ key: "a2", body: Buffer.from("x"), contentType: "t", ifMatch: '"quoted"' });
127
+ expect(sent[1].IfMatch).toBe('"quoted"'); // already-quoted passes through
128
+ await io.putObject({ key: "b", body: Buffer.from("y"), contentType: "t", ifNoneMatch: "*" });
129
+ expect(sent[2].IfNoneMatch).toBe("*");
130
+ await io.putObject({ key: "c", body: Buffer.from("z"), contentType: "t" });
131
+ expect(sent[3].IfMatch).toBeUndefined(); // unfenced PUT stays unfenced
132
+ expect(sent[3].IfNoneMatch).toBeUndefined();
133
+ });
134
+ });
135
+
112
136
  describe("PresignObjectIO.putObject", () => {
113
137
  let fetchMock: ReturnType<typeof vi.fn>;
114
138
  beforeEach(() => {
@@ -190,6 +214,58 @@ describe("PresignObjectIO.putObject", () => {
190
214
  io.putObject({ key: "k", body: Buffer.from("z"), contentType: "x" }),
191
215
  ).rejects.toThrow(/presigned PUT failed for k: 403/);
192
216
  });
217
+
218
+ it("forwards the conditional-write fence on the presign request (ifMatch stripped, ifNoneMatch verbatim)", async () => {
219
+ // The server signs If-Match/If-None-Match into the URL (hq-pro follow-up)
220
+ // and echoes the headers for replay; the client's job is to ASK. Servers
221
+ // that predate the fields ignore them — never broken, just unconditional.
222
+ const { vault, presignCalls, setPresign } = makeVault();
223
+ setPresign([{ key: "fenced.md", op: "put", url: "https://s3/put-url" }]);
224
+ fetchMock.mockResolvedValue(
225
+ new Response(null, { status: 200, headers: { etag: '"v2"' } }),
226
+ );
227
+ const io = new PresignObjectIO(vault, COMPANY);
228
+ await io.putObject({
229
+ key: "fenced.md",
230
+ body: Buffer.from("x"),
231
+ contentType: "text/markdown",
232
+ ifMatch: '"v1"',
233
+ });
234
+ expect(presignCalls[0].keys[0]).toMatchObject({
235
+ key: "fenced.md",
236
+ op: "put",
237
+ ifMatch: "v1", // quotes stripped on the wire; server re-quotes when signing
238
+ });
239
+
240
+ await io.putObject({
241
+ key: "fresh.md",
242
+ body: Buffer.from("y"),
243
+ contentType: "text/markdown",
244
+ ifNoneMatch: "*",
245
+ });
246
+ expect(presignCalls[1].keys[0]).toMatchObject({
247
+ key: "fresh.md",
248
+ op: "put",
249
+ ifNoneMatch: "*",
250
+ });
251
+ });
252
+
253
+ it("maps a 412 PUT response to name:PreconditionFailed (the fence fired — conflict, not failure)", async () => {
254
+ const { vault, setPresign } = makeVault();
255
+ setPresign([{ key: "raced.md", op: "put", url: "https://s3/put-url" }]);
256
+ fetchMock.mockResolvedValue(
257
+ new Response("PreconditionFailed", { status: 412 }),
258
+ );
259
+ const io = new PresignObjectIO(vault, COMPANY);
260
+ await expect(
261
+ io.putObject({
262
+ key: "raced.md",
263
+ body: Buffer.from("z"),
264
+ contentType: "text/plain",
265
+ ifMatch: "old-etag",
266
+ }),
267
+ ).rejects.toMatchObject({ name: "PreconditionFailed" });
268
+ });
193
269
  });
194
270
 
195
271
  describe("PresignObjectIO.getObject", () => {
@@ -354,13 +430,40 @@ describe("PresignObjectIO.headObject", () => {
354
430
  expect(await io.headObject("gone")).toBeNull();
355
431
  });
356
432
 
357
- it("returns null when presign denies the key (no usable head)", async () => {
433
+ it("throws Forbidden when presign denies the key — denial is NOT absence", async () => {
434
+ // Regression: pre-fix this returned null ("absent"), which made
435
+ // share.ts's push guard (`if (remoteMeta)`) skip every conflict check
436
+ // and issue an unconditional PUT — a transient denial mid-pass silently
437
+ // clobbered newer remote bytes (2026-06-10..12 vault regression storm).
358
438
  const { vault, setPresign } = makeVault();
359
439
  setPresign([{ key: "secret/x", op: "get", error: "forbidden" }]);
360
440
  const io = new PresignObjectIO(vault, COMPANY);
361
- expect(await io.headObject("secret/x")).toBeNull();
441
+ await expect(io.headObject("secret/x")).rejects.toMatchObject({
442
+ name: "Forbidden",
443
+ });
362
444
  expect(fetchMock).not.toHaveBeenCalled();
363
445
  });
446
+
447
+ it("throws Forbidden on a presigned-GET 403 — same SDK-parity contract", async () => {
448
+ // Expired presigned URL, expired signing creds, KMS or bucket-policy
449
+ // denial all surface as 403 on the signed GET. Unknown state — must
450
+ // never read as "object missing".
451
+ const { vault, setPresign } = makeVault();
452
+ setPresign([{ key: "shared/a.md", op: "get", url: "https://s3/get" }]);
453
+ fetchMock.mockResolvedValue(new Response("AccessDenied", { status: 403 }));
454
+ const io = new PresignObjectIO(vault, COMPANY);
455
+ await expect(io.headObject("shared/a.md")).rejects.toMatchObject({
456
+ name: "Forbidden",
457
+ });
458
+ });
459
+
460
+ it("still returns null on a definitive 404 (key truly absent)", async () => {
461
+ const { vault, setPresign } = makeVault();
462
+ setPresign([{ key: "gone2", op: "get", url: "https://s3/get" }]);
463
+ fetchMock.mockResolvedValue(new Response("", { status: 404 }));
464
+ const io = new PresignObjectIO(vault, COMPANY);
465
+ expect(await io.headObject("gone2")).toBeNull();
466
+ });
364
467
  });
365
468
 
366
469
  /**
package/src/object-io.ts CHANGED
@@ -68,7 +68,29 @@ export interface PresignTransportClient {
68
68
  // Wire-primitive shapes
69
69
  // ---------------------------------------------------------------------------
70
70
 
71
- export interface PutObjectInput {
71
+ /**
72
+ * Conditional-write fence for a PUT (S3 conditional writes, GA 2024-11).
73
+ *
74
+ * `ifMatch` — only land the PUT if the remote object's ETag still equals
75
+ * this value (the journal baseline / last-observed HEAD). `ifNoneMatch: "*"`
76
+ * — only land the PUT if NO object exists at the key (creation fence).
77
+ * Either mismatch makes S3 reject with 412 PreconditionFailed, which the
78
+ * push path surfaces as a conflict instead of a silent overwrite.
79
+ *
80
+ * This is the storage-level backstop for the entire stale-clobber class:
81
+ * a HEAD-then-PUT race, a transport bug that misreads remote state, or an
82
+ * outdated client mid-pass can no longer regress a newer remote object —
83
+ * S3 itself refuses. (The 2026-06-10..12 vault regression storm was this
84
+ * class: stale machine copies blind-PUT over newer objects.)
85
+ */
86
+ export interface PutPrecondition {
87
+ /** Land only if the current remote ETag equals this (quotes optional). */
88
+ ifMatch?: string;
89
+ /** Land only if no object exists at the key. */
90
+ ifNoneMatch?: "*";
91
+ }
92
+
93
+ export interface PutObjectInput extends PutPrecondition {
72
94
  key: string;
73
95
  body: Buffer;
74
96
  contentType: string;
@@ -117,7 +139,12 @@ export interface ObjectIO {
117
139
  getObject(key: string): Promise<GetObjectResult>;
118
140
  listObjects(input: ListObjectsInput): Promise<ListObjectsResult>;
119
141
  deleteObject(key: string): Promise<void>;
120
- /** Null when the key does not exist (404 / 403-as-absent). */
142
+ /**
143
+ * Null ONLY when the key definitively does not exist (404). Access denial
144
+ * (403 / per-key presign denial) THROWS a `name: "Forbidden"` error — it is
145
+ * unknown state, never "absent". Conflating the two disables push-side
146
+ * conflict guards and clobbers newer remote objects.
147
+ */
121
148
  headObject(key: string): Promise<HeadObjectResult | null>;
122
149
  /**
123
150
  * Optional batch pre-mint. Warms an internal URL cache for `keys` under `op`
@@ -192,6 +219,13 @@ export class S3SdkObjectIO implements ObjectIO {
192
219
  ...(input.metadata && Object.keys(input.metadata).length > 0
193
220
  ? { Metadata: input.metadata }
194
221
  : {}),
222
+ // Conditional-write fence. If-Match wants the quoted entity-tag form;
223
+ // callers hand us journal/HEAD etags that may be stripped — re-quote
224
+ // so both shapes fence identically. A mismatch surfaces as the SDK's
225
+ // name:"PreconditionFailed" (HTTP 412), which the push path maps to
226
+ // its conflict flow.
227
+ ...(input.ifMatch ? { IfMatch: quoteEtag(input.ifMatch) } : {}),
228
+ ...(input.ifNoneMatch ? { IfNoneMatch: input.ifNoneMatch } : {}),
195
229
  }),
196
230
  );
197
231
  return { etag: res.ETag || "" };
@@ -311,6 +345,44 @@ function notFoundError(key: string): Error {
311
345
  return Object.assign(new Error(`Not found: ${key}`), { name: "NotFound" });
312
346
  }
313
347
 
348
+ /**
349
+ * An error shaped like the AWS SDK's HeadObject 403 (`name: "Forbidden"`) so
350
+ * presigned-transport denials route through the SAME catch sites as SDK ones
351
+ * (share.ts / sync.ts `isAccessDenied`: name === "AccessDenied" | "Forbidden").
352
+ * Critically this is NOT `null`: "can't read the key" must never be conflated
353
+ * with "the key does not exist" — that conflation let a transient 403 episode
354
+ * disable every push-side conflict guard and clobber newer remote objects.
355
+ */
356
+ function accessDeniedError(key: string, detail: string): Error {
357
+ return Object.assign(
358
+ new Error(`Access denied for ${key}: ${detail}`),
359
+ { name: "Forbidden" },
360
+ );
361
+ }
362
+
363
+ /**
364
+ * An error shaped like the AWS SDK's 412 (`name: "PreconditionFailed"`) so
365
+ * presigned-transport conditional-write rejections route through the same
366
+ * catch sites as SDK ones. A 412 means the fence WORKED: the remote moved
367
+ * past the caller's baseline (If-Match) or the key already exists
368
+ * (If-None-Match) — surface as a conflict, never overwrite.
369
+ */
370
+ function preconditionFailedError(key: string, detail: string): Error {
371
+ return Object.assign(
372
+ new Error(`Precondition failed for ${key}: ${detail}`),
373
+ { name: "PreconditionFailed" },
374
+ );
375
+ }
376
+
377
+ /**
378
+ * If-Match compares quoted entity-tags. Journal baselines store etags
379
+ * stripped (normalizeEtag) while SDK HEADs return them quoted — accept both
380
+ * and emit the canonical quoted form.
381
+ */
382
+ function quoteEtag(etag: string): string {
383
+ return etag.startsWith('"') ? etag : `"${etag}"`;
384
+ }
385
+
314
386
  /**
315
387
  * Max keys per presign request when priming — the server's hard batch cap
316
388
  * (hq-pro files-presign MAX_BATCH_KEYS = 1000). One presign call costs ONE
@@ -435,7 +507,13 @@ export class PresignObjectIO implements ObjectIO {
435
507
  private async resolveUrl(
436
508
  op: PresignOp,
437
509
  key: string,
438
- extra?: { contentType?: string; metadata?: Record<string, string> },
510
+ extra?: {
511
+ contentType?: string;
512
+ metadata?: Record<string, string>;
513
+ /** Conditional-write fence for PUT presigns — see PutPrecondition. */
514
+ ifMatch?: string;
515
+ ifNoneMatch?: "*";
516
+ },
439
517
  ): Promise<{ url: string; headers?: Record<string, string> }> {
440
518
  const hit = this.cached(op, key);
441
519
  if (hit) return { url: hit.url, headers: hit.headers };
@@ -509,11 +587,23 @@ export class PresignObjectIO implements ObjectIO {
509
587
  }
510
588
 
511
589
  async putObject(input: PutObjectInput): Promise<{ etag: string }> {
590
+ // Conditional-write fields (ifMatch/ifNoneMatch) are forwarded on the
591
+ // presign request so the server can sign If-Match/If-None-Match into the
592
+ // URL and echo them via `headers` for replay. Until hq-pro's
593
+ // files-presign signs them (follow-up to this PR), the server ignores
594
+ // the fields and the returned header set carries no condition — the PUT
595
+ // stays unconditional on this transport, exactly today's behavior. We
596
+ // deliberately do NOT inject the header client-side: an unsigned
597
+ // conditional header breaks the SigV4 signature. Enforcement on this
598
+ // transport activates the moment the server starts signing; the SDK
599
+ // transport enforces immediately.
512
600
  const row = await this.resolveUrl("put", input.key, {
513
601
  contentType: input.contentType,
514
602
  ...(input.metadata && Object.keys(input.metadata).length > 0
515
603
  ? { metadata: input.metadata }
516
604
  : {}),
605
+ ...(input.ifMatch ? { ifMatch: stripQuotes(input.ifMatch) } : {}),
606
+ ...(input.ifNoneMatch ? { ifNoneMatch: input.ifNoneMatch } : {}),
517
607
  });
518
608
  // The server signs Content-Type, SSE-KMS, and every x-amz-meta-* into the
519
609
  // signature and returns them in `headers`; they MUST be replayed verbatim
@@ -523,6 +613,14 @@ export class PresignObjectIO implements ObjectIO {
523
613
  { method: "PUT", body: input.body, headers: row.headers ?? {} },
524
614
  `presigned PUT ${input.key}`,
525
615
  );
616
+ if (res.status === 412) {
617
+ // The signed conditional header fenced this write off: the remote
618
+ // moved past our baseline (If-Match) or the key already exists
619
+ // (If-None-Match). Same shape as the SDK's PreconditionFailed so the
620
+ // push path routes both transports through one conflict handler.
621
+ const detail = await safeText(res);
622
+ throw preconditionFailedError(input.key, detail);
623
+ }
526
624
  if (!res.ok) {
527
625
  const detail = await safeText(res);
528
626
  throw new Error(
@@ -607,18 +705,33 @@ export class PresignObjectIO implements ObjectIO {
607
705
  }
608
706
  const row = results[0];
609
707
  if (!row || row.error || !row.url) {
610
- // A per-key denial here means the caller can't read the key — treat as
611
- // absent for HEAD semantics (the SDK path would 403, which callers map
612
- // to "no usable head"); they all tolerate null.
613
- return null;
708
+ // A per-key denial means the caller can't READ the key — it says
709
+ // nothing about whether the object EXISTS. Pre-fix this returned
710
+ // null ("absent"), which made push call sites skip every conflict
711
+ // guard (`if (remoteMeta)`) and issue an UNCONDITIONAL PUT — a
712
+ // transient denial episode mid-pass silently clobbered newer remote
713
+ // bytes with this machine's stale copy (the 2026-06-10..12 vault
714
+ // regression storm). Throw the same access-denied shape the SDK
715
+ // transport raises so callers route through their existing
716
+ // isAccessDenied skip/defer paths instead of "object missing".
717
+ throw accessDeniedError(key, row?.error ?? "presign denied");
614
718
  }
615
719
  url = row.url;
616
720
  }
617
721
  const res = await fetchWithRetry(url, { method: "GET" }, `presigned HEAD ${key}`);
618
- if (res.status === 404 || res.status === 403) {
722
+ if (res.status === 404) {
619
723
  await cancelBody(res);
620
724
  return null;
621
725
  }
726
+ if (res.status === 403) {
727
+ // 403 on the signed GET (expired URL, expired signing creds, KMS or
728
+ // bucket-policy denial) is UNKNOWN state, not absence — see the presign
729
+ // denial branch above. The SDK transport throws name:"Forbidden" here;
730
+ // mirror it so both transports agree and no caller mistakes a denial
731
+ // for a missing object.
732
+ await cancelBody(res);
733
+ throw accessDeniedError(key, "presigned HEAD returned 403");
734
+ }
622
735
  if (!res.ok) {
623
736
  await cancelBody(res);
624
737
  const detail = await safeText(res);
@@ -9,10 +9,13 @@ import * as os from "os";
9
9
  import * as path from "path";
10
10
  import {
11
11
  acquireOperationLock,
12
+ acquireOperationLockAsync,
13
+ withOperationLock,
12
14
  withOperationLockSync,
13
15
  lockPathFor,
14
16
  OperationLockedError,
15
17
  OPERATION_LOCKED_EXIT,
18
+ DEFAULT_LOCK_POLL_MS,
16
19
  type LockInfo,
17
20
  } from "./operation-lock.js";
18
21
 
@@ -44,6 +47,7 @@ describe("operation-lock", () => {
44
47
  stateDir = fs.mkdtempSync(path.join(os.tmpdir(), "hq-oplock-state-"));
45
48
  process.env.HQ_STATE_DIR = stateDir;
46
49
  delete process.env.HQ_DISABLE_OP_LOCK;
50
+ delete process.env.HQ_OP_LOCK_TIMEOUT;
47
51
  rootA = fs.mkdtempSync(path.join(os.tmpdir(), "hq-rootA-"));
48
52
  rootB = fs.mkdtempSync(path.join(os.tmpdir(), "hq-rootB-"));
49
53
  });
@@ -54,6 +58,7 @@ describe("operation-lock", () => {
54
58
  fs.rmSync(rootB, { recursive: true, force: true });
55
59
  delete process.env.HQ_STATE_DIR;
56
60
  delete process.env.HQ_DISABLE_OP_LOCK;
61
+ delete process.env.HQ_OP_LOCK_TIMEOUT;
57
62
  });
58
63
 
59
64
  it("the lock path is under the state dir, keyed per canonical root", () => {
@@ -75,14 +80,17 @@ describe("operation-lock", () => {
75
80
  expect(fs.existsSync(h.path)).toBe(false);
76
81
  });
77
82
 
78
- it("refuses fast with the holder's command + pid when a LIVE process holds it", () => {
83
+ it("refuses immediately (wait:false) with the holder's command + pid when a LIVE process holds it", () => {
79
84
  // Simulate a DIFFERENT live process holding the lock. PID 1 (init/systemd)
80
85
  // is always alive and is never our own pid, so kill(1,0) reports alive and
81
- // the same-process reclaim path does not apply.
86
+ // the same-process reclaim path does not apply. `wait:false` keeps the old
87
+ // refuse-immediately behavior (the default is now to WAIT).
82
88
  writeLock(lockPathFor(rootA), { pid: 1, command: "rescue" });
83
- expect(() => acquireOperationLock(rootA, "sync")).toThrowError(OperationLockedError);
89
+ expect(() => acquireOperationLock(rootA, "sync", { wait: false })).toThrowError(
90
+ OperationLockedError,
91
+ );
84
92
  try {
85
- acquireOperationLock(rootA, "sync");
93
+ acquireOperationLock(rootA, "sync", { wait: false });
86
94
  } catch (e) {
87
95
  const err = e as OperationLockedError;
88
96
  expect(err.holder.command).toBe("rescue");
@@ -92,11 +100,124 @@ describe("operation-lock", () => {
92
100
  }
93
101
  });
94
102
 
95
- it("reclaims a stale lock whose holder PID is dead (takeover)", () => {
103
+ it("timeoutSec:0 refuses immediately (no wait) — equivalent to wait:false", () => {
104
+ writeLock(lockPathFor(rootA), { pid: 1, command: "sync" });
105
+ const start = Date.now();
106
+ expect(() => acquireOperationLock(rootA, "reindex", { timeoutSec: 0 })).toThrowError(
107
+ OperationLockedError,
108
+ );
109
+ // Did not actually sleep.
110
+ expect(Date.now() - start).toBeLessThan(DEFAULT_LOCK_POLL_MS);
111
+ });
112
+
113
+ it("a bounded timeoutSec waits, then refuses with the old message + exit code", () => {
114
+ writeLock(lockPathFor(rootA), { pid: 1, command: "rescue" });
115
+ const start = Date.now();
116
+ let thrown: unknown;
117
+ try {
118
+ // 150ms bound, 40ms poll → waits ~150ms then gives up. Suppress the
119
+ // stderr status line with a no-op onWaitStart.
120
+ acquireOperationLock(rootA, "sync", {
121
+ timeoutSec: 0.15,
122
+ pollIntervalMs: 40,
123
+ onWaitStart: () => {},
124
+ });
125
+ } catch (e) {
126
+ thrown = e;
127
+ }
128
+ const elapsed = Date.now() - start;
129
+ expect(thrown).toBeInstanceOf(OperationLockedError);
130
+ expect((thrown as OperationLockedError).message).toContain("rescue");
131
+ expect(OPERATION_LOCKED_EXIT).toBe(17);
132
+ // It actually waited (didn't refuse instantly) but didn't hang forever.
133
+ expect(elapsed).toBeGreaterThanOrEqual(120);
134
+ expect(elapsed).toBeLessThan(3000);
135
+ });
136
+
137
+ it("HQ_OP_LOCK_TIMEOUT env bounds the wait when no explicit option is given", () => {
138
+ process.env.HQ_OP_LOCK_TIMEOUT = "0"; // 0 → refuse immediately
139
+ writeLock(lockPathFor(rootA), { pid: 1, command: "sync" });
140
+ const start = Date.now();
141
+ expect(() =>
142
+ acquireOperationLock(rootA, "rescue", { onWaitStart: () => {} }),
143
+ ).toThrowError(OperationLockedError);
144
+ expect(Date.now() - start).toBeLessThan(DEFAULT_LOCK_POLL_MS);
145
+ });
146
+
147
+ it("an explicit timeoutSec overrides the HQ_OP_LOCK_TIMEOUT env", () => {
148
+ process.env.HQ_OP_LOCK_TIMEOUT = "9999"; // would be a near-infinite wait
149
+ writeLock(lockPathFor(rootA), { pid: 1, command: "sync" });
150
+ // The explicit 0 wins → refuse immediately rather than honoring the env.
151
+ expect(() =>
152
+ acquireOperationLock(rootA, "rescue", { timeoutSec: 0 }),
153
+ ).toThrowError(OperationLockedError);
154
+ });
155
+
156
+ it("onWaitStart fires exactly once, naming the holder, even across many polls", () => {
157
+ writeLock(lockPathFor(rootA), { pid: 1, command: "rescue" });
158
+ const calls: Array<{ cmd: string; attempted: string }> = [];
159
+ expect(() =>
160
+ acquireOperationLock(rootA, "sync", {
161
+ timeoutSec: 0.16,
162
+ pollIntervalMs: 30, // ~5 polls within the window
163
+ onWaitStart: (holder, attempted) => calls.push({ cmd: holder.command, attempted }),
164
+ }),
165
+ ).toThrowError(OperationLockedError);
166
+ expect(calls).toHaveLength(1);
167
+ expect(calls[0]).toEqual({ cmd: "rescue", attempted: "sync" });
168
+ });
169
+
170
+ it("a waiter acquires the lock the moment the holder releases (async poll path)", async () => {
171
+ const p = lockPathFor(rootA);
172
+ // A foreign LIVE holder (pid 1) initially owns the lock.
173
+ writeLock(p, { pid: 1, command: "rescue" });
174
+ // Simulate the holder finishing ~80ms in by removing its lock file.
175
+ const release = setTimeout(() => fs.rmSync(p, { force: true }), 80);
176
+ const start = Date.now();
177
+ const h = await acquireOperationLockAsync(rootA, "sync", {
178
+ pollIntervalMs: 20,
179
+ onWaitStart: () => {},
180
+ });
181
+ clearTimeout(release);
182
+ const elapsed = Date.now() - start;
183
+ // We waited for the release, then took it over.
184
+ expect(elapsed).toBeGreaterThanOrEqual(60);
185
+ const info = JSON.parse(fs.readFileSync(h.path, "utf8")) as LockInfo;
186
+ expect(info.pid).toBe(process.pid);
187
+ expect(info.command).toBe("sync");
188
+ h.release();
189
+ });
190
+
191
+ it("multiple foreign holders in a row: each release lets the next waiter in (no FIFO guarantee)", async () => {
192
+ // The mutex is CROSS-PROCESS: it keys liveness on the holder's PID. Two
193
+ // waiters in the SAME process share a pid, so the same-process reclaim path
194
+ // would let them stomp each other — that scenario is unsupported by design.
195
+ // Here we model the real case: a sequence of FOREIGN holders (pid 1) that
196
+ // each release, with a single waiter acquiring the instant the lock frees.
197
+ // Order among multiple distinct-process waiters is whoever wins the next
198
+ // O_EXCL race after a free — best-effort, NOT FIFO (documented).
199
+ const p = lockPathFor(rootA);
200
+ writeLock(p, { pid: 1, command: "sync" });
201
+ // Free it shortly; the waiter should grab it right after.
202
+ setTimeout(() => fs.rmSync(p, { force: true }), 50);
203
+ const h = await acquireOperationLockAsync(rootA, "reindex", {
204
+ pollIntervalMs: 15,
205
+ onWaitStart: () => {},
206
+ });
207
+ const info = JSON.parse(fs.readFileSync(h.path, "utf8")) as LockInfo;
208
+ expect(info.command).toBe("reindex");
209
+ expect(info.pid).toBe(process.pid);
210
+ h.release();
211
+ });
212
+
213
+ it("reclaims a stale lock whose holder PID is dead (takeover, never waits)", () => {
96
214
  const stale = deadPid();
97
215
  writeLock(lockPathFor(rootA), { pid: stale, command: "sync" });
98
- // The dead holder must not block us.
216
+ const start = Date.now();
217
+ // The dead holder must not block us — even with an infinite default wait,
218
+ // takeover is immediate.
99
219
  const h = acquireOperationLock(rootA, "rescue");
220
+ expect(Date.now() - start).toBeLessThan(DEFAULT_LOCK_POLL_MS);
100
221
  const info = JSON.parse(fs.readFileSync(h.path, "utf8")) as LockInfo;
101
222
  expect(info.pid).toBe(process.pid); // we took it over
102
223
  expect(info.command).toBe("rescue");
@@ -114,7 +235,7 @@ describe("operation-lock", () => {
114
235
 
115
236
  it("different HQ roots are independent — both may hold concurrently", () => {
116
237
  const a = acquireOperationLock(rootA, "sync");
117
- const b = acquireOperationLock(rootB, "rescue"); // must NOT refuse
238
+ const b = acquireOperationLock(rootB, "rescue"); // must NOT block
118
239
  expect(fs.existsSync(a.path)).toBe(true);
119
240
  expect(fs.existsSync(b.path)).toBe(true);
120
241
  expect(a.path).not.toBe(b.path);
@@ -126,9 +247,14 @@ describe("operation-lock", () => {
126
247
  // A live sync in ANOTHER process holds the root (pid 1 stands in for it).
127
248
  const p = lockPathFor(rootA);
128
249
  writeLock(p, { pid: 1, command: "sync" });
129
- // Neither rescue nor reindex may acquire while that sync holds it.
130
- expect(() => acquireOperationLock(rootA, "rescue")).toThrowError(OperationLockedError);
131
- expect(() => acquireOperationLock(rootA, "reindex")).toThrowError(OperationLockedError);
250
+ // Neither rescue nor reindex may acquire while that sync holds it
251
+ // (wait:false — assert the refusal without hanging on the new wait default).
252
+ expect(() => acquireOperationLock(rootA, "rescue", { wait: false })).toThrowError(
253
+ OperationLockedError,
254
+ );
255
+ expect(() => acquireOperationLock(rootA, "reindex", { wait: false })).toThrowError(
256
+ OperationLockedError,
257
+ );
132
258
  // Once that sync finishes (its lock is gone), the next command acquires.
133
259
  fs.unlinkSync(p);
134
260
  const h2 = acquireOperationLock(rootA, "reindex");
@@ -147,6 +273,17 @@ describe("operation-lock", () => {
147
273
  expect(fs.existsSync(p)).toBe(false); // released on the way out
148
274
  });
149
275
 
276
+ it("withOperationLock (async) releases even when the body throws", async () => {
277
+ const p = lockPathFor(rootA);
278
+ await expect(
279
+ withOperationLock(rootA, "sync", async () => {
280
+ expect(fs.existsSync(p)).toBe(true);
281
+ throw new Error("boom");
282
+ }),
283
+ ).rejects.toThrow("boom");
284
+ expect(fs.existsSync(p)).toBe(false);
285
+ });
286
+
150
287
  it("HQ_DISABLE_OP_LOCK=1 makes acquisition a no-op", () => {
151
288
  process.env.HQ_DISABLE_OP_LOCK = "1";
152
289
  // Even with a live holder on record, the escape hatch acquires without error.