@checkstack/backend-api 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CHANGELOG.md +205 -0
  2. package/package.json +12 -11
  3. package/src/advisory-lock-pool.it.test.ts +282 -0
  4. package/src/advisory-lock.test.ts +144 -3
  5. package/src/advisory-lock.ts +97 -55
  6. package/src/auth-strategy.ts +6 -3
  7. package/src/bearer-token.ts +13 -0
  8. package/src/collector-strategy.ts +9 -0
  9. package/src/config-versioning.test.ts +227 -0
  10. package/src/config-versioning.ts +172 -0
  11. package/src/core-services.ts +14 -0
  12. package/src/esm-script-runner.test.ts +55 -16
  13. package/src/esm-script-runner.ts +212 -55
  14. package/src/index.ts +3 -0
  15. package/src/render-templatable-config.test.ts +168 -0
  16. package/src/render-templatable-config.ts +193 -0
  17. package/src/schema-utils.ts +3 -0
  18. package/src/script-sandbox/capabilities.test.ts +122 -0
  19. package/src/script-sandbox/capabilities.ts +372 -0
  20. package/src/script-sandbox/capped-output.test.ts +116 -0
  21. package/src/script-sandbox/capped-output.ts +172 -0
  22. package/src/script-sandbox/env-guard.test.ts +105 -0
  23. package/src/script-sandbox/env-guard.ts +129 -0
  24. package/src/script-sandbox/filesystem.test.ts +437 -0
  25. package/src/script-sandbox/filesystem.ts +514 -0
  26. package/src/script-sandbox/forkbomb.it.test.ts +121 -0
  27. package/src/script-sandbox/global-default.test.ts +161 -0
  28. package/src/script-sandbox/global-default.ts +100 -0
  29. package/src/script-sandbox/index.ts +14 -0
  30. package/src/script-sandbox/network.test.ts +356 -0
  31. package/src/script-sandbox/network.ts +373 -0
  32. package/src/script-sandbox/observability.test.ts +210 -0
  33. package/src/script-sandbox/observability.ts +168 -0
  34. package/src/script-sandbox/output-truncation.test.ts +53 -0
  35. package/src/script-sandbox/output-truncation.ts +69 -0
  36. package/src/script-sandbox/policy.test.ts +189 -0
  37. package/src/script-sandbox/policy.ts +220 -0
  38. package/src/script-sandbox/provider.test.ts +61 -0
  39. package/src/script-sandbox/provider.ts +134 -0
  40. package/src/script-sandbox/readiness.test.ts +80 -0
  41. package/src/script-sandbox/readiness.ts +117 -0
  42. package/src/script-sandbox/report.ts +88 -0
  43. package/src/script-sandbox/rootless-egress.it.test.ts +86 -0
  44. package/src/script-sandbox/rootless-egress.test.ts +99 -0
  45. package/src/script-sandbox/rootless-egress.ts +218 -0
  46. package/src/script-sandbox/shell-quote.test.ts +32 -0
  47. package/src/script-sandbox/shell-quote.ts +10 -0
  48. package/src/script-sandbox/wrapper.test.ts +1194 -0
  49. package/src/script-sandbox/wrapper.ts +714 -0
  50. package/src/shell-script-runner.test.ts +243 -0
  51. package/src/shell-script-runner.ts +210 -45
  52. package/src/zod-config.test.ts +60 -0
  53. package/src/zod-config.ts +38 -14
  54. package/tsconfig.json +3 -0
@@ -7,8 +7,9 @@ import {
7
7
 
8
8
  /**
9
9
  * Faithful fake of a `pg.Pool` that models Postgres' per-connection
10
- * SESSION advisory-lock semantics:
10
+ * advisory-lock semantics for BOTH lock flavours:
11
11
  *
12
+ * SESSION locks (`tryAcquire`):
12
13
  * - A key can be held by at most one connection at a time.
13
14
  * - `pg_try_advisory_lock` succeeds only if the key is free; it then
14
15
  * binds the key to the acquiring connection.
@@ -16,8 +17,15 @@ import {
16
17
  * (a no-op otherwise) — exactly the bug we are guarding against: an
17
18
  * unlock issued on a different connection does nothing.
18
19
  *
19
- * This lets the test prove the service keeps acquire + release on ONE
20
- * client.
20
+ * TRANSACTION locks (`withXactLock`):
21
+ * - `pg_advisory_xact_lock` BLOCKS until the key is free, then binds it to
22
+ * the acquiring connection's transaction.
23
+ * - `COMMIT` / `ROLLBACK` release every xact lock held by that connection
24
+ * and wake the next blocked waiter (FIFO) — modelling auto-release and
25
+ * the serialization guarantee.
26
+ *
27
+ * This lets the tests prove the service keeps acquire + release on ONE
28
+ * client and that concurrent `withXactLock` callers serialize.
21
29
  */
22
30
  interface FakePool extends AdvisoryLockPool {
23
31
  checkedOut: number;
@@ -27,6 +35,9 @@ interface FakePool extends AdvisoryLockPool {
27
35
  function makeFakePool(): FakePool {
28
36
  // key -> owning connection id (or absent if free)
29
37
  const heldBy = new Map<string, number>();
38
+ // xact key -> owning connection id; waiters queued FIFO per key.
39
+ const xactHeldBy = new Map<string, number>();
40
+ const xactWaiters = new Map<string, Array<() => void>>();
30
41
  let nextConnId = 0;
31
42
  const counters = { checkedOut: 0, released: 0 };
32
43
 
@@ -46,8 +57,21 @@ function makeFakePool(): FakePool {
46
57
  async connect(): Promise<AdvisoryLockPoolClient> {
47
58
  const connId = nextConnId++;
48
59
  counters.checkedOut++;
60
// Models COMMIT/ROLLBACK: drop every xact lock this connection owns and
// hand each freed key to the oldest queued waiter, if there is one.
const releaseXactLocks = (): void => {
  // Snapshot the owned keys first; waking a waiter re-inserts into
  // `xactHeldBy`, and the live map must not be mutated mid-iteration.
  const ownedKeys = Array.from(xactHeldBy.entries())
    .filter(([, holder]) => holder === connId)
    .map(([lockKey]) => lockKey);

  for (const lockKey of ownedKeys) {
    xactHeldBy.delete(lockKey);
    // FIFO hand-off: the waiter callback claims the key for its own
    // connection and resolves its pending `pg_advisory_xact_lock` query.
    xactWaiters.get(lockKey)?.shift()?.();
  }
};
49
68
  return {
50
69
  async query<T>(queryText: string, values?: unknown[]) {
70
+ if (queryText === "BEGIN") return { rows: [] };
71
+ if (queryText === "COMMIT" || queryText === "ROLLBACK") {
72
+ releaseXactLocks();
73
+ return { rows: [] };
74
+ }
51
75
  const key = keyOf(values);
52
76
  if (queryText.includes("pg_try_advisory_lock")) {
53
77
  const owner = heldBy.get(key);
@@ -55,6 +79,22 @@ function makeFakePool(): FakePool {
55
79
  if (ok) heldBy.set(key, connId);
56
80
  return { rows: [{ ok } as unknown as T] };
57
81
  }
82
+ if (queryText.includes("pg_advisory_xact_lock")) {
83
+ if (!xactHeldBy.has(key)) {
84
+ xactHeldBy.set(key, connId);
85
+ return { rows: [] };
86
+ }
87
+ // Blocked: enqueue and wait until a holder commits/rolls back.
88
+ await new Promise<void>((resolve) => {
89
+ const q = xactWaiters.get(key) ?? [];
90
+ q.push(() => {
91
+ xactHeldBy.set(key, connId);
92
+ resolve();
93
+ });
94
+ xactWaiters.set(key, q);
95
+ });
96
+ return { rows: [] };
97
+ }
58
98
  if (queryText.includes("pg_advisory_unlock")) {
59
99
  // Only the owning connection can release — model the leak bug.
60
100
  if (heldBy.get(key) === connId) heldBy.delete(key);
@@ -70,6 +110,10 @@ function makeFakePool(): FakePool {
70
110
  // `on('error')` hardening is exercised by the IT against real
71
111
  // Postgres (killing the holding connection).
72
112
  },
113
+ off() {
114
+ // Counterpart to `on`; the service detaches its error listener on
115
+ // release. No-op here since the fake never attaches one.
116
+ },
73
117
  };
74
118
  },
75
119
  };
@@ -130,3 +174,100 @@ describe("createAdvisoryLockService", () => {
130
174
  await b!.release();
131
175
  });
132
176
  });
177
+
178
describe("createAdvisoryLockService.withXactLock", () => {
  // Yield to the timer queue so in-flight lock acquisitions can progress.
  const tick = (ms = 5) =>
    new Promise((resolve) => {
      setTimeout(resolve, ms);
    });

  // A pending promise paired with the callback that resolves it; used to
  // keep a critical section open until the test decides to let it finish.
  const makeGate = () => {
    let open!: () => void;
    const opened = new Promise<void>((resolve) => {
      open = resolve;
    });
    return { opened, open };
  };

  it("runs fn, returns its value, and releases the client", async () => {
    const fakePool = makeFakePool();
    const locks = createAdvisoryLockService(fakePool);

    const value = await locks.withXactLock({ key: "k", fn: async () => 42 });

    expect(value).toBe(42);
    expect(fakePool.checkedOut).toBe(1);
    expect(fakePool.released).toBe(1);
  });

  it("serializes concurrent calls on the same key (second fn waits for first to commit)", async () => {
    const fakePool = makeFakePool();
    const locks = createAdvisoryLockService(fakePool);
    const events: string[] = [];
    const firstGate = makeGate();

    const first = locks.withXactLock({
      key: "k",
      fn: async () => {
        events.push("1-start");
        await firstGate.opened;
        events.push("1-end");
      },
    });

    // Give the first caller time to take the lock before the second tries.
    await tick();
    const second = locks.withXactLock({
      key: "k",
      fn: async () => {
        events.push("2-start");
      },
    });

    // The second critical section must still be parked behind the first.
    await tick();
    expect(events).toEqual(["1-start"]);

    firstGate.open();
    await Promise.all([first, second]);
    expect(events).toEqual(["1-start", "1-end", "2-start"]);
    expect(fakePool.released).toBe(2);
  });

  it("rolls back and releases the client when fn throws, freeing the lock", async () => {
    const fakePool = makeFakePool();
    const locks = createAdvisoryLockService(fakePool);

    const failing = locks.withXactLock({
      key: "k",
      fn: async () => {
        throw new Error("boom");
      },
    });
    await expect(failing).rejects.toThrow("boom");

    // The rollback freed the key, so the next caller is granted it at once.
    const after = await locks.withXactLock({ key: "k", fn: async () => "ok" });
    expect(after).toBe("ok");
    expect(fakePool.released).toBe(2);
  });

  it("different keys do not serialize", async () => {
    const fakePool = makeFakePool();
    const locks = createAdvisoryLockService(fakePool);
    const started: string[] = [];
    const gateA = makeGate();

    const pendingA = locks.withXactLock({
      key: "a",
      fn: async () => {
        started.push("a");
        await gateA.opened;
      },
    });
    await tick();

    // "a" is still held here; "b" is a different key and must not wait on it.
    await locks.withXactLock({
      key: "b",
      fn: async () => {
        started.push("b");
      },
    });
    expect(started).toContain("b");

    gateA.open();
    await pendingA;
  });
});
@@ -18,19 +18,25 @@
18
18
  * (e.g. an installer election held across a minutes-long `bun install`)
19
19
  * where a long-open transaction would be unacceptable.
20
20
  *
21
- * - {@link withXactLock} wraps acquire + work + release in a single
22
- * transaction using `pg_advisory_xact_lock`, which auto-releases at
23
- * COMMIT/ROLLBACK. Use this for SHORT critical sections (e.g. a
24
- * find-then-create dedup) where holding a transaction for the duration
25
- * is fine and the auto-release removes any chance of a leak.
21
+ * - {@link AdvisoryLockService.withXactLock} wraps acquire + work + release
22
+ * in a single transaction using `pg_advisory_xact_lock`, which auto-
23
+ * releases at COMMIT/ROLLBACK. Use this for SHORT critical sections (e.g. a
24
+ * find-then-create dedup) where holding a transaction for the duration is
25
+ * fine and the auto-release removes any chance of a leak.
26
+ *
27
+ * BOTH run on the service's pool, which MUST be a pool dedicated to advisory
28
+ * locks (separate from the pool the locked work runs on). A held lock keeps its
29
+ * connection checked out for the lock's lifetime; if lock and work shared one
30
+ * pool, concurrency >= pool size would deadlock (every slot a lock-holder
31
+ * waiting for a work connection). The backend wires this to a dedicated
32
+ * `lockPool`; that pool also sets `idle_in_transaction_session_timeout` /
33
+ * `lock_timeout` so a stalled critical section cannot strand a lock forever.
26
34
  *
27
35
  * Keys are arbitrary strings hashed to Postgres' 64-bit lock space via
28
36
  * `hashtextextended(key, 0)`. Callers SHOULD namespace keys (e.g.
29
37
  * `"script-packages.installer"`, `"incident.dedupe:<systemId>"`) since the
30
38
  * advisory-lock space is global to the database server, not schema-scoped.
31
39
  */
32
- import { sql } from "drizzle-orm";
33
- import type { SafeDatabase } from "./plugin-system";
34
40
 
35
41
  /**
36
42
  * Minimal pool surface this module needs. Modelled on `pg.Pool` /
@@ -45,13 +51,22 @@ export interface AdvisoryLockPoolClient {
45
51
  /** Return the client to the pool. */
46
52
  release(): void;
47
53
  /**
48
- * Subscribe to async client errors. A session-lock client is held for a long
49
- * time; if its backend dies (admin termination, failover, network drop) `pg`
50
- * emits `'error'` on the client, and an `'error'` with no listener is
51
- * re-thrown by the EventEmitter and would crash the pod. We attach a listener
52
- * so that loss degrades gracefully instead. Modelled on `pg.Client.on`.
54
+ * Subscribe to async client errors. A held client (session lock, or an open
55
+ * xact-lock transaction) is checked out for a while; if its backend dies
56
+ * (admin termination, failover, network drop) `pg` emits `'error'` on the
57
+ * client, and an `'error'` with no listener is re-thrown by the EventEmitter
58
+ * and would crash the pod. We attach a listener so that loss degrades
59
+ * gracefully instead. Modelled on `pg.Client.on`.
53
60
  */
54
61
  on(event: "error", listener: (err: Error) => void): void;
62
+ /**
63
+ * Detach a previously-attached error listener. MUST be called before
64
+ * returning the client to the pool: pooled clients are reused, so attaching a
65
+ * fresh listener on every checkout WITHOUT removing it on release leaks one
66
+ * listener per acquisition on each long-lived physical connection (an
67
+ * unbounded `MaxListenersExceeded` leak). Modelled on `pg.Client.off`.
68
+ */
69
+ off(event: "error", listener: (err: Error) => void): void;
55
70
  }
56
71
 
57
72
  export interface AdvisoryLockPool {
@@ -76,8 +91,30 @@ export interface AdvisoryLockService {
76
91
  * `finally`.
77
92
  */
78
93
  tryAcquire(key: string): Promise<AdvisoryLockHandle | null>;
94
+ /**
95
+ * Run `fn` while holding a transaction-scoped advisory lock for `key`,
96
+ * acquired with `pg_advisory_xact_lock` (which BLOCKS until granted) on a
97
+ * dedicated client from THIS service's pool, and auto-released when that
98
+ * transaction commits/rolls back after `fn` settles.
99
+ *
100
+ * The lock transaction runs on this service's (dedicated lock) pool, while
101
+ * `fn` does its real work on whatever database it already holds (typically
102
+ * the shared admin pool). Because the held lock connection and the work
103
+ * connection come from DIFFERENT pools, the nested acquisition can never
104
+ * deadlock the work pool. Use this for SHORT critical sections that gate a
105
+ * read-then-write on another connection.
106
+ */
107
+ withXactLock<T>(args: { key: string; fn: () => Promise<T> }): Promise<T>;
79
108
  }
80
109
 
110
/**
 * The one `'error'` listener attached to every held lock client.
 *
 * It deliberately does nothing: its only job is to exist, so an `'error'`
 * emitted on a checked-out client has a listener. Keeping it as a single
 * module-level function (instead of an inline closure per acquisition) means
 * the exact same reference can be handed to `off` to detach it again, and no
 * per-lock allocation happens. It closes over no state, so sharing is safe.
 */
const swallowClientError: () => void = () => {
  // Intentionally empty.
};
117
+
81
118
  /**
82
119
  * Build an {@link AdvisoryLockService} backed by a pool. The backend
83
120
  * provides the real admin pool; tests can provide a faithful fake that
@@ -95,8 +132,13 @@ export function createAdvisoryLockService(
95
132
  // here; without a listener the process crashes. Swallow it - the session
96
133
  // lock is auto-released server-side when the backend dies, and a stale
97
134
  // `release()` is already a no-op-safe `finally`, so the loss surfaces as
98
- // the key simply becoming acquirable again.
99
- client.on("error", () => {});
135
+ // the key simply becoming acquirable again. The listener is removed on
136
+ // release so it does not accumulate on the reused pooled connection.
137
+ client.on("error", swallowClientError);
138
+ const releaseClient = () => {
139
+ client.off("error", swallowClientError);
140
+ client.release();
141
+ };
100
142
  let acquired = false;
101
143
  try {
102
144
  const result = await client.query<{ ok: boolean }>(
@@ -105,14 +147,14 @@ export function createAdvisoryLockService(
105
147
  );
106
148
  acquired = Boolean(result.rows[0]?.ok);
107
149
  } catch (error) {
108
- client.release();
150
+ releaseClient();
109
151
  throw error;
110
152
  }
111
153
  if (!acquired) {
112
154
  // Did not get the lock — return the client immediately. (A failed
113
155
  // pg_try_advisory_lock acquires nothing, so there is nothing to
114
156
  // unlock.)
115
- client.release();
157
+ releaseClient();
116
158
  return null;
117
159
  }
118
160
 
@@ -127,48 +169,48 @@ export function createAdvisoryLockService(
127
169
  [key],
128
170
  );
129
171
  } finally {
130
- client.release();
172
+ releaseClient();
131
173
  }
132
174
  },
133
175
  };
134
176
  },
135
- };
136
- }
137
177
 
138
- /**
139
- * Run `fn` while holding a transaction-scoped advisory lock for `key`. The
140
- * lock is acquired with `pg_advisory_xact_lock` (which BLOCKS until granted)
141
- * inside a transaction and auto-released at COMMIT/ROLLBACK, so there is no
142
- * unlock to leak. Use only for SHORT critical sections the lock is held
143
- * for the whole transaction.
144
- *
145
- * Because the scoped DB runs an entire `transaction()` callback on a single
146
- * dedicated connection, the lock + the work + the implicit release all share
147
- * one session, which is exactly the affinity session locks require.
148
- *
149
- * `fn` receives the transaction handle `tx` and MUST run its
150
- * read-then-write critical section on it (not on the outer pool). Running
151
- * the work on the pool would put it on a DIFFERENT connection than the one
152
- * holding the lock — so two concurrent callers' critical sections could
153
- * interleave even though both "hold" the lock. Using `tx` keeps the
154
- * read-check + write atomic with respect to the lock.
155
- */
156
- export async function withXactLock<
157
- S extends Record<string, unknown>,
158
- T,
159
- >({
160
- db,
161
- key,
162
- fn,
163
- }: {
164
- db: SafeDatabase<S>;
165
- key: string;
166
- fn: (tx: Parameters<Parameters<SafeDatabase<S>["transaction"]>[0]>[0]) => Promise<T>;
167
- }): Promise<T> {
168
- return db.transaction(async (tx) => {
169
- await tx.execute(
170
- sql`SELECT pg_advisory_xact_lock(hashtextextended(${key}, 0))`,
171
- );
172
- return fn(tx);
173
- });
178
/**
 * Run `fn` inside a transaction-scoped advisory lock for `key`.
 *
 * Sequence on one client checked out from this service's pool:
 * `BEGIN` → `pg_advisory_xact_lock` (blocks until granted) → `fn()` →
 * `COMMIT`. Any failure after `BEGIN` triggers a best-effort `ROLLBACK`
 * and the original error is re-thrown. The client is always detached from
 * the error listener and released, whatever the outcome.
 *
 * NOTE(review): if `ROLLBACK` itself fails, the client is still returned
 * via the argument-less `release()`; confirm the pool evicts a connection
 * left in a broken / in-transaction state.
 */
async withXactLock({ key, fn }) {
  const lockClient = await pool.connect();
  // The client sits idle-in-transaction while `fn` runs; without a listener
  // an async `'error'` (e.g. backend terminated) would be re-thrown by the
  // emitter. Detached again in the `finally` so listeners never pile up on
  // a reused pooled connection.
  lockClient.on("error", swallowClientError);

  try {
    await lockClient.query("BEGIN");

    try {
      // Blocks on this client until the key is free. Released implicitly by
      // whichever of COMMIT / ROLLBACK ends the transaction.
      await lockClient.query(
        "SELECT pg_advisory_xact_lock(hashtextextended($1, 0))",
        [key],
      );
      const value = await fn();
      await lockClient.query("COMMIT");
      return value;
    } catch (failure) {
      // Best-effort cleanup: end the transaction so the lock is dropped
      // before the client goes back to the pool. A ROLLBACK failure is
      // ignored on purpose so the caller sees the original error.
      try {
        await lockClient.query("ROLLBACK");
      } catch {
        // Ignored: nothing more can be done on this connection.
      }
      throw failure;
    }
  } finally {
    lockClient.off("error", swallowClientError);
    lockClient.release();
  }
},
215
+ };
174
216
  }
@@ -1,6 +1,6 @@
1
1
  import { z } from "zod";
2
2
  import type { Migration } from "./config-versioning";
3
- import type { LucideIconName } from "@checkstack/common";
3
+ import type { IconName } from "@checkstack/common";
4
4
 
5
5
  /**
6
6
  * Migration chain for auth strategy configurations.
@@ -21,8 +21,11 @@ export interface AuthStrategy<Config = unknown> {
21
21
  /** Optional description of the strategy */
22
22
  description?: string;
23
23
 
24
- /** Lucide icon name in PascalCase (e.g., 'Github', 'Chrome', 'Mail') */
25
- icon?: LucideIconName;
24
+ /**
25
+ * Icon name in PascalCase. A lucide icon (e.g. 'Mail') or a vendored brand
26
+ * icon (e.g. 'Github') - see `IconName` / `DynamicIcon`.
27
+ */
28
+ icon?: IconName;
26
29
 
27
30
  /** Current version of the configuration schema */
28
31
  configVersion: number;
@@ -0,0 +1,13 @@
1
/**
 * Shared opaque-bearer extraction used by the OAuth resource-server validate
 * path (auth-backend) and the MCP transport (ai-backend) so the two never
 * drift. A `ck_` API key is NOT an opaque OAuth bearer token (it has its own
 * dedicated auth branch), so it is explicitly excluded here.
 *
 * Parsing rules:
 * - The `Bearer` scheme is matched case-insensitively, as HTTP auth schemes
 *   are case-insensitive.
 * - One or more spaces may separate the scheme from the token; they are not
 *   part of the returned token.
 * - A token starting with `ck_` is rejected however many spaces precede it.
 *
 * @param request Incoming request whose `authorization` header is inspected.
 * @returns The bearer token, or `undefined` when the header is missing, uses
 *   another scheme, carries no token, or carries a `ck_` API key.
 */
export function opaqueBearerToken(request: Request): string | undefined {
  const header = request.headers.get("authorization");
  if (header === null) {
    return undefined;
  }
  // `Bearer` 1*SP token. The capture starts at the first non-space character
  // so separator spaces can neither leak into the token nor hide a `ck_`
  // prefix from the exclusion below.
  const token = /^Bearer +(\S.*)$/i.exec(header)?.[1];
  if (token === undefined || token.startsWith("ck_")) {
    return undefined;
  }
  return token;
}
@@ -24,6 +24,15 @@ export interface CollectorResult<TResult> {
24
24
  export interface CollectorRunContext {
25
25
  check: { id: string; name: string; intervalSeconds: number };
26
26
  system: { id: string; name: string };
27
+ /**
28
+ * The resolved environment for THIS run, when the check fanned out into
29
+ * one. Absent when the assignment opts out or the system has no
30
+ * environments (the env-less run). `fields` is the environment's
31
+ * free-form custom metadata (verbatim values) - metadata only, never
32
+ * secrets. Exposed to scripts as `globalThis.context.environment` (inline)
33
+ * and the `CHECKSTACK_ENV_*` shell vars.
34
+ */
35
+ environment?: { id: string; name: string; fields: Record<string, unknown> };
27
36
  }
28
37
 
29
38
  /**