@checkstack/backend 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,43 @@
1
1
  # @checkstack/backend
2
2
 
3
+ ## 0.13.0
4
+
5
+ ### Minor Changes
6
+
7
+ - af6bda7: Fix plugin migrations failing on upgrade with `type "..." does not exist`.
8
+
9
+ Plugin migrations are schema-agnostic and rely on `search_path` to resolve
10
+ unqualified names into the plugin's schema (e.g. `plugin_healthcheck`). The
11
+ loader set `search_path` at the session level on the shared admin pool and
12
+ then called Drizzle's `migrate()`. Because `migrate()` runs all pending
13
+ migrations inside its own transaction, a `pg.Pool` could service that
14
+ transaction on a different physical connection than the one the `SET` ran on,
15
+ so the migration SQL executed against `public` instead.
16
+
17
+ This was invisible on a fresh database (every object is created within that
18
+ one transaction, so unqualified references still resolve), but broke upgrades:
19
+ the healthcheck plugin's new `health_check_state_transitions` migration
20
+ references the pre-existing `health_check_status` enum, which an earlier
21
+ migration created in the plugin schema. On a different pooled connection that
22
+ enum is not visible through the default `public` `search_path`, so startup failed with
23
+ `type "health_check_status" does not exist` and the pod crash-looped.
24
+
25
+ Migrations now run on a single pinned pool connection: the loader checks out
26
+ one dedicated client, sets `search_path` on it, and binds the migrator to that
27
+ same client, mirroring the connection-affinity pattern already used by the
28
+ advisory-lock service. Every migration statement now runs under the intended
29
+ schema.
30
+
31
+ Boot was also restructured into two passes over the topologically-sorted
32
+ plugins: pass 1 runs every plugin's migrations, pass 2 runs every plugin's
33
+ `init()`. Previously the two were interleaved per plugin, so an
34
+ already-initialized plugin's background work (queue consumers, sweepers,
35
+ reactive-entity/event wiring) could compete for pool connections while a later
36
+ plugin was still migrating. Running all migrations first keeps the pool quiet
37
+ during migrations and removes that race entirely. The pinned connection and the
38
+ two-pass ordering are each independently sufficient for the fix above; together
39
+ they make boot robust regardless of what else touches the pool.
40
+
3
41
  ## 0.12.0
4
42
 
5
43
  ### Minor Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@checkstack/backend",
3
- "version": "0.12.0",
3
+ "version": "0.13.0",
4
4
  "license": "Elastic-2.0",
5
5
  "checkstack": {
6
6
  "type": "backend"
@@ -1,9 +1,7 @@
1
- import { migrate } from "drizzle-orm/node-postgres/migrator";
2
- import { NodePgDatabase } from "drizzle-orm/node-postgres";
3
1
  import path from "node:path";
4
2
  import fs from "node:fs";
5
3
  import type { Hono } from "hono";
6
- import { eq, and, sql } from "drizzle-orm";
4
+ import { eq, and } from "drizzle-orm";
7
5
  import type { SafeDatabase } from "@checkstack/backend-api";
8
6
  import {
9
7
  coreServices,
@@ -23,6 +21,8 @@ import { rootLogger } from "../logger";
23
21
  import type { ServiceRegistry } from "../services/service-registry";
24
22
  import { plugins } from "../schema";
25
23
  import { stripPublicSchemaFromMigrations } from "../utils/strip-public-schema";
24
+ import { runPluginMigrations } from "../utils/run-plugin-migrations";
25
+ import { adminPool } from "../db";
26
26
  import {
27
27
  discoverLocalPlugins,
28
28
  syncPluginsToDatabase,
@@ -309,6 +309,17 @@ export async function loadPlugins({
309
309
  }
310
310
 
311
311
  // Phase 2: Initialize Plugins (Topological Sort)
312
+ //
313
+ // Done in two passes over the topologically-sorted plugins:
314
+ // Pass 1 - run EVERY plugin's migrations.
315
+ // Pass 2 - resolve deps + run EVERY plugin's init().
316
+ // Splitting the passes guarantees no plugin's init() (which may start
317
+ // background DB work - queue consumers, sweepers, reactive-entity/event
318
+ // wiring) is running while another plugin is still migrating. That
319
+ // interleaving is what could divert a migration onto a pooled connection
320
+ // without the plugin's search_path. `runPluginMigrations` pins a connection
321
+ // too, so each measure is independently sufficient; together they make boot
322
+ // robust regardless of what touches the pool.
312
323
  const logger = await deps.registry.get(coreServices.logger, {
313
324
  pluginId: "core",
314
325
  });
@@ -347,9 +358,9 @@ export async function loadPlugins({
347
358
  // pre-existing behavior and is preferable to a multi-second hang.
348
359
  deps.onApiRouteRegistered?.();
349
360
 
361
+ // Phase 2, pass 1: run every plugin's migrations BEFORE any plugin init.
350
362
  for (const id of sortedIds) {
351
363
  const p = pendingInits.find((x) => x.metadata.pluginId === id)!;
352
- rootLogger.info(`🚀 Initializing ${p.metadata.pluginId}...`);
353
364
 
354
365
  try {
355
366
  /**
@@ -372,34 +383,32 @@ export async function loadPlugins({
372
383
  * causing "relation does not exist" errors since the tables are actually in
373
384
  * the plugin's schema (e.g., `plugin_maintenance.maintenances`).
374
385
  *
375
- * ## Session-Level vs Transaction-Level search_path
386
+ * ## Why a pinned connection (not a session-level SET on the pool)
387
+ *
388
+ * The migration `search_path` MUST be set on the exact connection the
389
+ * migration statements run on. Setting it at the session level on the
390
+ * shared `adminPool` does not achieve that: `migrate()` runs all pending
391
+ * migrations inside its own transaction, which a `pg.Pool` may service on
392
+ * a *different* physical connection than the `SET` ran on. The migration
393
+ * SQL would then execute against `public`.
376
394
  *
377
- * We use **session-level** `SET search_path` (not `SET LOCAL`) here because:
378
- * - `migrate()` runs multiple statements and may manage its own transactions
379
- * - `SET LOCAL` only persists within a single transaction
380
- * - Session-level SET persists until explicitly changed or session ends
395
+ * `runPluginMigrations()` therefore checks out ONE dedicated client from
396
+ * the pool, sets `search_path` on it, and binds the migrator to that same
397
+ * client - the same connection-affinity pattern the advisory-lock service
398
+ * uses (see `advisory-lock.ts`). The bug this prevents is invisible on a
399
+ * fresh database (every object is created in one transaction, so
400
+ * unqualified references still resolve) but breaks UPGRADES: a new
401
+ * migration that references an enum an earlier migration created in the
402
+ * plugin schema fails with `type "..." does not exist`.
381
403
  *
382
404
  * ## Why This Doesn't Affect Runtime Queries
383
405
  *
384
406
  * After migrations complete, plugins receive their database via
385
407
  * `createScopedDb()` which wraps every query in a transaction with
386
408
  * `SET LOCAL search_path`. This ensures runtime queries always use the
387
- * correct schema, regardless of the session-level search_path.
388
- *
389
- * ## Potential Hazards
390
- *
391
- * 1. **Error During Migration**: If a migration fails, the search_path may
392
- * remain set to that plugin's schema. The next plugin's migration would
393
- * fail visibly (wrong schema), which is better than silent data corruption.
394
- *
395
- * 2. **Parallel Migration Execution**: This code assumes sequential plugin
396
- * initialization (which is enforced by the topologically-sorted loop).
397
- * If migrations ever run in parallel, search_path conflicts would occur.
398
- *
399
- * 3. **Connection Pool Pollution**: `SET` without `LOCAL` affects the entire
400
- * session. However, we reset to `public` after each plugin's migrations,
401
- * and runtime queries use `SET LOCAL` anyway, so this is safe.
409
+ * correct schema.
402
410
  *
411
+ * @see runPluginMigrations in ../utils/run-plugin-migrations.ts
403
412
  * @see createScopedDb in ../utils/scoped-db.ts for runtime query isolation
404
413
  * @see getPluginSchemaName in @checkstack/drizzle-helper for schema naming
405
414
  * =======================================================================
@@ -419,29 +428,13 @@ export async function loadPlugins({
419
428
  ` -> Running migrations for ${p.metadata.pluginId} from ${migrationsFolder}`,
420
429
  );
421
430
 
422
- // Create schema if it doesn't exist BEFORE running migrations.
423
- // Without this, SET search_path to a non-existent schema causes
424
- // PostgreSQL to fall back to 'public', creating tables in the wrong schema.
425
- await deps.db.execute(
426
- sql.raw(`CREATE SCHEMA IF NOT EXISTS "${migrationsSchema}"`),
427
- );
428
-
429
- // Set search_path to plugin schema before running migrations.
430
- // Uses session-level SET (not SET LOCAL) because migrate() may run
431
- // multiple statements across transaction boundaries.
432
- // No 'public' fallback: schema is guaranteed to exist from CREATE above.
433
- await deps.db.execute(
434
- sql.raw(`SET search_path = "${migrationsSchema}"`),
435
- );
436
- // Drizzle migrate() requires NodePgDatabase, cast from SafeDatabase
437
- await migrate(deps.db as NodePgDatabase<Record<string, unknown>>, {
431
+ // Run on a single pinned connection so the search_path we set is the
432
+ // one the migration statements actually execute under.
433
+ await runPluginMigrations({
434
+ pool: adminPool,
438
435
  migrationsFolder,
439
436
  migrationsSchema,
440
437
  });
441
-
442
- // Reset search_path to public after migrations complete.
443
- // This prevents search_path leaking into subsequent plugin migrations.
444
- await deps.db.execute(sql.raw(`SET search_path = public`));
445
438
  } catch (error) {
446
439
  rootLogger.error(
447
440
  `❌ Failed migration of plugin ${p.metadata.pluginId}:`,
@@ -456,7 +449,25 @@ export async function loadPlugins({
456
449
  ` -> No migrations found for ${p.metadata.pluginId} (skipping)`,
457
450
  );
458
451
  }
452
+ } catch (error) {
453
+ rootLogger.error(
454
+ `❌ Critical error loading plugin ${p.metadata.pluginId}:`,
455
+ error,
456
+ );
457
+ throw new Error(`Critical error loading plugin ${p.metadata.pluginId}`, {
458
+ cause: error,
459
+ });
460
+ }
461
+ }
462
+
463
+ // Phase 2, pass 2: initialize plugins in topological order. Every plugin -
464
+ // and therefore every dependency - is fully migrated by now, so an init()
465
+ // can assume all plugin schemas exist.
466
+ for (const id of sortedIds) {
467
+ const p = pendingInits.find((x) => x.metadata.pluginId === id)!;
468
+ rootLogger.info(`🚀 Initializing ${p.metadata.pluginId}...`);
459
469
 
470
+ try {
460
471
  // Resolve Dependencies
461
472
  const resolvedDeps: Record<string, unknown> = {};
462
473
  for (const [key, ref] of Object.entries(p.deps)) {
@@ -486,5 +486,49 @@ describe("PluginManager", () => {
486
486
 
487
487
  expect(testBackendInit).toHaveBeenCalled();
488
488
  });
489
+
490
+ it("initializes every plugin across the two-pass (migrate-all, then init-all) loop", async () => {
491
+ const mockRouter = {
492
+ route: mock(),
493
+ all: mock(),
494
+ newResponse: mock(),
495
+ } as never;
496
+
497
+ // Boot runs migrations for all plugins in pass 1, then inits in pass 2.
498
+ // Manual test plugins have no plugin path so pass 1 is a no-op for them;
499
+ // this guards that pass 2 still initializes EVERY plugin (the split loop
500
+ // doesn't drop any) and follows topological order.
501
+ const initOrder: string[] = [];
502
+ const makePlugin = (pluginId: string) =>
503
+ createBackendPlugin({
504
+ metadata: { pluginId },
505
+ register(env) {
506
+ env.registerInit({
507
+ deps: {},
508
+ init: async () => {
509
+ initOrder.push(pluginId);
510
+ },
511
+ });
512
+ },
513
+ });
514
+
515
+ pluginManager.registerService(
516
+ coreServices.queueManager,
517
+ createMockQueueManager(),
518
+ );
519
+ pluginManager.registerService(coreServices.logger, createMockLogger());
520
+ pluginManager.registerService(
521
+ coreServices.database,
522
+ createMockDb() as never,
523
+ );
524
+
525
+ await pluginManager.loadPlugins(
526
+ mockRouter,
527
+ [makePlugin("plugin-a"), makePlugin("plugin-b"), makePlugin("plugin-c")],
528
+ { skipDiscovery: true },
529
+ );
530
+
531
+ expect(initOrder).toEqual(["plugin-a", "plugin-b", "plugin-c"]);
532
+ });
489
533
  });
490
534
  });
@@ -0,0 +1,124 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import type { PoolClient } from "pg";
3
+ import type { NodePgDatabase } from "drizzle-orm/node-postgres";
4
+ import { runPluginMigrations } from "./run-plugin-migrations";
5
+
6
+ type MigrationDb = NodePgDatabase<Record<string, unknown>>;
7
+
8
+ /**
9
+ * A fake pooled client that records the SQL it runs and whether it was
10
+ * released, modelling the bits of `pg.PoolClient` the helper touches.
11
+ */
12
+ function makeFakeClient() {
13
+ const queries: string[] = [];
14
+ let released = false;
15
+ const client = {
16
+ query: async (text: string) => {
17
+ queries.push(text);
18
+ return { rows: [] };
19
+ },
20
+ release: () => {
21
+ released = true;
22
+ },
23
+ };
24
+ return {
25
+ client: client as unknown as PoolClient,
26
+ queries,
27
+ isReleased: () => released,
28
+ };
29
+ }
30
+
31
+ describe("runPluginMigrations", () => {
32
+ it("runs the migrator on a single pinned connection with search_path set first", async () => {
33
+ const { client, queries, isReleased } = makeFakeClient();
34
+
35
+ let connectCount = 0;
36
+ const pool = {
37
+ connect: async () => {
38
+ connectCount++;
39
+ return client;
40
+ },
41
+ };
42
+
43
+ const fakeDb = { __fake: true } as unknown as MigrationDb;
44
+ let dbPassedToMigrate: unknown;
45
+ let clientPassedToFactory: PoolClient | undefined;
46
+ let queriesBeforeMigrate: string[] = [];
47
+
48
+ await runPluginMigrations({
49
+ pool,
50
+ migrationsFolder: "/plugins/healthcheck/drizzle",
51
+ migrationsSchema: "plugin_healthcheck",
52
+ createMigrationDb: (c) => {
53
+ clientPassedToFactory = c;
54
+ return fakeDb;
55
+ },
56
+ migrate: async (db, config) => {
57
+ dbPassedToMigrate = db;
58
+ queriesBeforeMigrate = [...queries];
59
+ expect(config.migrationsFolder).toBe("/plugins/healthcheck/drizzle");
60
+ expect(config.migrationsSchema).toBe("plugin_healthcheck");
61
+ },
62
+ });
63
+
64
+ // Exactly ONE connection is checked out: the SET and the migration must
65
+ // share a physical connection, which was the whole bug.
66
+ expect(connectCount).toBe(1);
67
+
68
+ // The migrator runs against a Drizzle instance bound to that same pinned
69
+ // client.
70
+ expect(clientPassedToFactory).toBe(client);
71
+ expect(dbPassedToMigrate).toBe(fakeDb);
72
+
73
+ // search_path is pointed at the plugin schema (after creating it) BEFORE
74
+ // the migrator runs.
75
+ expect(queriesBeforeMigrate).toEqual([
76
+ 'CREATE SCHEMA IF NOT EXISTS "plugin_healthcheck"',
77
+ 'SET search_path = "plugin_healthcheck"',
78
+ ]);
79
+
80
+ // Afterwards the connection is reset and returned to the pool.
81
+ expect(queries.at(-1)).toBe("SET search_path = public");
82
+ expect(isReleased()).toBe(true);
83
+ });
84
+
85
+ it("resets search_path and releases the connection even when the migrator throws", async () => {
86
+ const { client, queries, isReleased } = makeFakeClient();
87
+ const pool = { connect: async () => client };
88
+ const boom = new Error("migration failed");
89
+
90
+ await expect(
91
+ runPluginMigrations({
92
+ pool,
93
+ migrationsFolder: "/x",
94
+ migrationsSchema: "plugin_x",
95
+ createMigrationDb: () => ({}) as unknown as MigrationDb,
96
+ migrate: async () => {
97
+ throw boom;
98
+ },
99
+ }),
100
+ ).rejects.toThrow("migration failed");
101
+
102
+ expect(queries.at(-1)).toBe("SET search_path = public");
103
+ expect(isReleased()).toBe(true);
104
+ });
105
+
106
+ it("never touches anything but connect() on the pool (no session SET on the shared pool)", async () => {
107
+ const { client } = makeFakeClient();
108
+ const pool = { connect: async () => client };
109
+
110
+ await runPluginMigrations({
111
+ pool,
112
+ migrationsFolder: "/x",
113
+ migrationsSchema: "plugin_x",
114
+ createMigrationDb: () => ({}) as unknown as MigrationDb,
115
+ migrate: async () => {},
116
+ });
117
+
118
+ // The pool surface the helper depends on is exactly `connect`; everything
119
+ // else (CREATE SCHEMA, SET search_path, the migration itself) happens on
120
+ // the checked-out client. If this contract ever widens, the regression
121
+ // that motivated the pinned connection could creep back in.
122
+ expect(Object.keys(pool)).toEqual(["connect"]);
123
+ });
124
+ });
@@ -0,0 +1,88 @@
1
+ import { drizzle, type NodePgDatabase } from "drizzle-orm/node-postgres";
2
+ import { migrate as defaultMigrate } from "drizzle-orm/node-postgres/migrator";
3
+ import type { Pool, PoolClient } from "pg";
4
+
5
+ type MigrationDb = NodePgDatabase<Record<string, unknown>>;
6
+
7
+ export interface RunPluginMigrationsArgs {
8
+ /** Shared admin pool; the helper checks out ONE dedicated client from it. */
9
+ pool: Pick<Pool, "connect">;
10
+ /** Absolute path to the plugin's Drizzle migrations folder. */
11
+ migrationsFolder: string;
12
+ /**
13
+ * Postgres schema the plugin's objects live in (e.g. `plugin_healthcheck`).
14
+ * Also used by Drizzle for the per-plugin `__drizzle_migrations` table.
15
+ */
16
+ migrationsSchema: string;
17
+ /**
18
+ * Builds the Drizzle instance the migrator runs against. Defaults to one
19
+ * bound to the pinned `client`. Injectable so tests can run without a real
20
+ * connection.
21
+ */
22
+ createMigrationDb?: (client: PoolClient) => MigrationDb;
23
+ /** Drizzle's migrator. Injectable for tests. */
24
+ migrate?: (
25
+ db: MigrationDb,
26
+ config: { migrationsFolder: string; migrationsSchema: string },
27
+ ) => Promise<void>;
28
+ }
29
+
30
+ /**
31
+ * Run a plugin's Drizzle migrations on a SINGLE pinned pool connection.
32
+ *
33
+ * ## Why a pinned connection is required
34
+ *
35
+ * Plugin migrations are schema-agnostic: they reference the plugin's tables,
36
+ * types, and enums *unqualified* and rely on `search_path` to resolve them
37
+ * into the plugin's schema (e.g. `plugin_healthcheck`). So `search_path` must
38
+ * be set before the migration SQL runs.
39
+ *
40
+ * Setting it at the *session* level on the shared pool does NOT work, for the
41
+ * same reason session-level advisory locks don't (see `advisory-lock.ts`):
42
+ * Drizzle's `migrate()` wraps all pending migrations in one transaction, and
43
+ * with a `pg.Pool` that transaction may check out a *different* physical
44
+ * connection than the one the `SET` ran on. The migration statements then
45
+ * execute with the default `public` search_path.
46
+ *
47
+ * This stays invisible on a fresh database - every object (including each
48
+ * enum) is created within that one transaction, so unqualified references
49
+ * still resolve against whatever schema that connection happens to use. But on
50
+ * an UPGRADE, where earlier migrations already created an enum in the plugin
51
+ * schema and only newer migrations run, a new migration that references the
52
+ * pre-existing enum fails with `type "..." does not exist`.
53
+ *
54
+ * Binding the migrator to ONE pinned client, on which we set `search_path`
55
+ * first, guarantees every migration statement runs under the intended schema.
56
+ */
57
+ export async function runPluginMigrations({
58
+ pool,
59
+ migrationsFolder,
60
+ migrationsSchema,
61
+ createMigrationDb = (client) => drizzle(client),
62
+ migrate = defaultMigrate,
63
+ }: RunPluginMigrationsArgs): Promise<void> {
64
+ const client = await pool.connect();
65
+ try {
66
+ // Ensure the schema exists before pointing search_path at it. PostgreSQL
67
+ // silently ignores a search_path entry that names a missing schema, so
68
+ // unqualified names would not resolve into the plugin schema - the very bug this helper exists to prevent.
69
+ await client.query(`CREATE SCHEMA IF NOT EXISTS "${migrationsSchema}"`);
70
+ await client.query(`SET search_path = "${migrationsSchema}"`);
71
+
72
+ await migrate(createMigrationDb(client), {
73
+ migrationsFolder,
74
+ migrationsSchema,
75
+ });
76
+ } finally {
77
+ // Reset before the client returns to the pool so the setting never leaks
78
+ // onto an unrelated query that later reuses this physical connection.
79
+ try {
80
+ await client.query("SET search_path = public");
81
+ } catch (resetError) {
82
+ // Best-effort: the release below still returns the connection. Reference
83
+ // the binding so lint doesn't flag an empty catch.
84
+ void resetError;
85
+ }
86
+ client.release();
87
+ }
88
+ }