@checkstack/backend 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +77 -0
- package/package.json +8 -8
- package/src/index.ts +184 -6
- package/src/plugin-manager/core-services.ts +21 -2
- package/src/plugin-manager/plugin-loader.ts +21 -0
- package/src/plugin-manager.ts +29 -0
- package/src/router-incremental.test.ts +49 -0
- package/src/services/readiness-registry.test.ts +124 -0
- package/src/services/readiness-registry.ts +103 -0
- package/tsconfig.json +33 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,82 @@
|
|
|
1
1
|
# @checkstack/backend
|
|
2
2
|
|
|
3
|
+
## 0.8.2
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 302cd3f: fix: resilient startup routing + /health and /ready endpoints
|
|
8
|
+
|
|
9
|
+
Three fixes that together eliminate startup-race errors during boot and
|
|
10
|
+
hot-reload, plus a new readiness API for plugins.
|
|
11
|
+
|
|
12
|
+
1. **TrieRouter swap (root cause).** Hono's default `SmartRouter` freezes
|
|
13
|
+
its matcher on the first request — any later `app.add()` throws
|
|
14
|
+
`MESSAGE_MATCHER_IS_ALREADY_BUILT`. Plugins register routes during
|
|
15
|
+
`init()` (and at runtime via `loadSinglePlugin`), so an early request
|
|
16
|
+
during boot would silently lock the matcher with only the module-load
|
|
17
|
+
routes, and every later route registration would fail. The backend
|
|
18
|
+
now uses `TrieRouter`, which is incremental — routes can be added at
|
|
19
|
+
any time, including after thousands of requests have been served.
|
|
20
|
+
This also future-proofs runtime plugin install.
|
|
21
|
+
|
|
22
|
+
2. **Init gating + fail-loud.** Non-bypass requests now `await`
|
|
23
|
+
`pluginManager.routesReadyPromise` (with a 30s timeout that returns 503 + Retry-After) so
|
|
24
|
+
no traffic reaches Hono before plugins finish registering routes.
|
|
25
|
+
Init failures crash the process via `process.exit(1)` so docker/k8s
|
|
26
|
+
restart cleanly instead of silently serving a half-initialized
|
|
27
|
+
backend.
|
|
28
|
+
|
|
29
|
+
3. **`/assets/*` fall-through.** The production frontend asset handler
|
|
30
|
+
now calls `next()` instead of `c.notFound()` on miss, so
|
|
31
|
+
plugin-asset routes registered later (`/assets/plugins/:pluginName/*`)
|
|
32
|
+
actually get a chance to match.
|
|
33
|
+
|
|
34
|
+
### New: platform endpoints under `/.checkstack/*`
|
|
35
|
+
|
|
36
|
+
- `GET /.checkstack/health` — liveness, always 200 once the process is up.
|
|
37
|
+
- `GET /.checkstack/ready` — readiness, 503 until init completes and all
|
|
38
|
+
critical probes pass; 200 otherwise. Returns `{ ready, checks: [...] }`
|
|
39
|
+
with per-probe status, message/error and duration.
|
|
40
|
+
|
|
41
|
+
The leading `.checkstack/` prefix namespaces platform-level endpoints
|
|
42
|
+
away from plugin `/api/*`, runtime frontend assets, and the SPA wildcard,
|
|
43
|
+
leaving room for additional operator endpoints in the future.
|
|
44
|
+
|
|
45
|
+
### New: plugin readiness API
|
|
46
|
+
|
|
47
|
+
Plugins can contribute readiness probes via the new
|
|
48
|
+
`coreServices.readinessRegistry` service:
|
|
49
|
+
|
|
50
|
+
```ts
|
|
51
|
+
registerInit({
|
|
52
|
+
deps: { readiness: coreServices.readinessRegistry },
|
|
53
|
+
async init({ readiness }) {
|
|
54
|
+
readiness.register({
|
|
55
|
+
name: "queue.connected",
|
|
56
|
+
critical: true,
|
|
57
|
+
check: async () => ({
|
|
58
|
+
ok: pool.isConnected(),
|
|
59
|
+
message: pool.isConnected() ? undefined : "queue pool not connected",
|
|
60
|
+
}),
|
|
61
|
+
});
|
|
62
|
+
},
|
|
63
|
+
});
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Probes run in parallel, throwing probes are reported as `ok: false`,
|
|
67
|
+
and non-critical probes don't block readiness.
|
|
68
|
+
|
|
69
|
+
- Updated dependencies [302cd3f]
|
|
70
|
+
- @checkstack/backend-api@0.14.1
|
|
71
|
+
- @checkstack/cache-api@0.2.3
|
|
72
|
+
- @checkstack/queue-api@0.2.17
|
|
73
|
+
- @checkstack/signal-backend@0.2.2
|
|
74
|
+
- @checkstack/api-docs-common@0.1.10
|
|
75
|
+
- @checkstack/auth-common@0.6.4
|
|
76
|
+
- @checkstack/common@0.7.0
|
|
77
|
+
- @checkstack/drizzle-helper@0.0.4
|
|
78
|
+
- @checkstack/signal-common@0.2.0
|
|
79
|
+
|
|
3
80
|
## 0.8.1
|
|
4
81
|
|
|
5
82
|
### Patch Changes
|
package/package.json
CHANGED
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@checkstack/backend",
|
|
3
|
-
"version": "0.8.1",
|
|
3
|
+
"version": "0.8.2",
|
|
4
4
|
"checkstack": {
|
|
5
5
|
"type": "backend"
|
|
6
6
|
},
|
|
7
7
|
"type": "module",
|
|
8
8
|
"scripts": {
|
|
9
9
|
"dev": "bun --env-file=../../.env --watch src/index.ts",
|
|
10
|
-
"typecheck": "
|
|
10
|
+
"typecheck": "tsgo -b",
|
|
11
11
|
"generate": "bun --env-file=../../.env run drizzle-kit generate",
|
|
12
12
|
"lint": "bun run lint:code",
|
|
13
13
|
"lint:code": "eslint . --max-warnings 0"
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@checkstack/api-docs-common": "0.1.10",
|
|
17
|
-
"@checkstack/auth-common": "0.6.
|
|
18
|
-
"@checkstack/backend-api": "0.
|
|
17
|
+
"@checkstack/auth-common": "0.6.4",
|
|
18
|
+
"@checkstack/backend-api": "0.14.0",
|
|
19
19
|
"@checkstack/common": "0.7.0",
|
|
20
20
|
"@checkstack/drizzle-helper": "0.0.4",
|
|
21
|
-
"@checkstack/cache-api": "0.2.
|
|
22
|
-
"@checkstack/queue-api": "0.2.
|
|
23
|
-
"@checkstack/signal-backend": "0.2.
|
|
21
|
+
"@checkstack/cache-api": "0.2.2",
|
|
22
|
+
"@checkstack/queue-api": "0.2.16",
|
|
23
|
+
"@checkstack/signal-backend": "0.2.1",
|
|
24
24
|
"@checkstack/signal-common": "0.2.0",
|
|
25
25
|
"@hono/zod-validator": "^0.7.6",
|
|
26
26
|
"@orpc/client": "^1.13.14",
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
"@types/bun": "latest",
|
|
42
42
|
"@checkstack/tsconfig": "0.0.5",
|
|
43
43
|
"@checkstack/scripts": "0.1.2",
|
|
44
|
-
"@checkstack/test-utils-backend": "0.1.
|
|
44
|
+
"@checkstack/test-utils-backend": "0.1.22",
|
|
45
45
|
"drizzle-kit": "^0.31.10"
|
|
46
46
|
}
|
|
47
47
|
}
|
package/src/index.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { Server } from "bun";
|
|
2
2
|
import { type Context, Hono } from "hono";
|
|
3
|
+
import { TrieRouter } from "hono/router/trie-router";
|
|
3
4
|
import { PluginManager } from "./plugin-manager";
|
|
4
5
|
import { logger } from "hono/logger";
|
|
5
6
|
import { migrate } from "drizzle-orm/node-postgres/migrator";
|
|
@@ -8,6 +9,7 @@ import path from "node:path";
|
|
|
8
9
|
import fs from "node:fs";
|
|
9
10
|
import { rootLogger } from "./logger";
|
|
10
11
|
import { coreServices, coreHooks } from "@checkstack/backend-api";
|
|
12
|
+
import { extractErrorMessage } from "@checkstack/common";
|
|
11
13
|
import { plugins } from "./schema";
|
|
12
14
|
import { eq, and } from "drizzle-orm";
|
|
13
15
|
import { PluginLocalInstaller } from "./services/plugin-installer";
|
|
@@ -52,9 +54,40 @@ import {
|
|
|
52
54
|
|
|
53
55
|
import { cors } from "hono/cors";
|
|
54
56
|
|
|
55
|
-
|
|
57
|
+
// IMPORTANT: TrieRouter (not the default SmartRouter).
|
|
58
|
+
// SmartRouter freezes its matcher on the first incoming request — any later
|
|
59
|
+
// app.add() throws "Can not add a route since the matcher is already built".
|
|
60
|
+
// Plugins register routes asynchronously during init() and at runtime via
|
|
61
|
+
// loadSinglePlugin(), so we need an incremental router.
|
|
62
|
+
const app = new Hono({ router: new TrieRouter() });
|
|
56
63
|
const pluginManager = new PluginManager();
|
|
57
64
|
|
|
65
|
+
/**
|
|
66
|
+
* Init lifecycle state.
|
|
67
|
+
*
|
|
68
|
+
* `initialized` flips to true after the entire init() completes (Phases 1-3).
|
|
69
|
+
* It feeds the "core.init" readiness probe consumed by /ready.
|
|
70
|
+
*
|
|
71
|
+
* `initError` is populated when init throws; the process is then exited so
|
|
72
|
+
* the supervisor (docker/k8s) restarts us — we never serve a half-initialized
|
|
73
|
+
* backend.
|
|
74
|
+
*
|
|
75
|
+
* The HTTP request gate does NOT key off these flags directly. It awaits
|
|
76
|
+
* `pluginManager.routesReadyPromise`, which resolves earlier — right after
|
|
77
|
+
* `/api/:pluginId/*` is added to the root router and BEFORE `afterPluginsReady`
|
|
78
|
+
* runs — so cross-plugin RPC calls during plugin boot don't deadlock on
|
|
79
|
+
* themselves.
|
|
80
|
+
*/
|
|
81
|
+
let initError: Error | undefined;
|
|
82
|
+
let initialized = false;
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Maximum time a request will wait for plugin routes to be registered before
|
|
86
|
+
* falling back to a 503 Service Unavailable. Without this, a wedged plugin
|
|
87
|
+
* would hang every gated (non-/.checkstack/*) request forever.
|
|
88
|
+
*/
|
|
89
|
+
const READY_WAIT_TIMEOUT_MS = 30_000;
|
|
90
|
+
|
|
58
91
|
// WebSocket handler instance (initialized during init)
|
|
59
92
|
let wsHandler: ReturnType<typeof createWebSocketHandler> | undefined;
|
|
60
93
|
|
|
@@ -82,6 +115,50 @@ app.use(
|
|
|
82
115
|
);
|
|
83
116
|
app.use("*", logger());
|
|
84
117
|
|
|
118
|
+
// =============================================================================
|
|
119
|
+
// PLATFORM ENDPOINTS — /.checkstack/*
|
|
120
|
+
// =============================================================================
|
|
121
|
+
//
|
|
122
|
+
// All "platform-level" endpoints (probes, future operator hooks) live under
|
|
123
|
+
// /.checkstack/* so they are clearly separated from plugin /api/*, runtime
|
|
124
|
+
// frontend assets, and the SPA wildcard. The leading dot keeps them out of
|
|
125
|
+
// any plugin URL space by construction.
|
|
126
|
+
//
|
|
127
|
+
// Health & readiness:
|
|
128
|
+
// - registered at module load; bypass the boot gate in `fetch()` so that
|
|
129
|
+
// orchestrators (Kubernetes, docker-compose) can probe a still-booting
|
|
130
|
+
// process.
|
|
131
|
+
// - /.checkstack/health = "process is alive"
|
|
132
|
+
// - /.checkstack/ready = "plugins initialized and all critical probes pass"
|
|
133
|
+
|
|
134
|
+
/** Liveness probe — answers as long as the process responds. */
|
|
135
|
+
app.get("/.checkstack/health", (c) => c.json({ status: "ok" }));
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Readiness probe — aggregates plugin-contributed checks.
|
|
139
|
+
* - 503 while init is in flight or has failed
|
|
140
|
+
* - 503 if any critical probe is failing
|
|
141
|
+
* - 200 only when init completed AND all critical probes pass
|
|
142
|
+
*/
|
|
143
|
+
app.get("/.checkstack/ready", async (c) => {
|
|
144
|
+
if (initError) {
|
|
145
|
+
return c.json(
|
|
146
|
+
{ ready: false, error: initError.message, checks: [] },
|
|
147
|
+
503,
|
|
148
|
+
{ "Retry-After": "5" },
|
|
149
|
+
);
|
|
150
|
+
}
|
|
151
|
+
if (!initialized) {
|
|
152
|
+
return c.json(
|
|
153
|
+
{ ready: false, reason: "initializing", checks: [] },
|
|
154
|
+
503,
|
|
155
|
+
{ "Retry-After": "1" },
|
|
156
|
+
);
|
|
157
|
+
}
|
|
158
|
+
const snapshot = await pluginManager.getReadinessRegistry().evaluate();
|
|
159
|
+
return c.json(snapshot, snapshot.ready ? 200 : 503);
|
|
160
|
+
});
|
|
161
|
+
|
|
85
162
|
// SECURITY: Add missing standard security headers across all API responses
|
|
86
163
|
app.use("/api/*", async (c, next) => {
|
|
87
164
|
await next();
|
|
@@ -185,14 +262,16 @@ if (frontendDistPath && fs.existsSync(frontendDistPath)) {
|
|
|
185
262
|
};
|
|
186
263
|
|
|
187
264
|
// Serve static assets (JS, CSS, images, etc.)
|
|
188
|
-
|
|
265
|
+
// Fall through to next() on miss so plugin-asset routes (registered later
|
|
266
|
+
// during init at /assets/plugins/:pluginName/*) get a chance to match.
|
|
267
|
+
app.get("/assets/*", async (c, next) => {
|
|
189
268
|
const assetPath = c.req.path.replace("/assets/", "");
|
|
190
269
|
const filePath = path.join(frontendDistPath, "assets", assetPath);
|
|
191
270
|
|
|
192
271
|
if (fs.existsSync(filePath)) {
|
|
193
272
|
return serveFile(c, filePath);
|
|
194
273
|
}
|
|
195
|
-
return c.notFound();
|
|
274
|
+
return next();
|
|
196
275
|
});
|
|
197
276
|
|
|
198
277
|
// Serve vendor scripts (externalized React, react-router-dom, etc.)
|
|
@@ -441,16 +520,117 @@ const init = async () => {
|
|
|
441
520
|
logger: rootLogger.child({ service: "WebSocket" }),
|
|
442
521
|
});
|
|
443
522
|
|
|
523
|
+
// Register the core "init" readiness probe. Plugin-contributed probes are
|
|
524
|
+
// additive — see coreServices.readinessRegistry for the plugin-facing API.
|
|
525
|
+
pluginManager.getReadinessRegistry().register({
|
|
526
|
+
name: "core.init",
|
|
527
|
+
critical: true,
|
|
528
|
+
check: async () => ({ ok: initialized, message: initialized ? undefined : "init not complete" }),
|
|
529
|
+
});
|
|
530
|
+
|
|
444
531
|
rootLogger.info("✅ Checkstack Core initialized.");
|
|
445
532
|
};
|
|
446
533
|
|
|
447
|
-
|
|
534
|
+
/**
|
|
535
|
+
* Fire-and-forget init. We deliberately don't `await` at the top level so the
|
|
536
|
+
* server can answer /health and /ready while plugins are still loading;
|
|
537
|
+
* non-bypass requests are gated via `waitForRoutesReady()` below.
|
|
538
|
+
*/
|
|
539
|
+
// eslint-disable-next-line unicorn/prefer-top-level-await -- intentionally non-blocking; gates handled in waitForRoutesReady()
|
|
540
|
+
void (async () => {
|
|
541
|
+
try {
|
|
542
|
+
await init();
|
|
543
|
+
initialized = true;
|
|
544
|
+
} catch (error: unknown) {
|
|
545
|
+
initError = new Error(extractErrorMessage(error, "init failed"));
|
|
546
|
+
rootLogger.error(
|
|
547
|
+
"❌ FATAL: Checkstack Core init failed; the process will exit so the supervisor can restart it.",
|
|
548
|
+
initError,
|
|
549
|
+
);
|
|
550
|
+
// Give the logger one tick to flush, then exit so docker/k8s restarts us.
|
|
551
|
+
// A half-initialized backend silently serves broken state — restart is
|
|
552
|
+
// strictly better than continuing. We disable the no-process-exit rule
|
|
553
|
+
// because this IS the canonical fail-fast pattern for a long-running
|
|
554
|
+
// server entrypoint.
|
|
555
|
+
setTimeout(() => {
|
|
556
|
+
// eslint-disable-next-line unicorn/no-process-exit -- intentional fail-fast on init failure
|
|
557
|
+
process.exit(1);
|
|
558
|
+
}, 50);
|
|
559
|
+
}
|
|
560
|
+
})();
|
|
561
|
+
|
|
562
|
+
/**
|
|
563
|
+
* Paths that bypass the boot gate. Platform endpoints under /.checkstack/*
|
|
564
|
+
* MUST be reachable while the backend is still booting so orchestrators can
|
|
565
|
+
* probe it. Everything else waits until plugin routes are registered.
|
|
566
|
+
*/
|
|
567
|
+
const BOOT_BYPASS_PREFIX = "/.checkstack/";
|
|
568
|
+
|
|
569
|
+
/**
|
|
570
|
+
* Wait until plugin RPC routes are registered on the root router (resolved
|
|
571
|
+
* inside `loadPlugins` BEFORE Phase 2 / `afterPluginsReady`). Returns:
|
|
572
|
+
* - undefined when routes are ready → caller should proceed to Hono.
|
|
573
|
+
* - a 503 Response when init failed or the wait timed out.
|
|
574
|
+
*
|
|
575
|
+
* Why this gate, and why at this specific point:
|
|
576
|
+
* - Earlier (before /api/:pluginId/* is added), an incoming request would
|
|
577
|
+
* short-circuit through the SPA wildcard or 404 because the plugin route
|
|
578
|
+
* simply doesn't exist yet on the router.
|
|
579
|
+
* - Later (after full init), self-referencing RPC calls made from
|
|
580
|
+
* `afterPluginsReady` would deadlock waiting for init to complete — so
|
|
581
|
+
* we MUST open the gate before Phase 3 runs.
|
|
582
|
+
* - `loadPlugins()` resolves `routesReadyPromise` immediately after
|
|
583
|
+
* `registerApiRoute()`, which is the earliest point both conditions hold.
|
|
584
|
+
*/
|
|
585
|
+
async function waitForRoutesReady(): Promise<Response | undefined> {
|
|
586
|
+
if (initError) {
|
|
587
|
+
return Response.json(
|
|
588
|
+
{ error: "Backend init failed", message: initError.message },
|
|
589
|
+
{ status: 503, headers: { "Retry-After": "5" } },
|
|
590
|
+
);
|
|
591
|
+
}
|
|
592
|
+
let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
|
|
593
|
+
// pluginManager.routesReadyPromise resolves from inside loadPlugins; it
|
|
594
|
+
// never rejects. The init catch handler logs + process.exit's separately.
|
|
595
|
+
const timedOut = await Promise.race([
|
|
596
|
+
pluginManager.routesReadyPromise.then(() => false),
|
|
597
|
+
new Promise<true>((resolve) => {
|
|
598
|
+
timeoutHandle = setTimeout(() => resolve(true), READY_WAIT_TIMEOUT_MS);
|
|
599
|
+
}),
|
|
600
|
+
]);
|
|
601
|
+
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
602
|
+
if (timedOut) {
|
|
603
|
+
return Response.json(
|
|
604
|
+
{ error: "Backend not ready", message: "boot timeout" },
|
|
605
|
+
{ status: 503, headers: { "Retry-After": "5" } },
|
|
606
|
+
);
|
|
607
|
+
}
|
|
608
|
+
// Re-read after await — init may have rejected while we were waiting.
|
|
609
|
+
const errAfter = initError as Error | undefined;
|
|
610
|
+
if (errAfter) {
|
|
611
|
+
return Response.json(
|
|
612
|
+
{ error: "Backend init failed", message: errAfter.message },
|
|
613
|
+
{ status: 503, headers: { "Retry-After": "5" } },
|
|
614
|
+
);
|
|
615
|
+
}
|
|
616
|
+
return undefined;
|
|
617
|
+
}
|
|
448
618
|
|
|
449
619
|
// Custom fetch handler that handles WebSocket upgrades
|
|
450
620
|
const fetch = async (
|
|
451
621
|
req: Request,
|
|
452
622
|
server: Server<ServerWsData>
|
|
453
623
|
): Promise<Response | undefined> => {
|
|
624
|
+
const url = new URL(req.url);
|
|
625
|
+
|
|
626
|
+
// Platform endpoints (/.checkstack/*) bypass the boot gate so orchestrators
|
|
627
|
+
// can poll a booting process. Everything else waits until plugin routes
|
|
628
|
+
// are registered on the root router (resolved before Phase 2 init runs).
|
|
629
|
+
if (!url.pathname.startsWith(BOOT_BYPASS_PREFIX)) {
|
|
630
|
+
const stalled = await waitForRoutesReady();
|
|
631
|
+
if (stalled) return stalled;
|
|
632
|
+
}
|
|
633
|
+
|
|
454
634
|
// Set the server reference for WebSocket pub/sub after startup
|
|
455
635
|
if (wsHandler && !server.upgrade) {
|
|
456
636
|
// Server doesn't support WebSocket upgrade (shouldn't happen with Bun)
|
|
@@ -461,8 +641,6 @@ const fetch = async (
|
|
|
461
641
|
// Cast is safe: signal handler only reads its own fields via connectionType guard
|
|
462
642
|
wsHandler?.setServer(server as unknown as Server<WebSocketData>);
|
|
463
643
|
|
|
464
|
-
const url = new URL(req.url);
|
|
465
|
-
|
|
466
644
|
// Handle WebSocket upgrade for signals
|
|
467
645
|
if (url.pathname === "/api/signals/ws") {
|
|
468
646
|
// Try to authenticate, but allow anonymous connections for broadcast signals
|
|
@@ -30,6 +30,10 @@ import {
|
|
|
30
30
|
WebSocketRouteStoreImpl,
|
|
31
31
|
createScopedWsRegistry,
|
|
32
32
|
} from "../services/ws-route-registry";
|
|
33
|
+
import {
|
|
34
|
+
CoreReadinessRegistry,
|
|
35
|
+
createScopedReadinessRegistry,
|
|
36
|
+
} from "../services/readiness-registry";
|
|
33
37
|
|
|
34
38
|
/**
|
|
35
39
|
* Check if a PostgreSQL schema exists.
|
|
@@ -59,7 +63,11 @@ export function registerCoreServices({
|
|
|
59
63
|
pluginRpcRouters: Map<string, unknown>;
|
|
60
64
|
pluginHttpHandlers: Map<string, (req: Request) => Promise<Response>>;
|
|
61
65
|
pluginContractRegistry: Map<string, unknown>;
|
|
62
|
-
}): {
|
|
66
|
+
}): {
|
|
67
|
+
collectorRegistry: CoreCollectorRegistry;
|
|
68
|
+
wsStore: WebSocketRouteStoreImpl;
|
|
69
|
+
readinessRegistry: CoreReadinessRegistry;
|
|
70
|
+
} {
|
|
63
71
|
// 1. Database Factory (Scoped)
|
|
64
72
|
registry.registerFactory(coreServices.database, async (metadata) => {
|
|
65
73
|
const { pluginId, previousPluginIds } = metadata;
|
|
@@ -356,6 +364,17 @@ export function registerCoreServices({
|
|
|
356
364
|
createScopedWsRegistry(globalWsStore, metadata.pluginId),
|
|
357
365
|
);
|
|
358
366
|
|
|
367
|
+
// 11. Readiness Registry (Scoped Factory)
|
|
368
|
+
// Plugins contribute probes that are aggregated by the /ready endpoint.
|
|
369
|
+
const globalReadinessRegistry = new CoreReadinessRegistry();
|
|
370
|
+
registry.registerFactory(coreServices.readinessRegistry, () =>
|
|
371
|
+
createScopedReadinessRegistry(globalReadinessRegistry),
|
|
372
|
+
);
|
|
373
|
+
|
|
359
374
|
// Return global registries for lifecycle cleanup
|
|
360
|
-
return {
|
|
375
|
+
return {
|
|
376
|
+
collectorRegistry: globalCollectorRegistry,
|
|
377
|
+
wsStore: globalWsStore,
|
|
378
|
+
readinessRegistry: globalReadinessRegistry,
|
|
379
|
+
};
|
|
361
380
|
}
|
|
@@ -56,6 +56,14 @@ export interface PluginLoaderDeps {
|
|
|
56
56
|
* Map of pluginId -> contract for OpenAPI generation.
|
|
57
57
|
*/
|
|
58
58
|
pluginContractRegistry: Map<string, AnyContractRouter>;
|
|
59
|
+
/**
|
|
60
|
+
* Called once `/api/:pluginId/*` is added to the root router and Phase 2
|
|
61
|
+
* (per-plugin init) is about to start. From this point on, plugin RPC
|
|
62
|
+
* routers come online incrementally as each plugin initializes — so
|
|
63
|
+
* self-referencing HTTP calls (e.g. RPC made from `afterPluginsReady`)
|
|
64
|
+
* can be allowed through the boot-time request gate without deadlocking.
|
|
65
|
+
*/
|
|
66
|
+
onApiRouteRegistered?: () => void;
|
|
59
67
|
}
|
|
60
68
|
|
|
61
69
|
/**
|
|
@@ -295,6 +303,19 @@ export async function loadPlugins({
|
|
|
295
303
|
});
|
|
296
304
|
registerApiRoute(rootRouter, apiHandler);
|
|
297
305
|
|
|
306
|
+
// Routes are now registered on the root router. Signal readiness so the
|
|
307
|
+
// server can stop blocking incoming requests in `waitForInit()`. We open
|
|
308
|
+
// the gate here (BEFORE Phase 2 / Phase 3) so that:
|
|
309
|
+
// - the static module-load endpoints (/api/plugins, /api/about, …) stop
|
|
310
|
+
// hanging behind the boot gate;
|
|
311
|
+
// - cross-plugin RPC calls made from `afterPluginsReady` can self-loop
|
|
312
|
+
// through the HTTP server without deadlocking on init completion.
|
|
313
|
+
// Plugin RPC routers come online incrementally as each plugin's Phase 2
|
|
314
|
+
// init runs; requests targeting a not-yet-initialized plugin fall through
|
|
315
|
+
// to the api-router's "Plugin metadata not found" 500, which is the
|
|
316
|
+
// pre-existing behavior and is preferable to a multi-second hang.
|
|
317
|
+
deps.onApiRouteRegistered?.();
|
|
318
|
+
|
|
298
319
|
for (const id of sortedIds) {
|
|
299
320
|
const p = pendingInits.find((x) => x.metadata.pluginId === id)!;
|
|
300
321
|
rootLogger.info(`🚀 Initializing ${p.metadata.pluginId}...`);
|
package/src/plugin-manager.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { adminPool, db } from "./db";
|
|
|
3
3
|
import { ServiceRegistry } from "./services/service-registry";
|
|
4
4
|
import type { CoreCollectorRegistry } from "./services/collector-registry";
|
|
5
5
|
import type { WebSocketRouteStoreImpl } from "./services/ws-route-registry";
|
|
6
|
+
import type { CoreReadinessRegistry } from "./services/readiness-registry";
|
|
6
7
|
import {
|
|
7
8
|
BackendPlugin,
|
|
8
9
|
ServiceRef,
|
|
@@ -54,7 +55,21 @@ export class PluginManager {
|
|
|
54
55
|
// Global WebSocket route store for server-level routing
|
|
55
56
|
private wsStore: WebSocketRouteStoreImpl;
|
|
56
57
|
|
|
58
|
+
// Global readiness registry — plugins contribute probes, /ready aggregates them
|
|
59
|
+
private readinessRegistry: CoreReadinessRegistry;
|
|
60
|
+
|
|
61
|
+
// Resolves once `/api/:pluginId/*` is registered on the root router and
|
|
62
|
+
// Phase 2 (per-plugin init) is starting. The HTTP server awaits this
|
|
63
|
+
// promise to know when it is safe to stop gating incoming requests.
|
|
64
|
+
// Held as a deferred so the listener (server) can be wired up before
|
|
65
|
+
// loadPlugins() runs.
|
|
66
|
+
private resolveRoutesReady!: () => void;
|
|
67
|
+
readonly routesReadyPromise: Promise<void>;
|
|
68
|
+
|
|
57
69
|
constructor() {
|
|
70
|
+
this.routesReadyPromise = new Promise<void>((resolve) => {
|
|
71
|
+
this.resolveRoutesReady = resolve;
|
|
72
|
+
});
|
|
58
73
|
const registries = registerCoreServices({
|
|
59
74
|
registry: this.registry,
|
|
60
75
|
adminPool,
|
|
@@ -64,6 +79,15 @@ export class PluginManager {
|
|
|
64
79
|
});
|
|
65
80
|
this.collectorRegistry = registries.collectorRegistry;
|
|
66
81
|
this.wsStore = registries.wsStore;
|
|
82
|
+
this.readinessRegistry = registries.readinessRegistry;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Get the global readiness registry so the server-level /ready endpoint
|
|
87
|
+
* can aggregate plugin-contributed probes.
|
|
88
|
+
*/
|
|
89
|
+
getReadinessRegistry(): CoreReadinessRegistry {
|
|
90
|
+
return this.readinessRegistry;
|
|
67
91
|
}
|
|
68
92
|
|
|
69
93
|
/**
|
|
@@ -124,8 +148,13 @@ export class PluginManager {
|
|
|
124
148
|
pluginMetadataRegistry: this.pluginMetadataRegistry,
|
|
125
149
|
cleanupHandlers: this.cleanupHandlers,
|
|
126
150
|
pluginContractRegistry: this.pluginContractRegistry,
|
|
151
|
+
onApiRouteRegistered: () => this.resolveRoutesReady(),
|
|
127
152
|
},
|
|
128
153
|
});
|
|
154
|
+
// Defensive: if loadPlugins returned without ever calling the callback
|
|
155
|
+
// (e.g. zero plugins discovered and no api route registered), unblock
|
|
156
|
+
// the server gate anyway — by this point Hono is fully configured.
|
|
157
|
+
this.resolveRoutesReady();
|
|
129
158
|
}
|
|
130
159
|
|
|
131
160
|
/**
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { Hono } from "hono";
|
|
3
|
+
import { TrieRouter } from "hono/router/trie-router";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Regression test for the "Hono router was already initialized" bug.
|
|
7
|
+
*
|
|
8
|
+
* Hono's default SmartRouter freezes its matcher on first request: any later
|
|
9
|
+
* `app.get/all/...` throws "Can not add a route since the matcher is already
|
|
10
|
+
* built". Plugins register routes during init() (and at runtime via
|
|
11
|
+
* loadSinglePlugin), so we use TrieRouter — which is incremental. If anyone
|
|
12
|
+
* ever swaps the router back to default, this test fails fast.
|
|
13
|
+
*
|
|
14
|
+
* See core/backend/src/index.ts where TrieRouter is wired up.
|
|
15
|
+
*/
|
|
16
|
+
describe("Hono router (TrieRouter) supports incremental route registration", () => {
|
|
17
|
+
it("accepts routes added after the first request", async () => {
|
|
18
|
+
const app = new Hono({ router: new TrieRouter() });
|
|
19
|
+
app.get("/early", (c) => c.text("early"));
|
|
20
|
+
|
|
21
|
+
// Trigger matcher build (this is what freezes SmartRouter).
|
|
22
|
+
const r1 = await app.fetch(new Request("http://x/early"));
|
|
23
|
+
expect(await r1.text()).toBe("early");
|
|
24
|
+
|
|
25
|
+
// Add a route AFTER the matcher is "built". On SmartRouter this throws.
|
|
26
|
+
app.get("/late", (c) => c.text("late"));
|
|
27
|
+
|
|
28
|
+
const r2 = await app.fetch(new Request("http://x/late"));
|
|
29
|
+
expect(r2.status).toBe(200);
|
|
30
|
+
expect(await r2.text()).toBe("late");
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
it("accepts a parameterized route added after a request", async () => {
|
|
34
|
+
const app = new Hono({ router: new TrieRouter() });
|
|
35
|
+
app.get("/seed", (c) => c.text("seed"));
|
|
36
|
+
await app.fetch(new Request("http://x/seed"));
|
|
37
|
+
|
|
38
|
+
// This is the actual production scenario: /api/:pluginId/* is registered
|
|
39
|
+
// inside loadPlugins() during init, well after the first request may have
|
|
40
|
+
// already been handled.
|
|
41
|
+
app.all("/api/:pluginId/*", (c) =>
|
|
42
|
+
c.json({ pluginId: c.req.param("pluginId") }),
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
const r = await app.fetch(new Request("http://x/api/healthcheck/foo"));
|
|
46
|
+
expect(r.status).toBe(200);
|
|
47
|
+
expect(await r.json()).toEqual({ pluginId: "healthcheck" });
|
|
48
|
+
});
|
|
49
|
+
});
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, mock } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
CoreReadinessRegistry,
|
|
4
|
+
createScopedReadinessRegistry,
|
|
5
|
+
} from "./readiness-registry";
|
|
6
|
+
import { createMockLogger } from "@checkstack/test-utils-backend";
|
|
7
|
+
|
|
8
|
+
const mockLogger = createMockLogger();
|
|
9
|
+
mock.module("../logger", () => ({
|
|
10
|
+
rootLogger: mockLogger,
|
|
11
|
+
}));
|
|
12
|
+
|
|
13
|
+
describe("CoreReadinessRegistry", () => {
|
|
14
|
+
let registry: CoreReadinessRegistry;
|
|
15
|
+
|
|
16
|
+
beforeEach(() => {
|
|
17
|
+
registry = new CoreReadinessRegistry();
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it("starts empty", () => {
|
|
21
|
+
expect(registry.isEmpty()).toBe(true);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it("evaluates to ready=true with no probes", async () => {
|
|
25
|
+
const snapshot = await registry.evaluate();
|
|
26
|
+
expect(snapshot.ready).toBe(true);
|
|
27
|
+
expect(snapshot.checks).toHaveLength(0);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("aggregates passing probes", async () => {
|
|
31
|
+
registry.register({
|
|
32
|
+
name: "db",
|
|
33
|
+
check: async () => ({ ok: true }),
|
|
34
|
+
});
|
|
35
|
+
registry.register({
|
|
36
|
+
name: "queue",
|
|
37
|
+
check: async () => ({ ok: true }),
|
|
38
|
+
});
|
|
39
|
+
const snapshot = await registry.evaluate();
|
|
40
|
+
expect(snapshot.ready).toBe(true);
|
|
41
|
+
expect(snapshot.checks).toHaveLength(2);
|
|
42
|
+
expect(snapshot.checks.every((c) => c.ok)).toBe(true);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("ready=false when a critical probe fails", async () => {
|
|
46
|
+
registry.register({
|
|
47
|
+
name: "db",
|
|
48
|
+
check: async () => ({ ok: false, message: "down" }),
|
|
49
|
+
});
|
|
50
|
+
const snapshot = await registry.evaluate();
|
|
51
|
+
expect(snapshot.ready).toBe(false);
|
|
52
|
+
expect(snapshot.checks[0].message).toBe("down");
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it("ready=true when only a non-critical probe fails", async () => {
|
|
56
|
+
registry.register({
|
|
57
|
+
name: "warmup",
|
|
58
|
+
critical: false,
|
|
59
|
+
check: async () => ({ ok: false }),
|
|
60
|
+
});
|
|
61
|
+
registry.register({
|
|
62
|
+
name: "db",
|
|
63
|
+
check: async () => ({ ok: true }),
|
|
64
|
+
});
|
|
65
|
+
const snapshot = await registry.evaluate();
|
|
66
|
+
expect(snapshot.ready).toBe(true);
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
it("treats thrown probes as failed and surfaces the error", async () => {
|
|
70
|
+
registry.register({
|
|
71
|
+
name: "boom",
|
|
72
|
+
check: async () => {
|
|
73
|
+
throw new Error("kaboom");
|
|
74
|
+
},
|
|
75
|
+
});
|
|
76
|
+
const snapshot = await registry.evaluate();
|
|
77
|
+
expect(snapshot.ready).toBe(false);
|
|
78
|
+
expect(snapshot.checks[0].ok).toBe(false);
|
|
79
|
+
expect(snapshot.checks[0].error).toBe("kaboom");
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
it("overwrites duplicate names with a warning", async () => {
|
|
83
|
+
registry.register({
|
|
84
|
+
name: "db",
|
|
85
|
+
check: async () => ({ ok: false }),
|
|
86
|
+
});
|
|
87
|
+
registry.register({
|
|
88
|
+
name: "db",
|
|
89
|
+
check: async () => ({ ok: true }),
|
|
90
|
+
});
|
|
91
|
+
const snapshot = await registry.evaluate();
|
|
92
|
+
expect(snapshot.checks).toHaveLength(1);
|
|
93
|
+
expect(snapshot.checks[0].ok).toBe(true);
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
it("runs probes in parallel (total time ~ slowest probe)", async () => {
  // Parallelism is verified by counting how many probes are in flight at the
  // same moment rather than by a wall-clock threshold: timers only guarantee
  // a minimum delay, so an elapsed-time bound can fail spuriously on a busy
  // machine. A sequential runner never has more than one probe in flight.
  const delay = (ms: number) => new Promise((r) => setTimeout(r, ms));
  let inFlight = 0;
  let peakInFlight = 0;

  const trackedProbe = async () => {
    inFlight += 1;
    peakInFlight = Math.max(peakInFlight, inFlight);
    try {
      // Yield long enough for any concurrently started probe to be counted.
      await delay(10);
      return { ok: true };
    } finally {
      inFlight -= 1;
    }
  };

  registry.register({ name: "slow-1", check: trackedProbe });
  registry.register({ name: "slow-2", check: trackedProbe });

  const snapshot = await registry.evaluate();

  expect(snapshot.checks).toHaveLength(2);
  // Both probes must have been started before either one finished.
  expect(peakInFlight).toBe(2);
});
|
|
118
|
+
|
|
119
|
+
it("scoped registry forwards register() to the global", () => {
  // Registering through the scoped view must land in the backing registry.
  const probe = { name: "x", check: async () => ({ ok: true }) };
  createScopedReadinessRegistry(registry).register(probe);
  expect(registry.isEmpty()).toBe(false);
});
|
|
124
|
+
});
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
ReadinessCheck,
|
|
3
|
+
ReadinessCheckResult,
|
|
4
|
+
ReadinessRegistry,
|
|
5
|
+
} from "@checkstack/backend-api";
|
|
6
|
+
import { extractErrorMessage } from "@checkstack/common";
|
|
7
|
+
import { rootLogger } from "../logger";
|
|
8
|
+
|
|
9
|
+
/**
 * Result of a single readiness evaluation, as handed back to `/ready` callers.
 */
export interface ReadinessSnapshot {
  /** Overall verdict for this evaluation. */
  ready: boolean;
  /** One entry per probe that was evaluated. */
  checks: {
    /** Name the probe was registered under. */
    name: string;
    /** Whether a failure of this probe counts against `ready`. */
    critical: boolean;
    /** Whether the probe passed. */
    ok: boolean;
    /** Optional detail reported by the probe itself. */
    message?: string;
    /** Error text, set when the probe threw; such a probe is reported with ok=false. */
    error?: string;
    /** Wall-clock duration of the probe, in milliseconds. */
    durationMs: number;
  }[];
}
|
|
25
|
+
|
|
26
|
+
/**
 * Registry of readiness probes. It backs both the plugin-facing
 * `coreServices.readinessRegistry` and the server-facing `/ready` endpoint:
 * plugins add probes through `register`, the server asks for a verdict
 * through `evaluate()`.
 */
export class CoreReadinessRegistry {
  /** Registered probes in registration order; `register` keeps names unique. */
  private checks: ReadinessCheck[] = [];

  /**
   * Adds a probe. If a probe with the same name already exists it is dropped
   * (with a warning) and the new one is appended in its place at the end.
   */
  register(check: ReadinessCheck): void {
    const duplicateAt = this.checks.findIndex(
      (existing) => existing.name === check.name,
    );
    if (duplicateAt !== -1) {
      rootLogger.warn(
        `ReadinessRegistry: probe '${check.name}' is already registered. Overwriting.`,
      );
      this.checks = this.checks.filter(
        (existing) => existing.name !== check.name,
      );
    }
    this.checks.push(check);
    rootLogger.debug(
      ` -> Registered readiness probe '${check.name}' (critical=${check.critical ?? true})`,
    );
  }

  /**
   * Starts every registered probe at once and waits for all of them.
   * A probe that throws is reported as `ok: false` with the error text.
   * The snapshot is `ready` unless some critical probe failed; a probe is
   * critical when its `critical` flag is left unset.
   */
  async evaluate(): Promise<ReadinessSnapshot> {
    const runProbe = async (
      probe: ReadinessCheck,
    ): Promise<ReadinessSnapshot["checks"][number]> => {
      const startedAt = performance.now();
      const critical = probe.critical ?? true;
      const elapsedMs = (): number =>
        Math.round(performance.now() - startedAt);
      try {
        // Reading the result stays inside the try so a malformed result is
        // reported as a failed probe instead of rejecting the whole evaluation.
        const outcome: ReadinessCheckResult = await probe.check();
        return {
          name: probe.name,
          critical,
          ok: outcome.ok,
          message: outcome.message,
          durationMs: elapsedMs(),
        };
      } catch (error) {
        return {
          name: probe.name,
          critical,
          ok: false,
          error: extractErrorMessage(error, String(error)),
          durationMs: elapsedMs(),
        };
      }
    };

    const checks = await Promise.all(
      this.checks.map((probe) => runProbe(probe)),
    );
    const blocked = checks.some((entry) => entry.critical && !entry.ok);
    return { ready: !blocked, checks };
  }

  /**
   * True as long as nothing has been registered, so callers can tell
   * "no probes yet" apart from "all probes passed".
   */
  isEmpty(): boolean {
    return this.checks.length < 1;
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Builds the plugin-facing view of the readiness registry. For now it is a
 * plain pass-through: probe names are deliberately not namespaced per plugin
 * so operators can read them at a glance, though plugins are encouraged to
 * prefix their own names.
 */
export function createScopedReadinessRegistry(
  global: CoreReadinessRegistry,
): ReadinessRegistry {
  // Only `register` is exposed; the call goes straight to the shared registry.
  const register = (check: ReadinessCheck): void => {
    global.register(check);
  };
  return { register };
}
|
package/tsconfig.json
CHANGED
|
@@ -2,5 +2,37 @@
|
|
|
2
2
|
"extends": "@checkstack/tsconfig/backend.json",
|
|
3
3
|
"include": [
|
|
4
4
|
"src"
|
|
5
|
+
],
|
|
6
|
+
"references": [
|
|
7
|
+
{
|
|
8
|
+
"path": "../api-docs-common"
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"path": "../auth-common"
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"path": "../backend-api"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"path": "../cache-api"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"path": "../common"
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"path": "../drizzle-helper"
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"path": "../queue-api"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"path": "../signal-backend"
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"path": "../signal-common"
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"path": "../test-utils-backend"
|
|
36
|
+
}
|
|
5
37
|
]
|
|
6
|
-
}
|
|
38
|
+
}
|