threadforge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +152 -0
- package/bin/forge.js +1050 -0
- package/bin/host-commands.js +344 -0
- package/bin/platform-commands.js +570 -0
- package/package.json +71 -0
- package/shared/auth.js +475 -0
- package/src/core/DirectMessageBus.js +364 -0
- package/src/core/EndpointResolver.js +247 -0
- package/src/core/ForgeContext.js +2227 -0
- package/src/core/ForgeHost.js +122 -0
- package/src/core/ForgePlatform.js +145 -0
- package/src/core/Ingress.js +768 -0
- package/src/core/Interceptors.js +420 -0
- package/src/core/MessageBus.js +310 -0
- package/src/core/Prometheus.js +305 -0
- package/src/core/RequestContext.js +413 -0
- package/src/core/RoutingStrategy.js +316 -0
- package/src/core/Supervisor.js +1306 -0
- package/src/core/ThreadAllocator.js +196 -0
- package/src/core/WorkerChannelManager.js +879 -0
- package/src/core/config.js +624 -0
- package/src/core/host-config.js +311 -0
- package/src/core/network-utils.js +166 -0
- package/src/core/platform-config.js +308 -0
- package/src/decorators/ServiceProxy.js +899 -0
- package/src/decorators/index.js +571 -0
- package/src/deploy/NginxGenerator.js +865 -0
- package/src/deploy/PlatformManifestGenerator.js +96 -0
- package/src/deploy/RouteManifestGenerator.js +112 -0
- package/src/deploy/index.js +984 -0
- package/src/frontend/FrontendDevLifecycle.js +65 -0
- package/src/frontend/FrontendPluginOrchestrator.js +187 -0
- package/src/frontend/SiteResolver.js +63 -0
- package/src/frontend/StaticMountRegistry.js +90 -0
- package/src/frontend/index.js +5 -0
- package/src/frontend/plugins/index.js +2 -0
- package/src/frontend/plugins/viteFrontend.js +79 -0
- package/src/frontend/types.js +35 -0
- package/src/index.js +56 -0
- package/src/internals.js +31 -0
- package/src/plugins/PluginManager.js +537 -0
- package/src/plugins/ScopedPostgres.js +192 -0
- package/src/plugins/ScopedRedis.js +142 -0
- package/src/plugins/index.js +1729 -0
- package/src/registry/ServiceRegistry.js +796 -0
- package/src/scaling/ScaleAdvisor.js +442 -0
- package/src/services/Service.js +195 -0
- package/src/services/worker-bootstrap.js +676 -0
- package/src/templates/auth-service.js +65 -0
- package/src/templates/identity-service.js +75 -0
package/src/services/worker-bootstrap.js
@@ -0,0 +1,676 @@
/**
 * Worker Bootstrap v3
 *
 * Loads services, builds proxy clients, and injects them.
 *
 * After bootstrap, a service can call:
 *   await this.users.getUser('123')
 *
 * Which transparently routes through:
 * - Direct function call (colocated services, same process)
 * - UDS (different process, same machine)
 * - Supervisor IPC fallback (startup race)
 */

import fs from "node:fs";
import path from "node:path";
import { pathToFileURL } from "node:url";
import { EndpointResolver } from "../core/EndpointResolver.js";
import { ForgeContext, NOT_HANDLED } from "../core/ForgeContext.js";
import { RequestContext } from "../core/RequestContext.js";
import { buildServiceProxies, createServiceProxy } from "../decorators/ServiceProxy.js";
import { resolveStaticMountsForService } from "../frontend/SiteResolver.js";

// A6: Consolidate all env var reads into a single structured config object
function parseWorkerConfig() {
  return {
    groupName: process.env.FORGE_GROUP_NAME,
    serviceEntries: process.env.FORGE_SERVICE_ENTRIES,
    serviceNames: process.env.FORGE_SERVICE_NAMES,
    port: process.env.FORGE_PORT,
    workerId: process.env.FORGE_WORKER_ID,
    threadCount: process.env.FORGE_THREAD_COUNT,
    mode: process.env.FORGE_MODE,
    serviceTypes: process.env.FORGE_SERVICE_TYPES,
    channels: process.env.FORGE_CHANNELS,
    hostMeta: process.env.FORGE_HOST_META,
    registryMode: process.env.FORGE_REGISTRY_MODE ?? "embedded",
    registryHost: process.env.FORGE_HOST,
    servicePorts: process.env.FORGE_SERVICE_PORTS,
    plugins: process.env.FORGE_PLUGINS,
    servicePlugins: process.env.FORGE_SERVICE_PLUGINS,
    configPath: process.env.FORGE_CONFIG_PATH,
    serviceEndpoints: process.env.FORGE_SERVICE_ENDPOINTS,
    sites: process.env.FORGE_SITES,
    sitesFile: process.env.FORGE_SITES_FILE,
  };
}
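
// Example (illustrative only): the shapes these variables are parsed as later in
// this file. Concrete values are set by the process that spawns this worker, so
// the service names and paths below are placeholders, not canonical configuration.
//
//   FORGE_SERVICE_ENTRIES="users=./services/users.js,billing=./services/billing.js"
//   FORGE_SERVICE_TYPES="users=edge,billing=internal"
//   FORGE_CHANNELS='[{"from":"users","to":"billing"}]'
//   FORGE_SERVICE_PORTS='{}'          (JSON object)
//   FORGE_PLUGINS='[]'                (JSON array)
//   FORGE_SERVICE_PLUGINS='{}'        (JSON object keyed by service name)
//   FORGE_REGISTRY_MODE="embedded"    (default when unset)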

const workerConfig = parseWorkerConfig();
const hostMeta = workerConfig.hostMeta ? JSON.parse(workerConfig.hostMeta) : null;
let sites = null;
let sitesPayload = workerConfig.sites;
if (!sitesPayload && workerConfig.sitesFile) {
  try {
    sitesPayload = fs.readFileSync(workerConfig.sitesFile, "utf8");
  } catch (err) {
    console.warn(`[ThreadForge] Could not read FORGE_SITES_FILE, static mounts disabled: ${err.message}`);
  }
}
if (sitesPayload) {
  try {
    sites = JSON.parse(sitesPayload);
  } catch (err) {
    console.warn(`[ThreadForge] Invalid FORGE_SITES payload, static mounts disabled: ${err.message}`);
  }
}

async function _deliverRemoteEvent(url, body, headers, serviceName, maxRetries = 3) {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      const resp = await fetch(url, {
        method: 'POST',
        headers,
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(5000),
      });
      if (resp.ok) return;
      // Non-retryable status codes
      if (resp.status >= 400 && resp.status < 500) return;
    } catch (err) {
      if (attempt === maxRetries - 1) {
        console.error(`[ThreadForge] Remote event delivery to ${serviceName} failed after ${maxRetries} attempts: ${err.message}`);
        return;
      }
    }
    // Exponential backoff: 100ms, 200ms, 400ms
    await new Promise(resolve => setTimeout(resolve, 100 * 2 ** attempt));
  }
}

/**
 * Look up which project owns a given service name.
 * Returns { projectId, schema, keyPrefix } or null.
 */
function resolveProjectForService(serviceName) {
  if (!hostMeta) return null;
  for (const [projectId, meta] of Object.entries(hostMeta)) {
    if (meta.services.includes(serviceName)) {
      return { projectId, schema: meta.schema, keyPrefix: meta.keyPrefix };
    }
  }
  return null;
}
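
// Example (illustrative only): the FORGE_HOST_META shape this lookup expects, i.e.
// an object keyed by project id where each entry lists the project's services plus
// its schema and key prefix:
//
//   { "project-a": { "services": ["users", "billing"], "schema": "project_a", "keyPrefix": "project_a:" } }
//
// With that payload, resolveProjectForService("users") returns
//   { projectId: "project-a", schema: "project_a", keyPrefix: "project_a:" }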

// Hoisted reference so emergency shutdown handlers can access it
/** @type {Map<string, {service: object, ctx: import('../core/ForgeContext.js').ForgeContext}>|null} */
let _localServices = null;
let emergencyShutdownPromise = null;
// M-CORE-4: Module-level flag so emergencyShutdown can check if normal shutdown is running
let shutdownInProgress = false;

// Track unhandled rejections — exit only if 5+ within 60 seconds
let _rejectionCount = 0;
let _rejectionWindowStart = Date.now();
const REJECTION_THRESHOLD = 5;
const REJECTION_WINDOW_MS = 60_000;

async function emergencyShutdown(reason, err) {
  if (emergencyShutdownPromise) {
    await emergencyShutdownPromise.catch(() => {});
    process.exit(1);
    return;
  }
  // M-CORE-4: Skip service._stop() if normal shutdown is already running
  // to prevent double-cleanup of services
  const skipServiceStop = shutdownInProgress;
  emergencyShutdownPromise = (async () => {
    console.error(`[ThreadForge] ${reason}:`, err);
    try {
      if (_localServices && !skipServiceStop) {
        for (const [, { service }] of _localServices) {
          try {
            await Promise.race([
              service._stop?.(),
              new Promise((_, reject) => setTimeout(() => reject(new Error('Stop timed out')), 5000)),
            ]);
          } catch {}
        }
      }
    } catch {}
    process.exit(1);
  })();
  await emergencyShutdownPromise;
}

function handleUnhandledRejection(err) {
  const now = Date.now();

  // Reset window if it has elapsed
  if (now - _rejectionWindowStart > REJECTION_WINDOW_MS) {
    _rejectionCount = 0;
    _rejectionWindowStart = now;
  }
  _rejectionCount++;

  console.error(`[ThreadForge] SEVERE: Unhandled rejection (${_rejectionCount}/${REJECTION_THRESHOLD} in window):`, err);
  if (err?.stack) console.error(err.stack);

  if (_rejectionCount >= REJECTION_THRESHOLD) {
    console.error(`[ThreadForge] ${REJECTION_THRESHOLD}+ unhandled rejections within ${REJECTION_WINDOW_MS / 1000}s — exiting`);
    emergencyShutdown("Repeated unhandled rejections", err);
  }
}

async function bootstrap() {
  // RT-H2: Register emergency handlers early so exceptions during any phase get cleanup
  process.on("uncaughtException", (err) => emergencyShutdown("Uncaught exception", err));
  process.on("unhandledRejection", (err) => handleUnhandledRejection(err));

  // RT-C2: Re-entrancy guard for graceful shutdown (uses module-level `shutdownInProgress`)

  // H-RT-4: Register signal handlers early so SIGTERM/SIGINT during any phase triggers cleanup
  process.once("SIGTERM", () => shutdown("SIGTERM"));
  process.once("SIGINT", () => shutdown("SIGINT"));

  const entries = workerConfig.serviceEntries.split(",")
    .filter(e => e.trim())
    .map((e) => {
      const eqIdx = e.indexOf("=");
      if (eqIdx === -1) {
        throw new Error(`Invalid FORGE_SERVICE_ENTRIES format: "${e}". Expected "name=path".`);
      }
      const name = e.slice(0, eqIdx).trim();
      const entryPath = e.slice(eqIdx + 1).trim();
      if (!name || !entryPath) {
        throw new Error(`Invalid FORGE_SERVICE_ENTRIES entry: "${e}". Name and path are both required.`);
      }
      return { name, entry: entryPath };
    });

  if (entries.length === 0) {
    throw new Error("FORGE_SERVICE_ENTRIES is empty or contains no valid entries");
  }

  const typeMap = {};
  if (workerConfig.serviceTypes) {
    for (const pair of workerConfig.serviceTypes.split(",")) {
      const [name, type] = pair.split("=");
      typeMap[name] = type;
    }
  }

  // Parse declared channels to know which services we might talk to
  let declaredChannels = [];
  try {
    declaredChannels = JSON.parse(workerConfig.channels || "[]");
  } catch {}

  const port = parseInt(workerConfig.port, 10);
  const workerId = parseInt(workerConfig.workerId, 10);
  const threadCount = parseInt(workerConfig.threadCount, 10);

  // Phase 1: Load all service modules (P13: parallel imports)
  /** @type {Map<string, {ServiceClass: Function, instance: object}>} */
  const loaded = new Map();
  /** @type {Map<string, Function>} service name → class (for proxy building) */
  const serviceClasses = new Map();

  const resolvedEntries = entries.map(({ name, entry }) => ({
    name,
    entry,
    url: pathToFileURL(path.resolve(process.cwd(), entry)).href,
  }));

  const importResults = await Promise.all(
    resolvedEntries.map(async ({ name, entry, url }) => {
      try {
        const mod = await import(url);
        return { name, entry, mod, error: null };
      } catch (err) {
        return { name, entry, mod: null, error: err };
      }
    }),
  );

  for (const { name, entry, mod, error } of importResults) {
    if (error) {
      console.error(`[ThreadForge] Failed to load service "${name}" from ${entry}: ${error.message}`);
      if (error.stack) console.error(error.stack);
      throw new Error(`Service "${name}" failed to load from "${entry}": ${error.message}`);
    }
    const ServiceClass = mod.default ?? mod;

    if (typeof ServiceClass !== "function") {
      throw new Error(`Service "${entry}" must export a class. Got: ${typeof ServiceClass}`);
    }

    const instance = new ServiceClass();
    loaded.set(name, { ServiceClass, instance });
    serviceClasses.set(name, ServiceClass);
  }

  // Also register remote service classes if we know about them
  // (from channels config). For remote services we don't have the
  // class, so proxies will use dynamic dispatch.
  const allConnectedServices = new Set();
  for (const ch of declaredChannels) {
    allConnectedServices.add(ch.from);
    allConnectedServices.add(ch.to);
  }

  // Phase 2: Create contexts and local service registry
  /** @type {Map<string, {service: object, ctx: ForgeContext}>} */
  const localServices = new Map();
  _localServices = localServices;

  function localSend(fromName, target, payload) {
    const local = localServices.get(target);
    if (!local) return false;
    Promise.resolve(local.service.onMessage(fromName, payload)).catch((err) => {
      local.ctx?.logger?.error?.("onMessage error", { from: fromName, error: err.message });
      local.ctx?.metrics?.increment?.("forge_local_send_errors_total", { target });
    });
    return true;
  }

  async function localRequest(fromName, target, payload) {
    const local = localServices.get(target);
    if (local) {
      return local.service.onRequest(fromName, payload);
    }
    return NOT_HANDLED;
  }

  // Create EndpointResolver from env — shared by all contexts in this worker
  const endpointResolver = EndpointResolver.fromEnv();

  for (const [name, { instance }] of loaded) {
    const serviceType = typeMap[name] ?? "internal";
    const isEdge = serviceType === "edge";

    const ctx = new ForgeContext({
      serviceName: name,
      port: isEdge ? port : 0,
      workerId,
      threadCount,
      mode: workerConfig.mode,
      serviceType,
      sendIPC: (msg) => {
        if (process.send) process.send(msg);
      },
      localSend: (target, payload) => localSend(name, target, payload),
      localRequest: (target, payload) => localRequest(name, target, payload),
      staticMounts: resolveStaticMountsForService(name, sites),
    });

    ctx._endpointResolver = endpointResolver;

    const projectInfo = resolveProjectForService(name);
    if (projectInfo) {
      ctx._projectId = projectInfo.projectId;
      ctx._projectSchema = projectInfo.schema;
      ctx._projectKeyPrefix = projectInfo.keyPrefix;
    }

    ctx._emitEvent = (eventName, data) => {
      const eventPayload = { __forge_event: eventName, __forge_data: data };

      for (const ch of declaredChannels) {
        let target;
        if (ch.from === name) target = ch.to;
        else if (ch.to === name) target = ch.from;
        else continue;

        if (localSend(name, target, eventPayload)) continue;

        const endpoint = endpointResolver.resolve(target);
        if (endpoint?.remote) {
          // Propagate RequestContext so events can be traced back to originating requests
          const rctx = RequestContext.current();
          const headers = { "Content-Type": "application/json" };
          if (rctx) Object.assign(headers, rctx.toHeaders());
          _deliverRemoteEvent(
            `http://${endpoint.host}:${endpoint.port}/__forge/event`,
            { from: name, event: eventName, data },
            headers,
            target,
          );
        } else {
          ctx.send(target, eventPayload);
        }
      }
    };

    await instance._init(ctx);
    ctx._serviceInstance = instance; // for /__forge/invoke endpoint
    localServices.set(name, { service: instance, ctx });
  }

  // Phase 2b: Dynamic registry discovery
  if (workerConfig.registryMode !== "embedded") {
    try {
      const { ServiceRegistry } = await import("../registry/ServiceRegistry.js");
      const workerRegistry = new ServiceRegistry({
        mode: workerConfig.registryMode,
        host: workerConfig.registryHost || undefined,
      });

      workerRegistry.on("discovered", (reg) => {
        if (reg.ports?.http) {
          endpointResolver.set(reg.name, {
            host: reg.host,
            port: reg.ports.http,
            remote: true,
          });
        }
      });

      workerRegistry.on("removed", (reg) => {
        if (reg.ports?.http) {
          endpointResolver.remove(reg.name, reg.host, reg.ports.http);
        }
      });

      await workerRegistry.start();
    } catch (err) {
      // Don't crash on registry errors — static endpoints still work
      console.error(`[ThreadForge] Worker registry init failed: ${err.message}`);
    }
  }

  // Phase 3: Build and inject proxy clients
  // A2: Delegate all proxy creation to ServiceProxy — no inline retry/circuit-breaker reimplementation
  const servicePorts = JSON.parse(workerConfig.servicePorts || "{}");

  for (const [name, { service }] of localServices) {
    const ctx = localServices.get(name).ctx;
    const proxies = buildServiceProxies(ctx, serviceClasses, localServices);

    for (const svcName of allConnectedServices) {
      if (svcName === name) continue;
      if (proxies[svcName]) continue;

      // Service not in serviceClasses (remote/external) — use dynamic proxy via ServiceProxy
      proxies[svcName] = createServiceProxy(ctx, svcName, null, null, {});
    }

    service._setProxies(proxies);
  }

  // Phase 4: Connect plugins and inject into services
  let pluginManager = null;
  try {
    const pluginNames = JSON.parse(workerConfig.plugins || "[]");
    const servicePluginMap = JSON.parse(workerConfig.servicePlugins || "{}");

    if (pluginNames.length > 0) {
      const { PluginManager } = await import("../plugins/PluginManager.js");
      pluginManager = new PluginManager();

      if (workerConfig.configPath) {
        const configMod = await import(workerConfig.configPath);
        const config = configMod.default ?? configMod;
        if (config.plugins) {
          let plugins = config.plugins;

          if (hostMeta) {
            const { scopedPostgres } = await import("../plugins/ScopedPostgres.js");
            const { scopedRedis } = await import("../plugins/ScopedRedis.js");
            plugins = plugins.map((p) => {
              if (p.name === "postgres") return scopedPostgres(p._options ?? {});
              if (p.name === "redis") return scopedRedis(p._options ?? {});
              return p;
            });
          }

          pluginManager.register(plugins);
        }
      }

      // P14: Connect plugins for all services in parallel
      const pluginEntries = [...localServices.entries()];
      const pluginResults = await Promise.all(
        pluginEntries.map(async ([svcName, { service, ctx }]) => {
          const svcPlugins = servicePluginMap[svcName];
          const clients = await pluginManager.connectForService(svcPlugins, ctx);
          return { svcName, service, ctx, clients, svcPlugins };
        }),
      );

      for (const { service, ctx, clients, svcPlugins } of pluginResults) {
        // Inject clients as properties on the service
        for (const [injectName, client] of clients) {
          if (injectName.startsWith("_")) continue; // skip internal plugins (cors, etc.)
          service[injectName] = client;
        }

        // Apply plugin middleware
        const middleware = pluginManager.getMiddleware(svcPlugins);
        for (const mw of middleware) {
          ctx.router.use(mw);
        }

        // Attach websocket lifecycle hooks for this service
        ctx._wsPluginHooks = pluginManager.getWebSocketHooks(svcPlugins);
      }
    }
  } catch (err) {
    // Don't crash on plugin errors — log and continue
    console.error(`[ThreadForge] Plugin init failed for ${workerConfig.groupName}: ${err.message}`);
    if (err.stack) console.error(err.stack);
    for (const [, { ctx }] of localServices) {
      try {
        ctx?.logger?.error(`Plugin init failed: ${err.message}`);
      } catch {}
    }
  }

  // O4: Correct shutdown order — stop accepting connections first, then drain, then stop services, then disconnect plugins
  // RT-C1: shutdown is a function declaration (hoisted) to avoid TDZ when called from IPC handler
  // RT-C2: re-entrancy guard prevents double shutdown from SIGTERM + IPC race
  async function shutdown(signal) {
    if (shutdownInProgress) return;
    shutdownInProgress = true;

    for (const [name, { ctx }] of localServices) {
      ctx.logger.info(`Received ${signal}, shutting down ${name}...`);
    }

    // Step 1: Stop accepting new connections (server.close())
    const serverClosePromises = [];
    for (const [, { ctx }] of localServices) {
      if (ctx._server) {
        serverClosePromises.push(
          new Promise((resolve) => {
            ctx._server.close(() => resolve());
          })
        );
      }
    }

    // Step 2: Wait for in-flight requests to drain (up to 5s)
    if (serverClosePromises.length > 0) {
      await Promise.race([
        Promise.all(serverClosePromises),
        new Promise((resolve) => setTimeout(resolve, 5000)),
      ]);
    }

    // Step 3: Stop services (onStop hooks, with 5s timeout per service)
    for (const [name, { service, ctx }] of localServices) {
      try {
        await Promise.race([
          service._stop(),
          new Promise((_, reject) => setTimeout(() => reject(new Error(`Service "${name}" stop timed out after 5s`)), 5000)),
        ]);
      } catch (err) {
        ctx.logger.error("Shutdown error", { error: err.message });
      }
    }

    // Step 4: Disconnect all plugins
    if (pluginManager) {
      try {
        const errors = await pluginManager.disconnectAll(localServices.values().next().value?.ctx?.logger);
        if (errors.length > 0) {
          console.warn(`[ThreadForge] ${errors.length} plugins failed to disconnect cleanly`);
        }
      } catch (err) {
        console.warn(`[ThreadForge] Plugin disconnect error: ${err.message}`);
      }
    }

    process.exit(0);
  }

  // Wire IPC before starting so no messages are lost during startup
  process.on("message", (msg, _handle) => {
    if (typeof msg !== "object" || msg === null || !msg.type) return;

    if (msg.type === "forge:shutdown") {
      // Stop accepting new requests, let in-flight complete
      shutdown("forge:shutdown");
      return;
    }

    // IPC-C3: Socket messages handled only by WorkerChannelManager — skip here to avoid duplicates
    if (msg.type === "forge:init-socket" || msg.type === "forge:socket-registry") return;

    if (msg.type === "forge:health-check") {
      for (const [, { ctx }] of localServices) {
        ctx._handleIPCMessage(msg);
      }
      return;
    }

    if (msg.type === "forge:metrics-snapshot") {
      if (!process.send) return;
      try {
        const chunks = [];
        for (const [, { ctx }] of localServices) {
          if (ctx?.metrics?.expose) {
            chunks.push(ctx.metrics.expose());
          }
        }
        process.send({
          type: "forge:metrics-snapshot-response",
          requestId: msg.requestId,
          metrics: chunks.join("\n"),
        });
      } catch (err) {
        process.send({
          type: "forge:metrics-snapshot-response",
          requestId: msg.requestId,
          error: err.message,
        });
      }
      return;
    }

    if (msg.type === "forge:message" || msg.type === "forge:request" || msg.type === "forge:response") {
      for (const [, { ctx }] of localServices) {
        ctx._handleIPCMessage(msg);
      }
    }
  });

  // Request socket setup (supervisor may have sent init-socket before we were listening)
  if (process.send) {
    process.send({ type: "forge:worker-ready", group: workerConfig.groupName });
  }

  // Phase 5: Start all services — track started services for cleanup on failure (RT-H1)
  const startedServices = [];
  try {
    for (const [name, { service, ctx }] of localServices) {
      await service._start();
      startedServices.push({ name, service, ctx });

      const proxyNames = Object.keys(
        Object.fromEntries(Object.entries(service).filter(([_k, v]) => v?.$name || v?.$isLocal !== undefined)),
      );

      // Reduce startup noise: emit framework startup metadata once per group.
      if (workerId === 0) {
        ctx.logger.info("Service started", {
          group: workerConfig.groupName,
          service: name,
          type: typeMap[name] ?? "internal",
          port: ctx.port || null,
          worker: workerId,
          pid: process.pid,
          colocated: entries.length > 1 ? entries.map((e) => e.name) : undefined,
          proxies: proxyNames.length > 0 ? proxyNames : undefined,
        });
      }
    }
  } catch (startErr) {
    // Check if this is a fatal bind error (EPERM, EACCES, EADDRNOTAVAIL, EADDRINUSE)
    if (startErr.fatalBindError) {
      // Log clear error message and exit without triggering restart loop
      console.error(`\n[ThreadForge] FATAL: ${startErr.userMessage || startErr.message}`);
      console.error(`[ThreadForge] Service group "${workerConfig.groupName}" cannot start. Worker will not restart.\n`);
      // Notify supervisor this is a fatal error (already sent via IPC from ForgeContext)
      // Exit with code 100 to signal fatal configuration error (supervisor checks this)
      process.exit(100);
    }

    // RT-H1: Stop already-started services in reverse order before re-throwing
    for (let i = startedServices.length - 1; i >= 0; i--) {
      const { name, service, ctx } = startedServices[i];
      try {
        ctx.logger.warn(`Rolling back service start for ${name} due to Phase 5 failure`);
        await service._stop();
      } catch (stopErr) {
        ctx.logger.error(`Rollback stop failed for ${name}`, { error: stopErr.message });
      }
      try {
        if (ctx._server) {
          await new Promise((resolve) => ctx._server.close(resolve));
        }
      } catch {}
    }
    throw startErr;
  }

  // Phase 6: Auto-register health check (always, not just with plugins)
  for (const [, { ctx }] of localServices) {
    const existingHealthRoute = [...ctx.router.routes.values()].some((bucket) =>
      bucket.some((r) => r.pattern === "/health"),
    );
    if (!existingHealthRoute) {
      ctx.router.get("/health", async (_req, res) => {
        const health = {
          status: "ok",
          service: ctx.serviceName,
          pid: process.pid,
        };
        if (pluginManager) {
          health.plugins = await pluginManager.healthCheck();
        }
        res.json(health);
      });
    }
  }

  // Notify supervisor that this worker finished startup for readiness aggregation.
  if (process.send) {
    process.send({
      type: "forge:group-ready",
      group: workerConfig.groupName,
      workerId,
      pid: process.pid,
      services: entries.map((e) => e.name),
      port: Number.isFinite(port) && port > 0 ? port : null,
    });
  }

  // Signal handlers moved to early in bootstrap() — see below uncaughtException handler
}

bootstrap().catch((err) => {
  const services = workerConfig.serviceNames || "unknown";
  console.error(`[ThreadForge] Worker bootstrap failed for group "${workerConfig.groupName}" (services: ${services}):`, err);
  process.exit(1);
});
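
For reference, a minimal sketch of the lifecycle contract this bootstrap expects from each entry listed in FORGE_SERVICE_ENTRIES, inferred from the calls above: a default-exported class constructed with no arguments, plus _init(ctx), _setProxies(proxies), _start()/_stop(), and the onMessage/onRequest bus handlers. Real services would more likely extend the Service base class shipped in package/src/services/Service.js than hand-roll these hooks; the class name, route, and method bodies below are illustrative only.

// users.js: hypothetical entry wired up as FORGE_SERVICE_ENTRIES="users=./users.js"
export default class UsersService {
  async _init(ctx) {
    this.ctx = ctx; // ForgeContext built by worker-bootstrap for this service
  }

  _setProxies(proxies) {
    Object.assign(this, proxies); // e.g. this.billing becomes a ServiceProxy client
  }

  async _start() {
    // Routes registered here sit alongside the auto-registered /health route.
    this.ctx.router.get("/users", async (_req, res) => res.json({ ok: true }));
  }

  async _stop() {
    // Release connections and timers (bounded by the 5s shutdown timeout above).
  }

  async onMessage(from, payload) {
    // Fire-and-forget events delivered via localSend / the message bus.
  }

  async onRequest(from, payload) {
    return { echoed: payload, from }; // request/response path used by localRequest
  }
}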