threadforge 0.1.0
- package/LICENSE +21 -0
- package/README.md +152 -0
- package/bin/forge.js +1050 -0
- package/bin/host-commands.js +344 -0
- package/bin/platform-commands.js +570 -0
- package/package.json +71 -0
- package/shared/auth.js +475 -0
- package/src/core/DirectMessageBus.js +364 -0
- package/src/core/EndpointResolver.js +247 -0
- package/src/core/ForgeContext.js +2227 -0
- package/src/core/ForgeHost.js +122 -0
- package/src/core/ForgePlatform.js +145 -0
- package/src/core/Ingress.js +768 -0
- package/src/core/Interceptors.js +420 -0
- package/src/core/MessageBus.js +310 -0
- package/src/core/Prometheus.js +305 -0
- package/src/core/RequestContext.js +413 -0
- package/src/core/RoutingStrategy.js +316 -0
- package/src/core/Supervisor.js +1306 -0
- package/src/core/ThreadAllocator.js +196 -0
- package/src/core/WorkerChannelManager.js +879 -0
- package/src/core/config.js +624 -0
- package/src/core/host-config.js +311 -0
- package/src/core/network-utils.js +166 -0
- package/src/core/platform-config.js +308 -0
- package/src/decorators/ServiceProxy.js +899 -0
- package/src/decorators/index.js +571 -0
- package/src/deploy/NginxGenerator.js +865 -0
- package/src/deploy/PlatformManifestGenerator.js +96 -0
- package/src/deploy/RouteManifestGenerator.js +112 -0
- package/src/deploy/index.js +984 -0
- package/src/frontend/FrontendDevLifecycle.js +65 -0
- package/src/frontend/FrontendPluginOrchestrator.js +187 -0
- package/src/frontend/SiteResolver.js +63 -0
- package/src/frontend/StaticMountRegistry.js +90 -0
- package/src/frontend/index.js +5 -0
- package/src/frontend/plugins/index.js +2 -0
- package/src/frontend/plugins/viteFrontend.js +79 -0
- package/src/frontend/types.js +35 -0
- package/src/index.js +56 -0
- package/src/internals.js +31 -0
- package/src/plugins/PluginManager.js +537 -0
- package/src/plugins/ScopedPostgres.js +192 -0
- package/src/plugins/ScopedRedis.js +142 -0
- package/src/plugins/index.js +1729 -0
- package/src/registry/ServiceRegistry.js +796 -0
- package/src/scaling/ScaleAdvisor.js +442 -0
- package/src/services/Service.js +195 -0
- package/src/services/worker-bootstrap.js +676 -0
- package/src/templates/auth-service.js +65 -0
- package/src/templates/identity-service.js +75 -0
package/src/core/MessageBus.js
@@ -0,0 +1,310 @@
import { EventEmitter } from "node:events";

/**
 * MessageBus
 *
 * @deprecated Use DirectMessageBus instead. MessageBus uses older cluster IPC
 * with round-robin dispatch. DirectMessageBus provides a Unix Domain Socket mesh
 * for direct worker-to-worker communication with better performance.
 *
 * Provides inter-service communication within a ThreadForge runtime.
 * Uses IPC channels for cluster workers and MessagePort for worker_threads.
 *
 * Features:
 * - Named channels per service
 * - Round-robin dispatch across workers of the same service
 * - Request/response pattern support
 * - Broadcast to all workers of a service
 */
export class MessageBus extends EventEmitter {
  constructor() {
    super();
    console.warn('[ThreadForge] MessageBus is deprecated. Use DirectMessageBus instead.');

    /** @type {Map<string, Array<{pid: number, send: Function}>>} */
    this.channels = new Map();

    /** @type {Map<string, number>} round-robin index per service */
    this.rrIndex = new Map();

    /** @type {Map<string, {resolve: Function, reject: Function, timer: NodeJS.Timeout}>} */
    this.pendingRequests = new Map();

    this.requestCounter = 0;
  }

  /**
   * Register a worker for a service.
   *
   * @param {string} serviceName
   * @param {Object} worker - cluster.Worker or worker_threads.Worker
   * @param {'cluster' | 'worker'} mode
   */
  registerWorker(serviceName, worker, mode = "cluster") {
    if (!this.channels.has(serviceName)) {
      this.channels.set(serviceName, []);
      this.rrIndex.set(serviceName, 0);
    }

    const channel = this.channels.get(serviceName);

    if (mode === "cluster") {
      channel.push({
        pid: worker.id,
        send: (msg) => worker.send(msg),
        worker,
      });

      // Listen for messages from this worker and route them
      worker.on("message", (msg) => this._handleMessage(serviceName, msg, worker));
    } else {
      // worker_threads mode
      channel.push({
        pid: worker.threadId,
        send: (msg) => worker.postMessage(msg),
        worker,
      });

      worker.on("message", (msg) => this._handleMessage(serviceName, msg, worker));
    }
  }

  /**
   * Remove all workers for a service.
   */
  unregisterService(serviceName) {
    this.channels.delete(serviceName);
    this.rrIndex.delete(serviceName);
  }

  /**
   * Remove a specific worker.
   */
  unregisterWorker(serviceName, pidOrThreadId) {
    const channel = this.channels.get(serviceName);
    if (!channel) return;

    const idx = channel.findIndex((c) => c.pid === pidOrThreadId);
    if (idx !== -1) {
      channel.splice(idx, 1);
    }

    if (channel.length === 0) {
      this.channels.delete(serviceName);
      this.rrIndex.delete(serviceName);
    } else {
      // CR-IPC-12: Use modulo to preserve distribution during scale-down
      const currentIdx = this.rrIndex.get(serviceName) ?? 0;
      if (currentIdx >= channel.length) {
        this.rrIndex.set(serviceName, currentIdx % channel.length);
      }
    }
  }

  /**
   * Send a fire-and-forget message to a service (round-robin across its workers).
   *
   * @param {string} from - Sending service name
   * @param {string} target - Target service name
   * @param {*} payload - Message payload (must be structured-cloneable)
   */
  send(from, target, payload) {
    const channel = this.channels.get(target);
    if (!channel || channel.length === 0) {
      throw new Error(`[MessageBus] No workers registered for service "${target}"`);
    }

    const idx = this.rrIndex.get(target);
    const worker = channel[idx % channel.length];
    this.rrIndex.set(target, (idx + 1) % 1_000_000_000);

    // CR-IPC-6: Let send errors propagate to callers
    worker.send({
      type: "forge:message",
      from,
      payload,
    });
  }

  /**
   * Broadcast a message to ALL workers of a service.
   */
  broadcast(from, target, payload) {
    const channel = this.channels.get(target);
    if (!channel || channel.length === 0) {
      throw new Error(`[MessageBus] No workers registered for service "${target}"`);
    }

    let successCount = 0;
    for (const worker of channel) {
      try {
        worker.send({
          type: "forge:message",
          from,
          payload,
        });
        successCount++;
      } catch (err) {
        this.logger?.warn?.(`Broadcast failed for worker ${worker.pid}: ${err.message}`);
      }
    }
    return successCount;
  }

  /**
   * Broadcast a message to ALL services (all workers).
   */
  broadcastAll(from, payload) {
    for (const [serviceName] of this.channels) {
      if (serviceName !== from) {
        this.broadcast(from, serviceName, payload);
      }
    }
  }

  /**
   * Send a request and wait for a response.
   * Implements a request/reply pattern over IPC.
   *
   * @param {string} from
   * @param {string} target
   * @param {*} payload
   * @param {number} [timeoutMs=5000]
   * @returns {Promise<*>}
   */
  request(from, target, payload, timeoutMs = 5000) {
    const channel = this.channels.get(target);
    if (!channel || channel.length === 0) {
      return Promise.reject(new Error(`[MessageBus] No workers registered for service "${target}"`));
    }

    if (this.pendingRequests.size >= 10000) {
      return Promise.reject(new Error(
        `Too many pending requests (${this.pendingRequests.size}). Target service "${target}" may be unresponsive.`
      ));
    }

    this.requestCounter = (this.requestCounter + 1) % 1_000_000_000;
    const requestId = `req_sup_${this.requestCounter}_${Date.now()}_${Math.random().toString(36).slice(2, 18)}`;

    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this.pendingRequests.delete(requestId);
        reject(new Error(`[MessageBus] Request to "${target}" timed out after ${timeoutMs}ms`));
      }, timeoutMs);

      this.pendingRequests.set(requestId, { resolve, reject, timer });

      const idx = this.rrIndex.get(target);
      const worker = channel[idx % channel.length];
      this.rrIndex.set(target, (idx + 1) % 1_000_000_000);

      try {
        worker.send({
          type: "forge:request",
          requestId,
          from,
          payload,
        });
      } catch (err) {
        clearTimeout(timer);
        this.pendingRequests.delete(requestId);
        reject(err);
      }
    });
  }

  /**
   * Handle an incoming message from a worker.
   */
  _handleMessage(sourceName, msg, callerWorker) {
    if (!msg || !msg.type) return;

    switch (msg.type) {
      case "forge:send": {
        // Worker wants to send to another service
        this.send(sourceName, msg.target, msg.payload);
        break;
      }

      case "forge:broadcast": {
        this.broadcast(sourceName, msg.target, msg.payload);
        break;
      }

      case "forge:request": {
        // Worker wants to make a request to another service
        this.request(sourceName, msg.target, msg.payload, msg.timeout)
          .then((result) => {
            // Send response only to the requesting worker (not all workers)
            try {
              callerWorker.send({
                type: "forge:response",
                requestId: msg.requestId,
                payload: result,
                error: null,
              });
            } catch (sendErr) {
              console.error('[MessageBus] Failed to send response:', sendErr.message);
            }
          })
          .catch((err) => {
            try {
              callerWorker.send({
                type: "forge:response",
                requestId: msg.requestId,
                payload: null,
                error: { message: err.message, code: err.code, statusCode: err.statusCode },
              });
            } catch (sendErr) {
              console.error('[MessageBus] Failed to send error response:', sendErr.message);
            }
          });
        break;
      }

      case "forge:response": {
        // Response to a pending request
        const pending = this.pendingRequests.get(msg.requestId);
        if (pending) {
          clearTimeout(pending.timer);
          this.pendingRequests.delete(msg.requestId);
          if (msg.error) {
            const errObj = typeof msg.error === 'object' ? msg.error : { message: msg.error };
            const err = new Error(errObj.message);
            if (errObj.code) err.code = errObj.code;
            if (errObj.statusCode) err.statusCode = errObj.statusCode;
            pending.reject(err);
          } else {
            pending.resolve(msg.payload);
          }
        }
        break;
      }

      case "forge:metric": {
        this.emit("metric", { service: sourceName, ...msg });
        break;
      }

      case "forge:log": {
        this.emit("log", { service: sourceName, ...msg });
        break;
      }
    }
  }

  /**
   * Get stats about registered channels.
   */
  stats() {
    const result = {};
    for (const [name, workers] of this.channels) {
      result[name] = {
        workerCount: workers.length,
        pids: workers.map((w) => w.pid),
      };
    }
    return result;
  }
}
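The request/reply flow above is easiest to see end to end. Below is a minimal sketch — hypothetical, not part of the package; the import path, port-free wiring, worker count, and the "users"/"gateway" service names are all illustrative. A cluster primary registers forked workers under a service name, `request()` round-robins a `forge:request` frame to one of them, and the worker settles the pending promise by echoing the `requestId` back in a `forge:response` frame.

```js
// Hypothetical wiring sketch based on the MessageBus source above.
import cluster from "node:cluster";
import { MessageBus } from "threadforge/src/core/MessageBus.js"; // illustrative path

if (cluster.isPrimary) {
  const bus = new MessageBus();
  // Register two workers of a "users" service; the bus round-robins between them.
  for (let i = 0; i < 2; i++) {
    bus.registerWorker("users", cluster.fork(), "cluster");
  }
  const user = await bus.request("gateway", "users", { op: "get", id: 42 }, 2000);
  console.log(user); // -> { id: 42, name: "Ada" }
  cluster.disconnect();
} else {
  // Worker side: answer forge:request frames arriving over the IPC channel.
  process.on("message", (msg) => {
    if (msg?.type === "forge:request") {
      process.send({
        type: "forge:response",
        requestId: msg.requestId,
        payload: { id: msg.payload.id, name: "Ada" },
        error: null,
      });
    }
  });
}
```

Note the backpressure guards in `request()`: a 10,000-entry cap on `pendingRequests` plus a per-request timer, so an unresponsive target fails fast rather than leaking pending entries.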
package/src/core/Prometheus.js
@@ -0,0 +1,305 @@
/**
 * Prometheus Metrics
 *
 * Proper Prometheus exposition format. Auto-tracks:
 *
 * forge_http_request_duration_seconds — histogram, per route + status
 * forge_http_requests_total — counter, per route + status
 * forge_http_active_requests — gauge, current in-flight
 * forge_rpc_duration_seconds — histogram, per target + method
 * forge_rpc_total — counter, per target + method + success/error
 * forge_circuit_breaker_state — gauge, 0=closed 1=open 2=half-open
 * forge_concurrency_limit — gauge, per-service adaptive limit
 * forge_concurrency_in_flight — gauge, per-service in-flight
 * forge_ws_connections_active — gauge, current WebSocket connections
 * forge_ws_messages_total — counter, WebSocket messages
 *
 * GET /metrics → text/plain Prometheus format
 *
 * Usage:
 *   const metrics = new PrometheusMetrics('users', 0);
 *   metrics.httpRequestEnd(0.045, { method: 'GET', route_pattern: '/users/:id', status: 200 });
 *   metrics.expose() → "# HELP forge_http_request_duration_seconds ..."
 */

const ALLOWED_HTTP_LABELS = new Set(["method", "status", "route_pattern", "service", "status_code"]);
const ALLOWED_RPC_LABELS = new Set(["target", "method", "result", "service"]);

/** P2: Normalize error codes to a finite set to prevent cardinality bombs */
export function normalizeErrorCode(err) {
  if (err.code === 'ETIMEDOUT' || err.code === 'TIMEOUT') return 'timeout';
  if (err.code === 'ECONNREFUSED' || err.code === 'ECONNRESET') return 'connection';
  if (err.statusCode >= 500) return 'server_error';
  if (err.statusCode >= 400) return 'client_error';
  if (err.code === 'CIRCUIT_OPEN') return 'circuit_open';
  return 'unknown';
}

/** P9: Cache label key string construction */
const _labelKeyCache = new Map();
const MAX_LABEL_KEY_CACHE = 10000;

function filterLabels(labels, allowed) {
  const filtered = {};
  for (const key of Object.keys(labels)) {
    if (allowed.has(key)) filtered[key] = labels[key];
  }
  return filtered;
}

export class PrometheusMetrics {
  constructor(serviceName, workerId) {
    this.serviceName = serviceName;
    this.workerId = workerId;
    this.startTime = Date.now();

    // Histogram buckets (seconds)
    this.httpBuckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10];
    this.rpcBuckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.5, 1, 5];

    /** @type {Map<string, number>} */
    this._counters = new Map();
    /** @type {Map<string, number>} */
    this._gauges = new Map();
    /** @type {Map<string, {count: number, sum: number, buckets: number[], bucketBounds: number[]}>} */
    this._histograms = new Map();
  }

  // ── Auto-tracked methods ──

  httpRequestStart() {
    this._gaugeInc("forge_http_active_requests", { service: this.serviceName });
  }

  httpRequestEnd(durationSecs, labels = {}) {
    const filtered = filterLabels({ service: this.serviceName, ...labels }, ALLOWED_HTTP_LABELS);
    this._gaugeDec("forge_http_active_requests", { service: this.serviceName });
    this._histogramObserve(
      "forge_http_request_duration_seconds",
      durationSecs,
      filtered,
      this.httpBuckets,
    );
    this._counterInc("forge_http_requests_total", filtered);
  }

  rpcCall(durationSecs, labels = {}) {
    const filtered = filterLabels({ service: this.serviceName, ...labels }, ALLOWED_RPC_LABELS);
    this._histogramObserve(
      "forge_rpc_duration_seconds",
      durationSecs,
      {
        source: this.serviceName,
        ...filtered,
      },
      this.rpcBuckets,
    );
    this._counterInc("forge_rpc_total", {
      source: this.serviceName,
      ...filtered,
    });
  }

  wsConnectionOpen() {
    this._gaugeInc("forge_ws_connections_active", { service: this.serviceName });
  }

  wsConnectionClose() {
    this._gaugeDec("forge_ws_connections_active", { service: this.serviceName });
  }

  wsMessage(direction = "inbound") {
    this._counterInc("forge_ws_messages_total", {
      service: this.serviceName,
      direction,
    });
  }

  concurrencyUpdate(target, limit, inFlight) {
    this._gaugeSet("forge_concurrency_limit", limit, {
      source: this.serviceName,
      target,
    });
    this._gaugeSet("forge_concurrency_in_flight", inFlight, {
      source: this.serviceName,
      target,
    });
  }

  circuitBreakerState(target, state) {
    // 0=closed, 1=open, 2=half-open
    const val = state === "open" ? 1 : state === "half-open" ? 2 : 0;
    this._gaugeSet("forge_circuit_breaker_state", val, {
      source: this.serviceName,
      target,
    });
  }

  // ── Generic methods (for custom metrics) ──

  counter(name, value = 1, labels = {}) {
    this._counterInc(name, { service: this.serviceName, ...labels }, value);
  }

  gauge(name, value, labels = {}) {
    this._gaugeSet(name, value, { service: this.serviceName, ...labels });
  }

  histogram(name, value, labels = {}, buckets) {
    this._histogramObserve(name, value, { service: this.serviceName, ...labels }, buckets || this.httpBuckets);
  }

  // ── Exposition ──

  /**
   * Render all metrics in Prometheus text exposition format.
   */
  expose() {
    const lines = [];
    const now = Date.now();

    // Uptime
    lines.push(`# HELP forge_uptime_seconds Service uptime`);
    lines.push(`# TYPE forge_uptime_seconds gauge`);
    lines.push(
      `forge_uptime_seconds{service="${this.serviceName}",worker="${this.workerId}"} ${((now - this.startTime) / 1000).toFixed(1)}`,
    );
    lines.push("");

    // Counters
    const counterNames = new Set([...this._counters.keys()].map((k) => k.split("{")[0]));
    for (const name of counterNames) {
      lines.push(`# HELP ${name} ${name}`);
      lines.push(`# TYPE ${name} counter`);
      for (const [key, val] of this._counters) {
        if (key === name || key.startsWith(name + "{")) {
          lines.push(`${key} ${val}`);
        }
      }
      lines.push("");
    }

    // Gauges
    const gaugeNames = new Set([...this._gauges.keys()].map((k) => k.split("{")[0]));
    for (const name of gaugeNames) {
      lines.push(`# HELP ${name} ${name}`);
      lines.push(`# TYPE ${name} gauge`);
      for (const [key, val] of this._gauges) {
        if (key === name || key.startsWith(name + "{")) {
          lines.push(`${key} ${val}`);
        }
      }
      lines.push("");
    }

    // Histograms
    const histNames = new Set([...this._histograms.keys()].map((k) => k.split("{")[0]));
    for (const name of histNames) {
      lines.push(`# HELP ${name} ${name}`);
      lines.push(`# TYPE ${name} histogram`);
      for (const [key, hist] of this._histograms) {
        if (key !== name && !key.startsWith(name + "{")) continue;
        const labelPart = key.includes("{") ? key.slice(key.indexOf("{")) : "";
        const prefix = name;

        // Bucket lines
        const buckets = hist.bucketBounds;
        for (let i = 0; i < buckets.length; i++) {
          const le = buckets[i];
          const bucketLabel = labelPart ? labelPart.replace("}", `,le="${le}"}`) : `{le="${le}"}`;
          lines.push(`${prefix}_bucket${bucketLabel} ${hist.buckets[i]}`);
        }
        const infLabel = labelPart ? labelPart.replace("}", `,le="+Inf"}`) : `{le="+Inf"}`;
        lines.push(`${prefix}_bucket${infLabel} ${hist.count}`);
        lines.push(`${prefix}_sum${labelPart} ${hist.sum.toFixed(6)}`);
        lines.push(`${prefix}_count${labelPart} ${hist.count}`);
      }
      lines.push("");
    }

    // Process metrics
    const mem = process.memoryUsage();
    lines.push(`# HELP process_resident_memory_bytes Resident memory size in bytes`);
    lines.push(`# TYPE process_resident_memory_bytes gauge`);
    lines.push(`process_resident_memory_bytes{service="${this.serviceName}"} ${mem.rss}`);
    lines.push(`# HELP process_heap_used_bytes Node.js heap used in bytes`);
    lines.push(`# TYPE process_heap_used_bytes gauge`);
    lines.push(`process_heap_used_bytes{service="${this.serviceName}"} ${mem.heapUsed}`);
    lines.push("");

    return `${lines.join("\n")}\n`;
  }

  // ── Internal ──

  _labelStr(labels) {
    const key = JSON.stringify(labels);
    if (_labelKeyCache.has(key)) return _labelKeyCache.get(key);

    const validKeyRe = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
    const parts = [];
    for (const k of Object.keys(labels)) {
      const v = labels[k];
      if (v === undefined || v === null) continue;
      // H7: Sanitize label keys
      const safeKey = validKeyRe.test(k) ? k : k.replace(/[^a-zA-Z0-9_]/g, '_').replace(/^[^a-zA-Z_]/, '_');
      // Truncate label values to 256 chars and escape special characters
      const safeVal = String(v).slice(0, 256).replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n').replace(/\r/g, '\\r').replace(/\t/g, '\\t');
      parts.push(`${safeKey}="${safeVal}"`);
    }
    const result = parts.length > 0 ? `{${parts.join(",")}}` : "";

    // P9-fix: Evict oldest 20% when cache is full (was: stop caching entirely)
    if (_labelKeyCache.size >= MAX_LABEL_KEY_CACHE) {
      const evictCount = Math.ceil(MAX_LABEL_KEY_CACHE * 0.2);
      const iter = _labelKeyCache.keys();
      for (let i = 0; i < evictCount; i++) {
        const k = iter.next().value;
        if (k !== undefined) _labelKeyCache.delete(k);
      }
    }
    _labelKeyCache.set(key, result);
    return result;
  }

  _counterInc(name, labels = {}, value = 1) {
    const key = `${name}${this._labelStr(labels)}`;
    this._counters.set(key, (this._counters.get(key) ?? 0) + value);
  }

  _gaugeSet(name, value, labels = {}) {
    const key = `${name}${this._labelStr(labels)}`;
    this._gauges.set(key, value);
  }

  _gaugeInc(name, labels = {}) {
    const key = `${name}${this._labelStr(labels)}`;
    this._gauges.set(key, (this._gauges.get(key) ?? 0) + 1);
  }

  _gaugeDec(name, labels = {}) {
    const key = `${name}${this._labelStr(labels)}`;
    this._gauges.set(key, Math.max(0, (this._gauges.get(key) ?? 0) - 1));
  }

  _histogramObserve(name, value, labels = {}, buckets) {
    const key = `${name}${this._labelStr(labels)}`;
    let hist = this._histograms.get(key);
    if (!hist) {
      hist = { count: 0, sum: 0, buckets: new Array(buckets.length).fill(0), bucketBounds: buckets };
      this._histograms.set(key, hist);
    }
    hist.count++;
    hist.sum += value;
    // P18: Cumulative buckets with early exit — once value <= buckets[i], all
    // later bounds also match since buckets are sorted ascending, so increment
    // from this point to the end and stop scanning.
    for (let i = 0; i < buckets.length; i++) {
      if (value <= buckets[i]) {
        for (let j = i; j < buckets.length; j++) {
          hist.buckets[j]++;
        }
        break;
      }
    }
  }
}
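To make the exposition path concrete, here is a rough usage sketch — hypothetical; only `PrometheusMetrics` from the file above is assumed, and the import path, port 9100, and "users" service name are illustrative. It observes one request per HTTP hit and serves `expose()` on `/metrics` over plain `node:http`.

```js
// Hypothetical usage sketch based on the PrometheusMetrics source above.
import http from "node:http";
import { PrometheusMetrics } from "threadforge/src/core/Prometheus.js"; // illustrative path

const metrics = new PrometheusMetrics("users", 0);

http.createServer((req, res) => {
  if (req.url === "/metrics") {
    res.writeHead(200, { "Content-Type": "text/plain; version=0.0.4" });
    res.end(metrics.expose());
    return;
  }
  metrics.httpRequestStart();
  const start = process.hrtime.bigint();
  res.on("finish", () => {
    const secs = Number(process.hrtime.bigint() - start) / 1e9;
    metrics.httpRequestEnd(secs, {
      method: req.method,
      route_pattern: req.url, // a real router would supply the pattern, not the raw URL
      status: res.statusCode,
    });
  });
  res.end("ok");
}).listen(9100);
```

Since all state lives in the worker's process memory, each worker exposes only its own series; a scrape yields lines such as `forge_http_requests_total{service="users",method="GET",route_pattern="/",status="200"} 1` plus a `_bucket`/`_sum`/`_count` triplet per histogram.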