@hotmeshio/hotmesh 0.19.4 → 0.19.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/modules/enums.d.ts +38 -0
- package/build/modules/enums.js +40 -1
- package/build/package.json +1 -1
- package/build/services/engine/index.d.ts +6 -0
- package/build/services/engine/index.js +8 -0
- package/build/services/hotmesh/index.js +14 -0
- package/build/services/quorum/index.js +13 -0
- package/build/services/router/config/index.d.ts +2 -2
- package/build/services/router/config/index.js +8 -1
- package/build/services/router/consumption/index.d.ts +6 -1
- package/build/services/router/consumption/index.js +35 -1
- package/build/services/router/duress/index.d.ts +91 -0
- package/build/services/router/duress/index.js +217 -0
- package/build/services/router/index.d.ts +7 -0
- package/build/services/router/index.js +18 -1
- package/build/services/router/throttling/index.d.ts +28 -0
- package/build/services/router/throttling/index.js +43 -4
- package/build/types/quorum.d.ts +20 -1
- package/package.json +1 -1
package/build/modules/enums.d.ts
CHANGED
|
@@ -256,6 +256,44 @@ export declare const HMSH_GUID_SIZE: number;
|
|
|
256
256
|
* Default task queue name used when no task queue is specified
|
|
257
257
|
*/
|
|
258
258
|
export declare const DEFAULT_TASK_QUEUE = "default";
|
|
259
|
+
/**
|
|
260
|
+
* EMA smoothing factor for duress latency tracking.
|
|
261
|
+
* Higher = faster response to spikes, lower = more stable.
|
|
262
|
+
* @default 0.3
|
|
263
|
+
*/
|
|
264
|
+
export declare const HMSH_DURESS_ALPHA: number;
|
|
265
|
+
/**
|
|
266
|
+
* Number of messages between duress evaluations.
|
|
267
|
+
* @default 10
|
|
268
|
+
*/
|
|
269
|
+
export declare const HMSH_DURESS_EVAL_INTERVAL: number;
|
|
270
|
+
/**
|
|
271
|
+
* Max EMA (ms) below which the engine is considered healthy. No throttle applied.
|
|
272
|
+
* @default 200
|
|
273
|
+
*/
|
|
274
|
+
export declare const HMSH_DURESS_HEALTHY_CEILING_MS: number;
|
|
275
|
+
/**
|
|
276
|
+
* Max EMA (ms) below which duress is mild. Light throttle (100-500ms).
|
|
277
|
+
* @default 1000
|
|
278
|
+
*/
|
|
279
|
+
export declare const HMSH_DURESS_MILD_CEILING_MS: number;
|
|
280
|
+
/**
|
|
281
|
+
* Max EMA (ms) below which duress is moderate. Moderate throttle (500-2000ms).
|
|
282
|
+
* Above this threshold, duress is severe (2000-5000ms throttle).
|
|
283
|
+
* @default 5000
|
|
284
|
+
*/
|
|
285
|
+
export declare const HMSH_DURESS_MODERATE_CEILING_MS: number;
|
|
286
|
+
/**
|
|
287
|
+
* Minimum interval (ms) between quorum duress broadcasts.
|
|
288
|
+
* @default 5000
|
|
289
|
+
*/
|
|
290
|
+
export declare const HMSH_DURESS_BROADCAST_INTERVAL_MS: number;
|
|
291
|
+
/**
|
|
292
|
+
* Number of consecutive improving evaluations required before
|
|
293
|
+
* dropping a duress level. Prevents oscillation.
|
|
294
|
+
* @default 3
|
|
295
|
+
*/
|
|
296
|
+
export declare const HMSH_DURESS_HYSTERESIS_COUNT: number;
|
|
259
297
|
/**
|
|
260
298
|
* PostgreSQL NOTIFY payload limit. If a job message exceeds this size,
|
|
261
299
|
* a reference message is sent instead and the subscriber fetches via getState.
|
package/build/modules/enums.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_ENGINE_CONCURRENCY = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_CYCLES = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
|
|
4
|
-
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = void 0;
|
|
4
|
+
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.HMSH_DURESS_HYSTERESIS_COUNT = exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = exports.HMSH_DURESS_MODERATE_CEILING_MS = exports.HMSH_DURESS_MILD_CEILING_MS = exports.HMSH_DURESS_HEALTHY_CEILING_MS = exports.HMSH_DURESS_EVAL_INTERVAL = exports.HMSH_DURESS_ALPHA = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = void 0;
|
|
5
5
|
/**
|
|
6
6
|
* Determines the log level for the application. The default is 'info'.
|
|
7
7
|
*/
|
|
@@ -288,6 +288,45 @@ exports.HMSH_GUID_SIZE = Math.min(parseInt(process.env.HMSH_GUID_SIZE, 10) || 22
|
|
|
288
288
|
* Default task queue name used when no task queue is specified
|
|
289
289
|
*/
|
|
290
290
|
exports.DEFAULT_TASK_QUEUE = 'default';
|
|
291
|
+
// DURESS DETECTION — adaptive engine throttling based on processing latency
|
|
292
|
+
/**
|
|
293
|
+
* EMA smoothing factor for duress latency tracking.
|
|
294
|
+
* Higher = faster response to spikes, lower = more stable.
|
|
295
|
+
* @default 0.3
|
|
296
|
+
*/
|
|
297
|
+
exports.HMSH_DURESS_ALPHA = parseFloat(process.env.HMSH_DURESS_ALPHA) || 0.3;
|
|
298
|
+
/**
|
|
299
|
+
* Number of messages between duress evaluations.
|
|
300
|
+
* @default 10
|
|
301
|
+
*/
|
|
302
|
+
exports.HMSH_DURESS_EVAL_INTERVAL = parseInt(process.env.HMSH_DURESS_EVAL_INTERVAL, 10) || 10;
|
|
303
|
+
/**
|
|
304
|
+
* Max EMA (ms) below which the engine is considered healthy. No throttle applied.
|
|
305
|
+
* @default 200
|
|
306
|
+
*/
|
|
307
|
+
exports.HMSH_DURESS_HEALTHY_CEILING_MS = parseInt(process.env.HMSH_DURESS_HEALTHY_CEILING_MS, 10) || 200;
|
|
308
|
+
/**
|
|
309
|
+
* Max EMA (ms) below which duress is mild. Light throttle (100-500ms).
|
|
310
|
+
* @default 1000
|
|
311
|
+
*/
|
|
312
|
+
exports.HMSH_DURESS_MILD_CEILING_MS = parseInt(process.env.HMSH_DURESS_MILD_CEILING_MS, 10) || 1000;
|
|
313
|
+
/**
|
|
314
|
+
* Max EMA (ms) below which duress is moderate. Moderate throttle (500-2000ms).
|
|
315
|
+
* Above this threshold, duress is severe (2000-5000ms throttle).
|
|
316
|
+
* @default 5000
|
|
317
|
+
*/
|
|
318
|
+
exports.HMSH_DURESS_MODERATE_CEILING_MS = parseInt(process.env.HMSH_DURESS_MODERATE_CEILING_MS, 10) || 5000;
|
|
319
|
+
/**
|
|
320
|
+
* Minimum interval (ms) between quorum duress broadcasts.
|
|
321
|
+
* @default 5000
|
|
322
|
+
*/
|
|
323
|
+
exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = parseInt(process.env.HMSH_DURESS_BROADCAST_INTERVAL_MS, 10) || 5000;
|
|
324
|
+
/**
|
|
325
|
+
* Number of consecutive improving evaluations required before
|
|
326
|
+
* dropping a duress level. Prevents oscillation.
|
|
327
|
+
* @default 3
|
|
328
|
+
*/
|
|
329
|
+
exports.HMSH_DURESS_HYSTERESIS_COUNT = parseInt(process.env.HMSH_DURESS_HYSTERESIS_COUNT, 10) || 3;
|
|
291
330
|
/**
|
|
292
331
|
* PostgreSQL NOTIFY payload limit. If a job message exceeds this size,
|
|
293
332
|
* a reference message is sent instead and the subscriber fetches via getState.
|
package/build/package.json
CHANGED
|
@@ -167,6 +167,12 @@ declare class EngineService {
|
|
|
167
167
|
* @private
|
|
168
168
|
*/
|
|
169
169
|
throttle(delayInMillis: number): Promise<void>;
|
|
170
|
+
/**
|
|
171
|
+
* Apply a remote duress signal from the quorum.
|
|
172
|
+
* Delegates to the router's duress manager.
|
|
173
|
+
* @private
|
|
174
|
+
*/
|
|
175
|
+
applyRemoteDuress(throttleMs: number, level: string): void;
|
|
170
176
|
/**
|
|
171
177
|
* @private
|
|
172
178
|
*/
|
|
@@ -267,6 +267,14 @@ class EngineService {
|
|
|
267
267
|
async throttle(delayInMillis) {
|
|
268
268
|
return Signal.throttle(this, delayInMillis);
|
|
269
269
|
}
|
|
270
|
+
/**
|
|
271
|
+
* Apply a remote duress signal from the quorum.
|
|
272
|
+
* Delegates to the router's duress manager.
|
|
273
|
+
* @private
|
|
274
|
+
*/
|
|
275
|
+
applyRemoteDuress(throttleMs, level) {
|
|
276
|
+
this.router?.applyRemoteDuress(throttleMs, level);
|
|
277
|
+
}
|
|
270
278
|
// ═════════════════════════════════════════════════════════════════
|
|
271
279
|
// 9. PUB/SUB — topic messaging, subscriptions, callbacks
|
|
272
280
|
// → see pubsub.ts
|
|
@@ -181,6 +181,20 @@ class HotMesh {
|
|
|
181
181
|
instance.logger = new logger_1.LoggerService(config.appId, instance.guid, config.name || '', config.logLevel);
|
|
182
182
|
await Init.initEngine(instance, config, instance.logger);
|
|
183
183
|
await Init.initQuorum(instance, config, instance.engine, instance.logger);
|
|
184
|
+
// Register duress broadcast callback: engine router → quorum
|
|
185
|
+
if (instance.engine?.router && instance.quorum) {
|
|
186
|
+
const quorum = instance.quorum;
|
|
187
|
+
const engineGuid = instance.guid;
|
|
188
|
+
instance.engine.router.setDuressCallback((snapshot) => {
|
|
189
|
+
quorum.pub({
|
|
190
|
+
type: 'duress',
|
|
191
|
+
originator: engineGuid,
|
|
192
|
+
duress_score_ms: snapshot.score_ms,
|
|
193
|
+
throttle_ms: snapshot.throttle_ms,
|
|
194
|
+
level: snapshot.level,
|
|
195
|
+
});
|
|
196
|
+
});
|
|
197
|
+
}
|
|
184
198
|
await Init.doWork(instance, config, instance.logger);
|
|
185
199
|
return instance;
|
|
186
200
|
}
|
|
@@ -111,6 +111,12 @@ class QuorumService {
|
|
|
111
111
|
else if (message.type === 'cron') {
|
|
112
112
|
self.engine.processTimeHooks();
|
|
113
113
|
}
|
|
114
|
+
else if (message.type === 'duress') {
|
|
115
|
+
// Apply remote duress signal (skip our own broadcasts)
|
|
116
|
+
if (message.originator !== self.guid) {
|
|
117
|
+
self.engine.applyRemoteDuress(message.throttle_ms, message.level);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
114
120
|
else if (message.type === 'rollcall') {
|
|
115
121
|
self.doRollCall(message);
|
|
116
122
|
}
|
|
@@ -147,6 +153,13 @@ class QuorumService {
|
|
|
147
153
|
reclaimCount: this.engine.router.reclaimCount,
|
|
148
154
|
system: await (0, utils_1.getSystemHealth)(),
|
|
149
155
|
};
|
|
156
|
+
// Include duress info if available (engine routers only)
|
|
157
|
+
const duressSnapshot = this.engine.router.getDuressSnapshot?.();
|
|
158
|
+
if (duressSnapshot) {
|
|
159
|
+
profile.duress_level = duressSnapshot.level;
|
|
160
|
+
profile.duress_score_ms = duressSnapshot.score_ms;
|
|
161
|
+
profile.duress_per_type = duressSnapshot.per_type;
|
|
162
|
+
}
|
|
150
163
|
}
|
|
151
164
|
this.subscribe.publish(hotmesh_1.KeyType.QUORUM, {
|
|
152
165
|
type: 'pong',
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
|
|
1
|
+
import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, HMSH_DURESS_ALPHA, HMSH_DURESS_EVAL_INTERVAL, HMSH_DURESS_HEALTHY_CEILING_MS, HMSH_DURESS_MILD_CEILING_MS, HMSH_DURESS_MODERATE_CEILING_MS, HMSH_DURESS_BROADCAST_INTERVAL_MS, HMSH_DURESS_HYSTERESIS_COUNT } from '../../../modules/enums';
|
|
2
2
|
import { RouterConfig } from '../../../types/stream';
|
|
3
3
|
export declare class RouterConfigManager {
|
|
4
4
|
static validateThrottle(delayInMillis: number): void;
|
|
@@ -8,4 +8,4 @@ export declare class RouterConfigManager {
|
|
|
8
8
|
readonly: boolean;
|
|
9
9
|
};
|
|
10
10
|
}
|
|
11
|
-
export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
|
|
11
|
+
export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, HMSH_DURESS_ALPHA, HMSH_DURESS_EVAL_INTERVAL, HMSH_DURESS_HEALTHY_CEILING_MS, HMSH_DURESS_MILD_CEILING_MS, HMSH_DURESS_MODERATE_CEILING_MS, HMSH_DURESS_BROADCAST_INTERVAL_MS, HMSH_DURESS_HYSTERESIS_COUNT, };
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
|
|
3
|
+
exports.HMSH_DURESS_HYSTERESIS_COUNT = exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = exports.HMSH_DURESS_MODERATE_CEILING_MS = exports.HMSH_DURESS_MILD_CEILING_MS = exports.HMSH_DURESS_HEALTHY_CEILING_MS = exports.HMSH_DURESS_EVAL_INTERVAL = exports.HMSH_DURESS_ALPHA = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
|
|
4
4
|
const enums_1 = require("../../../modules/enums");
|
|
5
5
|
Object.defineProperty(exports, "HMSH_BLOCK_TIME_MS", { enumerable: true, get: function () { return enums_1.HMSH_BLOCK_TIME_MS; } });
|
|
6
6
|
Object.defineProperty(exports, "HMSH_MAX_RETRIES", { enumerable: true, get: function () { return enums_1.HMSH_MAX_RETRIES; } });
|
|
@@ -21,6 +21,13 @@ Object.defineProperty(exports, "MAX_STREAM_BACKOFF", { enumerable: true, get: fu
|
|
|
21
21
|
Object.defineProperty(exports, "INITIAL_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.INITIAL_STREAM_BACKOFF; } });
|
|
22
22
|
Object.defineProperty(exports, "MAX_STREAM_RETRIES", { enumerable: true, get: function () { return enums_1.MAX_STREAM_RETRIES; } });
|
|
23
23
|
Object.defineProperty(exports, "HMSH_POISON_MESSAGE_THRESHOLD", { enumerable: true, get: function () { return enums_1.HMSH_POISON_MESSAGE_THRESHOLD; } });
|
|
24
|
+
Object.defineProperty(exports, "HMSH_DURESS_ALPHA", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_ALPHA; } });
|
|
25
|
+
Object.defineProperty(exports, "HMSH_DURESS_EVAL_INTERVAL", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_EVAL_INTERVAL; } });
|
|
26
|
+
Object.defineProperty(exports, "HMSH_DURESS_HEALTHY_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_HEALTHY_CEILING_MS; } });
|
|
27
|
+
Object.defineProperty(exports, "HMSH_DURESS_MILD_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_MILD_CEILING_MS; } });
|
|
28
|
+
Object.defineProperty(exports, "HMSH_DURESS_MODERATE_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_MODERATE_CEILING_MS; } });
|
|
29
|
+
Object.defineProperty(exports, "HMSH_DURESS_BROADCAST_INTERVAL_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_BROADCAST_INTERVAL_MS; } });
|
|
30
|
+
Object.defineProperty(exports, "HMSH_DURESS_HYSTERESIS_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_HYSTERESIS_COUNT; } });
|
|
24
31
|
class RouterConfigManager {
|
|
25
32
|
static validateThrottle(delayInMillis) {
|
|
26
33
|
if (!Number.isInteger(delayInMillis) ||
|
|
@@ -3,6 +3,7 @@ import { StreamService } from '../../stream';
|
|
|
3
3
|
import { ThrottleManager } from '../throttling';
|
|
4
4
|
import { ErrorHandler } from '../error-handling';
|
|
5
5
|
import { LifecycleManager } from '../lifecycle';
|
|
6
|
+
import { DuressManager, DuressSnapshot } from '../duress';
|
|
6
7
|
import { StreamData, StreamDataResponse } from '../../../types/stream';
|
|
7
8
|
import { ProviderClient, ProviderTransaction } from '../../../types/provider';
|
|
8
9
|
export declare class ConsumptionManager<S extends StreamService<ProviderClient, ProviderTransaction>> {
|
|
@@ -26,6 +27,9 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
|
|
|
26
27
|
private set hasReachedMaxBackoff(value);
|
|
27
28
|
private router;
|
|
28
29
|
private retry;
|
|
30
|
+
private duressManager?;
|
|
31
|
+
private onDuressChange?;
|
|
32
|
+
private messagesSinceLastEval;
|
|
29
33
|
private adaptiveReservationTimeout;
|
|
30
34
|
private adaptiveBatchSize;
|
|
31
35
|
private lastDepthCheckAt;
|
|
@@ -33,7 +37,8 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
|
|
|
33
37
|
private static readonly DEPTH_SCALE_UP_THRESHOLD;
|
|
34
38
|
private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
|
|
35
39
|
private static readonly LEASE_BUFFER_S;
|
|
36
|
-
constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
|
|
40
|
+
constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy, duressManager?: DuressManager);
|
|
41
|
+
setDuressCallback(callback: (snapshot: DuressSnapshot) => void): void;
|
|
37
42
|
/**
|
|
38
43
|
* Adjusts reservation timeout based on stream depth. Called periodically
|
|
39
44
|
* from the consume loop. When depth is high:
|
|
@@ -17,7 +17,8 @@ class ConsumptionManager {
|
|
|
17
17
|
get counts() { return this.router.counts; }
|
|
18
18
|
get hasReachedMaxBackoff() { return this.router.hasReachedMaxBackoff; }
|
|
19
19
|
set hasReachedMaxBackoff(v) { this.router.hasReachedMaxBackoff = v; }
|
|
20
|
-
constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry) {
|
|
20
|
+
constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry, duressManager) {
|
|
21
|
+
this.messagesSinceLastEval = 0;
|
|
21
22
|
// Adaptive consumption pressure — scales reservation timeout AND batch
|
|
22
23
|
// size based on stream depth. Under load: timeout grows (prevents
|
|
23
24
|
// duplicate re-reservation) and batch size shrinks (reduces in-memory
|
|
@@ -37,6 +38,10 @@ class ConsumptionManager {
|
|
|
37
38
|
this.role = role;
|
|
38
39
|
this.router = router;
|
|
39
40
|
this.retry = retry;
|
|
41
|
+
this.duressManager = duressManager;
|
|
42
|
+
}
|
|
43
|
+
setDuressCallback(callback) {
|
|
44
|
+
this.onDuressChange = callback;
|
|
40
45
|
}
|
|
41
46
|
/**
|
|
42
47
|
* Adjusts reservation timeout based on stream depth. Called periodically
|
|
@@ -500,6 +505,7 @@ class ConsumptionManager {
|
|
|
500
505
|
const deadlineMs = this.adaptiveReservationTimeout * 1000;
|
|
501
506
|
let output;
|
|
502
507
|
const telemetry = new telemetry_1.RouterTelemetry(this.appId);
|
|
508
|
+
const processingStart = Date.now();
|
|
503
509
|
try {
|
|
504
510
|
telemetry.startStreamSpan(input, this.role);
|
|
505
511
|
let deadlineTimer;
|
|
@@ -549,6 +555,34 @@ class ConsumptionManager {
|
|
|
549
555
|
telemetry.setStreamErrorFromException(err);
|
|
550
556
|
output = this.errorHandler.structureUnhandledError(input, err instanceof Error ? err : new Error(String(err)));
|
|
551
557
|
}
|
|
558
|
+
// Record processing latency for duress detection (engine routers only).
|
|
559
|
+
// This measures the actual time spent in execStreamLeg — the causal
|
|
560
|
+
// signal. The prior depth-based mechanism (adjustConsumptionPressure)
|
|
561
|
+
// responds to queue backlog; this responds to *why* the backlog exists.
|
|
562
|
+
// Evaluation is amortized over HMSH_DURESS_EVAL_INTERVAL messages to
|
|
563
|
+
// avoid per-message overhead.
|
|
564
|
+
if (this.duressManager && input.type) {
|
|
565
|
+
const processingDuration = Date.now() - processingStart;
|
|
566
|
+
this.duressManager.recordLatency(input.type, processingDuration);
|
|
567
|
+
if (++this.messagesSinceLastEval >= config_1.HMSH_DURESS_EVAL_INTERVAL) {
|
|
568
|
+
this.messagesSinceLastEval = 0;
|
|
569
|
+
const snapshot = this.duressManager.evaluate();
|
|
570
|
+
this.throttleManager.setDuressFloor(snapshot.throttle_ms);
|
|
571
|
+
if (snapshot.level !== 'healthy') {
|
|
572
|
+
this.logger.info('stream-duress-detected', {
|
|
573
|
+
stream,
|
|
574
|
+
level: snapshot.level,
|
|
575
|
+
score_ms: snapshot.score_ms,
|
|
576
|
+
throttle_ms: snapshot.throttle_ms,
|
|
577
|
+
per_type: snapshot.per_type,
|
|
578
|
+
});
|
|
579
|
+
}
|
|
580
|
+
if (this.duressManager.shouldBroadcast() && this.onDuressChange) {
|
|
581
|
+
this.duressManager.markBroadcast();
|
|
582
|
+
this.onDuressChange(snapshot);
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
}
|
|
552
586
|
try {
|
|
553
587
|
// When the ENGINE encounters an infrastructure error (schema not found,
|
|
554
588
|
// subscription missing — code 598), the message is permanently unprocessable.
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { StreamDataType } from '../../../types/stream';
|
|
2
|
+
import { DuressLevel } from '../../../types/quorum';
|
|
3
|
+
export interface DuressSnapshot {
|
|
4
|
+
level: DuressLevel;
|
|
5
|
+
score_ms: number;
|
|
6
|
+
throttle_ms: number;
|
|
7
|
+
per_type: Record<string, number>;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Adaptive engine duress detection via processing latency.
|
|
11
|
+
*
|
|
12
|
+
* ## Why this exists
|
|
13
|
+
*
|
|
14
|
+
* Prior fixes responded to queue *depth* (a symptom) — doubling reservation
|
|
15
|
+
* timeouts and halving batch sizes when the stream backed up. A deep queue
|
|
16
|
+
* doesn't necessarily mean duress (it could be a burst of external triggers),
|
|
17
|
+
* and a shallow queue doesn't necessarily mean health. This module responds
|
|
18
|
+
* to the *cause*: actual processing latency per message type.
|
|
19
|
+
*
|
|
20
|
+
* ## How it works
|
|
21
|
+
*
|
|
22
|
+
* Each engine router tracks an exponential moving average (EMA) of how long
|
|
23
|
+
* each canonical message type (transition, timehook, webhook, worker response,
|
|
24
|
+
* etc.) takes to process. When healthy, these are sub-50ms. When the max EMA
|
|
25
|
+
* crosses configurable thresholds (200ms → mild, 1s → moderate, 5s → severe),
|
|
26
|
+
* the manager computes a proportional throttle delay that the ThrottleManager
|
|
27
|
+
* applies as a floor on engine consumption rate.
|
|
28
|
+
*
|
|
29
|
+
* ## Hysteresis (asymmetric by design)
|
|
30
|
+
*
|
|
31
|
+
* Escalation is immediate — if the engine suddenly enters duress, the throttle
|
|
32
|
+
* kicks in on the next evaluation. De-escalation requires `HYSTERESIS_COUNT`
|
|
33
|
+
* (default 3) consecutive improving evaluations before dropping a level. This
|
|
34
|
+
* prevents oscillation: throttle → drain → un-throttle → refill → throttle.
|
|
35
|
+
* The EMA already smooths individual outliers; hysteresis gates the recovery
|
|
36
|
+
* path specifically.
|
|
37
|
+
*
|
|
38
|
+
* ## Quorum coordination
|
|
39
|
+
*
|
|
40
|
+
* When a router detects a level change (or remains in duress), it broadcasts
|
|
41
|
+
* a `'duress'` message via the quorum. Peers adopt the signal only if it's
|
|
42
|
+
* worse than their local state, so the mesh converges on the worst-case
|
|
43
|
+
* throttle without coordination.
|
|
44
|
+
*
|
|
45
|
+
* ## What this does NOT do
|
|
46
|
+
*
|
|
47
|
+
* External messages (triggers, signalIn/webhooks from the outside world) are
|
|
48
|
+
* never throttled. They always enter `engine_streams`. Only the engine
|
|
49
|
+
* routers' pull rate slows down, giving the system breathing room.
|
|
50
|
+
*/
|
|
51
|
+
export declare class DuressManager {
|
|
52
|
+
private emas;
|
|
53
|
+
private sampleCounts;
|
|
54
|
+
private currentLevel;
|
|
55
|
+
private belowThresholdCount;
|
|
56
|
+
private duressThrottle;
|
|
57
|
+
private lastBroadcastAt;
|
|
58
|
+
private lastBroadcastLevel;
|
|
59
|
+
/**
|
|
60
|
+
* Record a processing duration for a message type.
|
|
61
|
+
* Updates the exponential moving average for that type.
|
|
62
|
+
*/
|
|
63
|
+
recordLatency(type: StreamDataType, durationMs: number): void;
|
|
64
|
+
/**
|
|
65
|
+
* Evaluate duress state from current EMAs.
|
|
66
|
+
* Returns a snapshot with level, score, recommended throttle,
|
|
67
|
+
* and per-type latencies.
|
|
68
|
+
*/
|
|
69
|
+
evaluate(): DuressSnapshot;
|
|
70
|
+
getDuressThrottle(): number;
|
|
71
|
+
getCurrentLevel(): DuressLevel;
|
|
72
|
+
/**
|
|
73
|
+
* Apply a duress snapshot received from another engine via quorum.
|
|
74
|
+
* Adopts the remote signal only if it indicates worse duress than local.
|
|
75
|
+
*/
|
|
76
|
+
applyRemoteDuress(throttleMs: number, level: DuressLevel): void;
|
|
77
|
+
/**
|
|
78
|
+
* Whether a quorum broadcast is warranted.
|
|
79
|
+
* Rate-limited and only fires when level changes or duress is active.
|
|
80
|
+
*/
|
|
81
|
+
shouldBroadcast(): boolean;
|
|
82
|
+
markBroadcast(): void;
|
|
83
|
+
/**
|
|
84
|
+
* Returns a snapshot for inclusion in quorum rollcall profiles.
|
|
85
|
+
*/
|
|
86
|
+
getSnapshot(): DuressSnapshot;
|
|
87
|
+
private scoreToLevel;
|
|
88
|
+
private scoreToThrottle;
|
|
89
|
+
private lerp;
|
|
90
|
+
private levelOrdinal;
|
|
91
|
+
}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DuressManager = void 0;
|
|
4
|
+
const config_1 = require("../config");
|
|
5
|
+
// Throttle band boundaries (ms)
|
|
6
|
+
const MILD_THROTTLE_MIN = 100;
|
|
7
|
+
const MILD_THROTTLE_MAX = 500;
|
|
8
|
+
const MODERATE_THROTTLE_MIN = 500;
|
|
9
|
+
const MODERATE_THROTTLE_MAX = 2000;
|
|
10
|
+
const SEVERE_THROTTLE_MIN = 2000;
|
|
11
|
+
const SEVERE_THROTTLE_MAX = 5000;
|
|
12
|
+
/**
|
|
13
|
+
* Adaptive engine duress detection via processing latency.
|
|
14
|
+
*
|
|
15
|
+
* ## Why this exists
|
|
16
|
+
*
|
|
17
|
+
* Prior fixes responded to queue *depth* (a symptom) — doubling reservation
|
|
18
|
+
* timeouts and halving batch sizes when the stream backed up. A deep queue
|
|
19
|
+
* doesn't necessarily mean duress (it could be a burst of external triggers),
|
|
20
|
+
* and a shallow queue doesn't necessarily mean health. This module responds
|
|
21
|
+
* to the *cause*: actual processing latency per message type.
|
|
22
|
+
*
|
|
23
|
+
* ## How it works
|
|
24
|
+
*
|
|
25
|
+
* Each engine router tracks an exponential moving average (EMA) of how long
|
|
26
|
+
* each canonical message type (transition, timehook, webhook, worker response,
|
|
27
|
+
* etc.) takes to process. When healthy, these are sub-50ms. When the max EMA
|
|
28
|
+
* crosses configurable thresholds (200ms → mild, 1s → moderate, 5s → severe),
|
|
29
|
+
* the manager computes a proportional throttle delay that the ThrottleManager
|
|
30
|
+
* applies as a floor on engine consumption rate.
|
|
31
|
+
*
|
|
32
|
+
* ## Hysteresis (asymmetric by design)
|
|
33
|
+
*
|
|
34
|
+
* Escalation is immediate — if the engine suddenly enters duress, the throttle
|
|
35
|
+
* kicks in on the next evaluation. De-escalation requires `HYSTERESIS_COUNT`
|
|
36
|
+
* (default 3) consecutive improving evaluations before dropping a level. This
|
|
37
|
+
* prevents oscillation: throttle → drain → un-throttle → refill → throttle.
|
|
38
|
+
* The EMA already smooths individual outliers; hysteresis gates the recovery
|
|
39
|
+
* path specifically.
|
|
40
|
+
*
|
|
41
|
+
* ## Quorum coordination
|
|
42
|
+
*
|
|
43
|
+
* When a router detects a level change (or remains in duress), it broadcasts
|
|
44
|
+
* a `'duress'` message via the quorum. Peers adopt the signal only if it's
|
|
45
|
+
* worse than their local state, so the mesh converges on the worst-case
|
|
46
|
+
* throttle without coordination.
|
|
47
|
+
*
|
|
48
|
+
* ## What this does NOT do
|
|
49
|
+
*
|
|
50
|
+
* External messages (triggers, signalIn/webhooks from the outside world) are
|
|
51
|
+
* never throttled. They always enter `engine_streams`. Only the engine
|
|
52
|
+
* routers' pull rate slows down, giving the system breathing room.
|
|
53
|
+
*/
|
|
54
|
+
class DuressManager {
    /**
     * Starts with no latency history: level `'healthy'`, a zero throttle
     * floor, and no broadcast recorded yet.
     */
    constructor() {
        // Per-message-type exponential moving averages (ms)
        this.emas = new Map();
        // Accepted sample count per type; zero/absent means the EMA is unseeded
        this.sampleCounts = new Map();
        // Hysteresis state
        this.currentLevel = 'healthy';
        this.belowThresholdCount = 0;
        // Computed duress throttle floor (ms)
        this.duressThrottle = 0;
        // Broadcast rate limiting
        this.lastBroadcastAt = 0;
        this.lastBroadcastLevel = 'healthy';
    }
    /**
     * Record a processing duration for a message type and fold it into
     * that type's exponential moving average.
     *
     * Non-finite samples (NaN, ±Infinity, non-numbers) are ignored: a single
     * NaN would otherwise turn the EMA into NaN permanently and silently
     * remove that type from the max aggregation. Negative samples (possible
     * when the wall clock steps backwards) are clamped to 0.
     *
     * @param type - message type key under which the EMA is tracked
     * @param durationMs - observed processing duration in milliseconds
     */
    recordLatency(type, durationMs) {
        if (!Number.isFinite(durationMs)) {
            return;
        }
        const sample = Math.max(0, durationMs);
        const count = this.sampleCounts.get(type) || 0;
        const prev = this.emas.get(type);
        if (count === 0 || prev === undefined) {
            // First sample: seed the EMA directly
            this.emas.set(type, sample);
        }
        else {
            const alpha = config_1.HMSH_DURESS_ALPHA;
            this.emas.set(type, alpha * sample + (1 - alpha) * prev);
        }
        this.sampleCounts.set(type, count + 1);
    }
    /**
     * Evaluate duress state from the current EMAs and update the level,
     * hysteresis counter, and throttle floor.
     *
     * Escalation (same or worse raw level) is adopted immediately.
     * De-escalation only happens after `HMSH_DURESS_HYSTERESIS_COUNT`
     * evaluations in a row whose raw level is below the current level; at
     * that point the level moves straight to the latest raw level.
     *
     * @returns snapshot with level, score, recommended throttle, and
     *   per-type latencies (rounded to whole milliseconds)
     */
    evaluate() {
        const { maxEma, perType } = this.summarizeEmas();
        const rawLevel = this.scoreToLevel(maxEma);
        if (this.levelOrdinal(rawLevel) < this.levelOrdinal(this.currentLevel)) {
            // Improving: hold the current (higher) level until hysteresis clears
            this.belowThresholdCount++;
            if (this.belowThresholdCount >= config_1.HMSH_DURESS_HYSTERESIS_COUNT) {
                this.currentLevel = rawLevel;
                this.belowThresholdCount = 0;
            }
        }
        else {
            // Same or worse: reset hysteresis counter, adopt immediately
            this.belowThresholdCount = 0;
            this.currentLevel = rawLevel;
        }
        // While a higher level is being held, the score sits below that level's
        // band, so the interpolation clamps to the band's minimum throttle.
        this.duressThrottle =
            this.currentLevel === 'healthy'
                ? 0
                : this.scoreToThrottle(maxEma, this.currentLevel);
        return {
            level: this.currentLevel,
            score_ms: Math.round(maxEma),
            throttle_ms: this.duressThrottle,
            per_type: perType,
        };
    }
    /** @returns the most recently computed or adopted throttle floor in ms */
    getDuressThrottle() {
        return this.duressThrottle;
    }
    /** @returns the current (hysteresis-adjusted) duress level */
    getCurrentLevel() {
        return this.currentLevel;
    }
    /**
     * Apply a duress signal received from another engine via quorum.
     * Adopts the remote signal only if its level is strictly worse than the
     * local level; unknown levels are ignored.
     *
     * A malformed remote throttle (non-finite or negative) is replaced by the
     * minimum throttle of the adopted level's band rather than being stored.
     *
     * @param throttleMs - throttle delay recommended by the remote engine
     * @param level - duress level reported by the remote engine
     */
    applyRemoteDuress(throttleMs, level) {
        if (this.levelOrdinal(level) > this.levelOrdinal(this.currentLevel)) {
            const remoteIsUsable = Number.isFinite(throttleMs) && throttleMs >= 0;
            this.currentLevel = level;
            this.duressThrottle = remoteIsUsable
                ? throttleMs
                : this.scoreToThrottle(0, level);
            this.belowThresholdCount = 0;
        }
    }
    /**
     * Whether a quorum broadcast is warranted.
     * Rate-limited by `HMSH_DURESS_BROADCAST_INTERVAL_MS`, and only true when
     * the level differs from the last broadcast level or duress is active.
     */
    shouldBroadcast() {
        const now = Date.now();
        if (now - this.lastBroadcastAt < config_1.HMSH_DURESS_BROADCAST_INTERVAL_MS) {
            return false;
        }
        return (this.currentLevel !== this.lastBroadcastLevel ||
            this.currentLevel !== 'healthy');
    }
    /** Record that a broadcast was just sent for the current level. */
    markBroadcast() {
        this.lastBroadcastAt = Date.now();
        this.lastBroadcastLevel = this.currentLevel;
    }
    /**
     * Returns a snapshot for inclusion in quorum rollcall profiles.
     * Read-only: unlike `evaluate()`, it does not touch the level,
     * hysteresis counter, or throttle.
     */
    getSnapshot() {
        const { maxEma, perType } = this.summarizeEmas();
        return {
            level: this.currentLevel,
            score_ms: Math.round(maxEma),
            throttle_ms: this.duressThrottle,
            per_type: perType,
        };
    }
    // --- Private helpers ---
    /**
     * Aggregate the tracked EMAs: the maximum across all types plus a
     * per-type map rounded to whole milliseconds.
     */
    summarizeEmas() {
        let maxEma = 0;
        const perType = {};
        for (const [type, ema] of this.emas) {
            perType[type] = Math.round(ema);
            if (ema > maxEma)
                maxEma = ema;
        }
        return { maxEma, perType };
    }
    /** Map a latency score (ms) to a duress level using the configured ceilings. */
    scoreToLevel(ms) {
        if (ms < config_1.HMSH_DURESS_HEALTHY_CEILING_MS)
            return 'healthy';
        if (ms < config_1.HMSH_DURESS_MILD_CEILING_MS)
            return 'mild';
        if (ms < config_1.HMSH_DURESS_MODERATE_CEILING_MS)
            return 'moderate';
        return 'severe';
    }
    /**
     * Linear interpolation of the throttle within the band for the given
     * level. Unknown levels yield 0 (no throttle).
     */
    scoreToThrottle(ms, level) {
        switch (level) {
            case 'healthy':
                return 0;
            case 'mild':
                return this.lerp(ms, config_1.HMSH_DURESS_HEALTHY_CEILING_MS, config_1.HMSH_DURESS_MILD_CEILING_MS, MILD_THROTTLE_MIN, MILD_THROTTLE_MAX);
            case 'moderate':
                return this.lerp(ms, config_1.HMSH_DURESS_MILD_CEILING_MS, config_1.HMSH_DURESS_MODERATE_CEILING_MS, MODERATE_THROTTLE_MIN, MODERATE_THROTTLE_MAX);
            case 'severe':
                // Clamp to severe band max; beyond the ceiling is still severe
                return this.lerp(ms, config_1.HMSH_DURESS_MODERATE_CEILING_MS, config_1.HMSH_DURESS_MODERATE_CEILING_MS * 2, SEVERE_THROTTLE_MIN, SEVERE_THROTTLE_MAX);
            default:
                return 0;
        }
    }
    /**
     * Map `value` from [inMin, inMax] onto [outMin, outMax], clamped to the
     * output range and rounded to an integer.
     */
    lerp(value, inMin, inMax, outMin, outMax) {
        const span = inMax - inMin;
        if (span === 0) {
            // Zero-width band: avoid 0/0 (NaN) and snap to the nearer edge
            return Math.round(value < inMin ? outMin : outMax);
        }
        const t = Math.min(Math.max((value - inMin) / span, 0), 1);
        return Math.round(outMin + t * (outMax - outMin));
    }
    /** Numeric rank of a level (healthy=0 … severe=3); -1 for unknown levels. */
    levelOrdinal(level) {
        switch (level) {
            case 'healthy':
                return 0;
            case 'mild':
                return 1;
            case 'moderate':
                return 2;
            case 'severe':
                return 3;
            default:
                return -1;
        }
    }
}
|
|
217
|
+
exports.DuressManager = DuressManager;
|
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
import { ILogger } from '../logger';
|
|
3
3
|
import { StreamService } from '../stream';
|
|
4
4
|
import { RouterConfig, StreamData, StreamDataResponse, StreamRole } from '../../types/stream';
|
|
5
|
+
import { DuressLevel } from '../../types/quorum';
|
|
5
6
|
import { ProviderClient, ProviderTransaction } from '../../types/provider';
|
|
7
|
+
import { DuressSnapshot } from './duress';
|
|
6
8
|
declare class Router<S extends StreamService<ProviderClient, ProviderTransaction>> {
|
|
7
9
|
appId: string;
|
|
8
10
|
guid: string;
|
|
@@ -29,6 +31,8 @@ declare class Router<S extends StreamService<ProviderClient, ProviderTransaction
|
|
|
29
31
|
private errorHandler;
|
|
30
32
|
private lifecycleManager;
|
|
31
33
|
private consumptionManager;
|
|
34
|
+
private duressManager?;
|
|
35
|
+
private _pendingDuressSnapshot?;
|
|
32
36
|
constructor(config: RouterConfig, stream: S, logger: ILogger);
|
|
33
37
|
get throttle(): number;
|
|
34
38
|
get shouldConsume(): boolean;
|
|
@@ -49,6 +53,9 @@ declare class Router<S extends StreamService<ProviderClient, ProviderTransaction
|
|
|
49
53
|
structureUnhandledError(input: StreamData, err: Error): StreamDataResponse;
|
|
50
54
|
structureUnacknowledgedError(input: StreamData): StreamDataResponse;
|
|
51
55
|
structureError(input: StreamData, output: StreamDataResponse): StreamDataResponse;
|
|
56
|
+
setDuressCallback(callback: (snapshot: DuressSnapshot) => void): void;
|
|
57
|
+
applyRemoteDuress(throttleMs: number, level: DuressLevel): void;
|
|
58
|
+
getDuressSnapshot(): DuressSnapshot | undefined;
|
|
52
59
|
static stopConsuming(): Promise<void>;
|
|
53
60
|
stopConsuming(): Promise<void>;
|
|
54
61
|
cancelThrottle(): void;
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.Router = void 0;
|
|
4
|
+
const stream_1 = require("../../types/stream");
|
|
4
5
|
// Import the new submodules
|
|
5
6
|
const config_1 = require("./config");
|
|
6
7
|
const throttling_1 = require("./throttling");
|
|
7
8
|
const error_handling_1 = require("./error-handling");
|
|
8
9
|
const lifecycle_1 = require("./lifecycle");
|
|
9
10
|
const consumption_1 = require("./consumption");
|
|
11
|
+
const duress_1 = require("./duress");
|
|
10
12
|
class Router {
|
|
11
13
|
constructor(config, stream, logger) {
|
|
12
14
|
// Legacy properties for backward compatibility
|
|
@@ -34,7 +36,11 @@ class Router {
|
|
|
34
36
|
this.throttleManager = new throttling_1.ThrottleManager(enhancedConfig.throttle);
|
|
35
37
|
this.errorHandler = new error_handling_1.ErrorHandler();
|
|
36
38
|
this.lifecycleManager = new lifecycle_1.LifecycleManager(this.readonly, this.topic, this.logger, this.stream);
|
|
37
|
-
|
|
39
|
+
// Engine routers get duress detection; workers do not
|
|
40
|
+
if (this.role === stream_1.StreamRole.ENGINE) {
|
|
41
|
+
this.duressManager = new duress_1.DuressManager();
|
|
42
|
+
}
|
|
43
|
+
this.consumptionManager = new consumption_1.ConsumptionManager(this.stream, this.logger, this.throttleManager, this.errorHandler, this.lifecycleManager, this.reclaimDelay, this.reclaimCount, this.appId, this.role, this, this.retry, this.duressManager);
|
|
38
44
|
this.resetThrottleState();
|
|
39
45
|
}
|
|
40
46
|
// Legacy compatibility methods
|
|
@@ -99,6 +105,17 @@ class Router {
|
|
|
99
105
|
structureError(input, output) {
|
|
100
106
|
return this.errorHandler.structureError(input, output);
|
|
101
107
|
}
|
|
108
|
+
// Duress detection methods (engine routers only)
|
|
109
|
+
setDuressCallback(callback) {
|
|
110
|
+
this.consumptionManager.setDuressCallback(callback);
|
|
111
|
+
}
|
|
112
|
+
applyRemoteDuress(throttleMs, level) {
|
|
113
|
+
this.duressManager?.applyRemoteDuress(throttleMs, level);
|
|
114
|
+
this.throttleManager.setDuressFloor(throttleMs);
|
|
115
|
+
}
|
|
116
|
+
getDuressSnapshot() {
|
|
117
|
+
return this.duressManager?.getSnapshot();
|
|
118
|
+
}
|
|
102
119
|
// Static methods for instance management
|
|
103
120
|
static async stopConsuming() {
|
|
104
121
|
return lifecycle_1.InstanceRegistry.stopAll();
|
|
@@ -1,11 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Elastic throttle with two independent inputs:
|
|
3
|
+
*
|
|
4
|
+
* 1. **User throttle** — set explicitly via quorum `throttle` command.
|
|
5
|
+
* Absolute value: 0 = resume, >0 = delay per message, -1 = pause.
|
|
6
|
+
*
|
|
7
|
+
* 2. **Duress floor** — set automatically by the DuressManager based on
|
|
8
|
+
* processing latency. The effective throttle is `max(user, duress)`,
|
|
9
|
+
* so duress never reduces below what the user set, and pause always
|
|
10
|
+
* takes precedence. When duress clears (floor returns to 0), the
|
|
11
|
+
* user's original throttle remains in effect.
|
|
12
|
+
*
|
|
13
|
+
* `customSleep()` uses the effective throttle, supports dynamic
|
|
14
|
+
* interruption (if the throttle decreases mid-sleep, the router wakes
|
|
15
|
+
* early), and handles pause via a bare promise with no timer.
|
|
16
|
+
*/
|
|
1
17
|
export declare class ThrottleManager {
|
|
2
18
|
private throttle;
|
|
19
|
+
private duressFloor;
|
|
3
20
|
private isSleeping;
|
|
4
21
|
private sleepPromiseResolve;
|
|
5
22
|
private innerPromiseResolve;
|
|
6
23
|
private sleepTimeout;
|
|
7
24
|
constructor(initialThrottle?: number);
|
|
8
25
|
getThrottle(): number;
|
|
26
|
+
/**
|
|
27
|
+
* Set the duress-computed throttle floor. The effective throttle
|
|
28
|
+
* is max(userThrottle, duressFloor). Pause (throttle < 0) overrides.
|
|
29
|
+
*/
|
|
30
|
+
setDuressFloor(delayMs: number): void;
|
|
31
|
+
getDuressFloor(): number;
|
|
32
|
+
/**
|
|
33
|
+
* Returns the effective throttle: max of user-set throttle and
|
|
34
|
+
* duress floor. Pause (negative) always takes precedence.
|
|
35
|
+
*/
|
|
36
|
+
getEffectiveThrottle(): number;
|
|
9
37
|
setThrottle(delayInMillis: number): void;
|
|
10
38
|
isPaused(): boolean;
|
|
11
39
|
/**
|
|
@@ -1,9 +1,26 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ThrottleManager = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Elastic throttle with two independent inputs:
|
|
6
|
+
*
|
|
7
|
+
* 1. **User throttle** — set explicitly via quorum `throttle` command.
|
|
8
|
+
* Absolute value: 0 = resume, >0 = delay per message, -1 = pause.
|
|
9
|
+
*
|
|
10
|
+
* 2. **Duress floor** — set automatically by the DuressManager based on
|
|
11
|
+
* processing latency. The effective throttle is `max(user, duress)`,
|
|
12
|
+
* so duress never reduces below what the user set, and pause always
|
|
13
|
+
* takes precedence. When duress clears (floor returns to 0), the
|
|
14
|
+
* user's original throttle remains in effect.
|
|
15
|
+
*
|
|
16
|
+
* `customSleep()` uses the effective throttle, supports dynamic
|
|
17
|
+
* interruption (if the throttle decreases mid-sleep, the router wakes
|
|
18
|
+
* early), and handles pause via a bare promise with no timer.
|
|
19
|
+
*/
|
|
4
20
|
class ThrottleManager {
|
|
5
21
|
constructor(initialThrottle = 0) {
|
|
6
22
|
this.throttle = 0;
|
|
23
|
+
this.duressFloor = 0;
|
|
7
24
|
this.isSleeping = false;
|
|
8
25
|
this.sleepPromiseResolve = null;
|
|
9
26
|
this.innerPromiseResolve = null;
|
|
@@ -13,6 +30,25 @@ class ThrottleManager {
|
|
|
13
30
|
getThrottle() {
|
|
14
31
|
return this.throttle;
|
|
15
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Set the duress-computed throttle floor. The effective throttle
|
|
35
|
+
* is max(userThrottle, duressFloor). Pause (throttle < 0) overrides.
|
|
36
|
+
*/
|
|
37
|
+
setDuressFloor(delayMs) {
|
|
38
|
+
this.duressFloor = Math.max(0, delayMs);
|
|
39
|
+
}
|
|
40
|
+
getDuressFloor() {
|
|
41
|
+
return this.duressFloor;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Returns the effective throttle: max of user-set throttle and
|
|
45
|
+
* duress floor. Pause (negative) always takes precedence.
|
|
46
|
+
*/
|
|
47
|
+
getEffectiveThrottle() {
|
|
48
|
+
if (this.throttle < 0)
|
|
49
|
+
return this.throttle; // pause overrides
|
|
50
|
+
return Math.max(this.throttle, this.duressFloor);
|
|
51
|
+
}
|
|
16
52
|
setThrottle(delayInMillis) {
|
|
17
53
|
const wasPaused = this.throttle < 0;
|
|
18
54
|
const wasDecreased = delayInMillis < this.throttle;
|
|
@@ -45,12 +81,13 @@ class ThrottleManager {
|
|
|
45
81
|
* setThrottle() is called with a non-negative value.
|
|
46
82
|
*/
|
|
47
83
|
async customSleep() {
|
|
48
|
-
|
|
84
|
+
const effective = this.getEffectiveThrottle();
|
|
85
|
+
if (effective === 0)
|
|
49
86
|
return;
|
|
50
87
|
if (this.isSleeping)
|
|
51
88
|
return;
|
|
52
89
|
this.isSleeping = true;
|
|
53
|
-
if (
|
|
90
|
+
if (effective < 0) {
|
|
54
91
|
// Paused: wait indefinitely until setThrottle interrupts
|
|
55
92
|
await new Promise((resolve) => {
|
|
56
93
|
this.innerPromiseResolve = resolve;
|
|
@@ -62,12 +99,14 @@ class ThrottleManager {
|
|
|
62
99
|
await new Promise(async (outerResolve) => {
|
|
63
100
|
this.sleepPromiseResolve = outerResolve;
|
|
64
101
|
let elapsedTime = Date.now() - startTime;
|
|
65
|
-
|
|
102
|
+
let target = this.getEffectiveThrottle();
|
|
103
|
+
while (elapsedTime < target && target > 0) {
|
|
66
104
|
await new Promise((innerResolve) => {
|
|
67
105
|
this.innerPromiseResolve = innerResolve;
|
|
68
|
-
this.sleepTimeout = setTimeout(innerResolve,
|
|
106
|
+
this.sleepTimeout = setTimeout(innerResolve, target - elapsedTime);
|
|
69
107
|
});
|
|
70
108
|
elapsedTime = Date.now() - startTime;
|
|
109
|
+
target = this.getEffectiveThrottle();
|
|
71
110
|
}
|
|
72
111
|
this.resetThrottleState();
|
|
73
112
|
outerResolve();
|
package/build/types/quorum.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { JobOutput } from './job';
|
|
2
2
|
import { StringAnyType } from './serializer';
|
|
3
|
+
/** Duress severity level for adaptive engine throttling. */
|
|
4
|
+
export type DuressLevel = 'healthy' | 'mild' | 'moderate' | 'severe';
|
|
3
5
|
export interface CPULoad {
|
|
4
6
|
[cpu: string]: string;
|
|
5
7
|
}
|
|
@@ -86,6 +88,12 @@ export interface QuorumProfile {
|
|
|
86
88
|
system?: SystemHealth;
|
|
87
89
|
/** Stringified worker callback function (only if `signature: true` in rollcall). */
|
|
88
90
|
signature?: string;
|
|
91
|
+
/** Current duress level. Engine routers only. */
|
|
92
|
+
duress_level?: DuressLevel;
|
|
93
|
+
/** Current duress score in ms (max EMA across message types). Engine routers only. */
|
|
94
|
+
duress_score_ms?: number;
|
|
95
|
+
/** Per-message-type EMA latencies in ms. Engine routers only. */
|
|
96
|
+
duress_per_type?: Record<string, number>;
|
|
89
97
|
}
|
|
90
98
|
interface QuorumMessageBase {
|
|
91
99
|
entity?: string;
|
|
@@ -138,6 +146,17 @@ export interface ThrottleMessage extends QuorumMessageBase {
|
|
|
138
146
|
topic?: string;
|
|
139
147
|
throttle: number;
|
|
140
148
|
}
|
|
149
|
+
/**
 * Quorum message with which an engine reports its duress state
 * (score, recommended throttle, and severity level).
 */
export interface DuressMessage extends QuorumMessageBase {
    /** Discriminant identifying this quorum message as a duress signal. */
    type: 'duress';
    /** GUID of the engine that detected the duress condition. */
    originator: string;
    /** Aggregate duress score in milliseconds: the maximum EMA across message types. */
    duress_score_ms: number;
    /** Throttle delay, in milliseconds, recommended by the originator. */
    throttle_ms: number;
    /** Severity level of the reported duress. */
    level: DuressLevel;
}
|
|
141
160
|
export interface RollCallMessage extends QuorumMessageBase {
|
|
142
161
|
type: 'rollcall';
|
|
143
162
|
guid?: string;
|
|
@@ -169,5 +188,5 @@ export type SubscriptionOptions = {
|
|
|
169
188
|
* These messages serve to coordinate the cache invalidation and switch-over
|
|
170
189
|
* to the new version without any downtime and a coordinating parent server.
|
|
171
190
|
*/
|
|
172
|
-
export type QuorumMessage = PingMessage | PongMessage | ActivateMessage | WorkMessage | JobMessage | ThrottleMessage | RollCallMessage | CronMessage | UserMessage;
|
|
191
|
+
export type QuorumMessage = PingMessage | PongMessage | ActivateMessage | WorkMessage | JobMessage | ThrottleMessage | DuressMessage | RollCallMessage | CronMessage | UserMessage;
|
|
173
192
|
export {};
|