@hotmeshio/hotmesh 0.19.3 → 0.19.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -256,6 +256,44 @@ export declare const HMSH_GUID_SIZE: number;
256
256
  * Default task queue name used when no task queue is specified
257
257
  */
258
258
  export declare const DEFAULT_TASK_QUEUE = "default";
259
+ /**
260
+ * EMA smoothing factor for duress latency tracking.
261
+ * Higher = faster response to spikes, lower = more stable.
262
+ * @default 0.3
263
+ */
264
+ export declare const HMSH_DURESS_ALPHA: number;
265
+ /**
266
+ * Number of messages between duress evaluations.
267
+ * @default 10
268
+ */
269
+ export declare const HMSH_DURESS_EVAL_INTERVAL: number;
270
+ /**
271
+ * Max EMA (ms) below which the engine is considered healthy. No throttle applied.
272
+ * @default 200
273
+ */
274
+ export declare const HMSH_DURESS_HEALTHY_CEILING_MS: number;
275
+ /**
276
+ * Max EMA (ms) below which duress is mild. Light throttle (100-500ms).
277
+ * @default 1000
278
+ */
279
+ export declare const HMSH_DURESS_MILD_CEILING_MS: number;
280
+ /**
281
+ * Max EMA (ms) below which duress is moderate. Moderate throttle (500-2000ms).
282
+ * Above this threshold, duress is severe (2000-5000ms throttle).
283
+ * @default 5000
284
+ */
285
+ export declare const HMSH_DURESS_MODERATE_CEILING_MS: number;
286
+ /**
287
+ * Minimum interval (ms) between quorum duress broadcasts.
288
+ * @default 5000
289
+ */
290
+ export declare const HMSH_DURESS_BROADCAST_INTERVAL_MS: number;
291
+ /**
292
+ * Number of consecutive improving evaluations required before
293
+ * dropping a duress level. Prevents oscillation.
294
+ * @default 3
295
+ */
296
+ export declare const HMSH_DURESS_HYSTERESIS_COUNT: number;
259
297
  /**
260
298
  * PostgreSQL NOTIFY payload limit. If a job message exceeds this size,
261
299
  * a reference message is sent instead and the subscriber fetches via getState.
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_ENGINE_CONCURRENCY = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_CYCLES = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
4
- exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = void 0;
4
+ exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.HMSH_DURESS_HYSTERESIS_COUNT = exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = exports.HMSH_DURESS_MODERATE_CEILING_MS = exports.HMSH_DURESS_MILD_CEILING_MS = exports.HMSH_DURESS_HEALTHY_CEILING_MS = exports.HMSH_DURESS_EVAL_INTERVAL = exports.HMSH_DURESS_ALPHA = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = void 0;
5
5
  /**
6
6
  * Determines the log level for the application. The default is 'info'.
7
7
  */
@@ -288,6 +288,45 @@ exports.HMSH_GUID_SIZE = Math.min(parseInt(process.env.HMSH_GUID_SIZE, 10) || 22
288
288
  * Default task queue name used when no task queue is specified
289
289
  */
290
290
  exports.DEFAULT_TASK_QUEUE = 'default';
291
+ // DURESS DETECTION — adaptive engine throttling based on processing latency. Each constant below is read from the same-named environment variable; `|| default` means an unset, non-numeric, or 0 value falls back to the default.
292
+ /**
293
+ * EMA smoothing factor for duress latency tracking (env: HMSH_DURESS_ALPHA, parsed with parseFloat).
294
+ * Higher = faster response to spikes, lower = more stable. The parsed value is not range-checked here.
295
+ * @default 0.3
296
+ */
297
+ exports.HMSH_DURESS_ALPHA = parseFloat(process.env.HMSH_DURESS_ALPHA) || 0.3;
298
+ /**
299
+ * Number of messages between duress evaluations (env: HMSH_DURESS_EVAL_INTERVAL, parsed as a base-10 integer).
300
+ * @default 10
301
+ */
302
+ exports.HMSH_DURESS_EVAL_INTERVAL = parseInt(process.env.HMSH_DURESS_EVAL_INTERVAL, 10) || 10;
303
+ /**
304
+ * Upper bound (ms) on the max latency EMA below which the engine is considered healthy. No throttle applied. Env: HMSH_DURESS_HEALTHY_CEILING_MS.
305
+ * @default 200
306
+ */
307
+ exports.HMSH_DURESS_HEALTHY_CEILING_MS = parseInt(process.env.HMSH_DURESS_HEALTHY_CEILING_MS, 10) || 200;
308
+ /**
309
+ * Upper bound (ms) on the max latency EMA below which duress is mild. Light throttle (100-500ms). Env: HMSH_DURESS_MILD_CEILING_MS.
310
+ * @default 1000
311
+ */
312
+ exports.HMSH_DURESS_MILD_CEILING_MS = parseInt(process.env.HMSH_DURESS_MILD_CEILING_MS, 10) || 1000;
313
+ /**
314
+ * Upper bound (ms) on the max latency EMA below which duress is moderate. Moderate throttle (500-2000ms). Env: HMSH_DURESS_MODERATE_CEILING_MS.
315
+ * Above this threshold, duress is severe (2000-5000ms throttle).
316
+ * @default 5000
317
+ */
318
+ exports.HMSH_DURESS_MODERATE_CEILING_MS = parseInt(process.env.HMSH_DURESS_MODERATE_CEILING_MS, 10) || 5000;
319
+ /**
320
+ * Minimum interval (ms) between quorum duress broadcasts. Env: HMSH_DURESS_BROADCAST_INTERVAL_MS.
321
+ * @default 5000
322
+ */
323
+ exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = parseInt(process.env.HMSH_DURESS_BROADCAST_INTERVAL_MS, 10) || 5000;
324
+ /**
325
+ * Number of consecutive improving evaluations required before
326
+ * dropping a duress level. Prevents oscillation. Env: HMSH_DURESS_HYSTERESIS_COUNT.
327
+ * @default 3
328
+ */
329
+ exports.HMSH_DURESS_HYSTERESIS_COUNT = parseInt(process.env.HMSH_DURESS_HYSTERESIS_COUNT, 10) || 3;
291
330
  /**
292
331
  * PostgreSQL NOTIFY payload limit. If a job message exceeds this size,
293
332
  * a reference message is sent instead and the subscriber fetches via getState.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.19.3",
3
+ "version": "0.19.5",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",
@@ -119,6 +119,13 @@ declare class DBA {
119
119
  * @private
120
120
  */
121
121
  constructor();
122
+ /**
123
+ * Derives a deterministic advisory lock ID from an appId.
124
+ * Uses a different offset than the stream/store lock IDs to
125
+ * avoid collisions with those deployment locks.
126
+ * @private
127
+ */
128
+ static getAdvisoryLockId(appId: string): number;
122
129
  /**
123
130
  * Sanitizes an appId for use as a Postgres schema name.
124
131
  * Mirrors the naming logic used during table deployment.
@@ -122,6 +122,20 @@ class DBA {
122
122
  * @private
123
123
  */
124
124
  constructor() { }
125
+ /**
126
+ * Derives a deterministic advisory lock ID from an appId: a 31-multiplier rolling hash over the string's UTF-16 code units, wrapped to 32 bits and returned as a non-negative number.
127
+ * The hash is seeded with 0x44424130 ('DBA0'), intended to keep these IDs apart from the stream/store lock IDs and
128
+ * avoid collisions with those deployment locks. NOTE(review): those derivations are not visible in this hunk, and hash collisions remain possible.
129
+ * @private
130
+ */
131
+ static getAdvisoryLockId(appId) {
132
+ let hash = 0x44424130; // 'DBA0' — distinct namespace (seed for the rolling hash)
133
+ for (let i = 0; i < appId.length; i++) {
134
+ hash = (hash << 5) - hash + appId.charCodeAt(i); // hash * 31 + code unit, modulo 32-bit wraparound
135
+ hash |= 0; // coerce back to a signed 32-bit integer
136
+ }
137
+ return Math.abs(hash); // drop the sign: hashes h and -h map to the same lock ID
138
+ }
125
139
  /**
126
140
  * Sanitizes an appId for use as a Postgres schema name.
127
141
  * Mirrors the naming logic used during table deployment.
@@ -345,8 +359,23 @@ class DBA {
345
359
  const schema = DBA.safeName(appId);
346
360
  const { client, release } = await DBA.getClient(connection);
347
361
  try {
348
- await client.query(DBA.getMigrationSQL(schema));
349
- await client.query(DBA.getPruneFunctionSQL(schema));
362
+ // Guard DDL with an advisory lock. CREATE INDEX IF NOT EXISTS
363
+ // is not atomic under concurrent transactions — two sessions
364
+ // can both see the index as absent, causing a unique_violation
365
+ // on pg_class_relname_nsp_index.
366
+ const lockId = DBA.getAdvisoryLockId(appId);
367
+ const lockResult = await client.query('SELECT pg_try_advisory_lock($1) AS locked', [lockId]);
368
+ if (lockResult.rows[0].locked) {
369
+ try {
370
+ await client.query(DBA.getMigrationSQL(schema));
371
+ await client.query(DBA.getPruneFunctionSQL(schema));
372
+ }
373
+ finally {
374
+ await client.query('SELECT pg_advisory_unlock($1)', [lockId]);
375
+ }
376
+ }
377
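+ // If another session holds the lock it is presumably running
378
+ // the same idempotent DDL, so this session skips it. NOTE(review): pg_try_advisory_lock does not wait, so this call can return before that DDL has finished.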
350
379
  }
351
380
  finally {
352
381
  await release();
@@ -167,6 +167,12 @@ declare class EngineService {
167
167
  * @private
168
168
  */
169
169
  throttle(delayInMillis: number): Promise<void>;
170
+ /**
171
+ * Apply a remote duress signal from the quorum.
172
+ * Delegates to the router's duress manager.
173
+ * @private
174
+ */
175
+ applyRemoteDuress(throttleMs: number, level: string): void;
170
176
  /**
171
177
  * @private
172
178
  */
@@ -267,6 +267,14 @@ class EngineService {
267
267
  async throttle(delayInMillis) {
268
268
  return Signal.throttle(this, delayInMillis);
269
269
  }
270
+ /**
271
+ * Apply a remote duress signal from the quorum (the throttle_ms and level carried by another engine's 'duress' message).
272
+ * Forwards both arguments to the router's applyRemoteDuress, which presumably hands them to its duress manager; does nothing when the engine has no router.
273
+ * @private
274
+ */
275
+ applyRemoteDuress(throttleMs, level) {
276
+ this.router?.applyRemoteDuress(throttleMs, level); // optional chaining: skipped when router is unset
277
+ }
270
278
  // ═════════════════════════════════════════════════════════════════
271
279
  // 9. PUB/SUB — topic messaging, subscriptions, callbacks
272
280
  // → see pubsub.ts
@@ -181,6 +181,20 @@ class HotMesh {
181
181
  instance.logger = new logger_1.LoggerService(config.appId, instance.guid, config.name || '', config.logLevel);
182
182
  await Init.initEngine(instance, config, instance.logger);
183
183
  await Init.initQuorum(instance, config, instance.engine, instance.logger);
184
+ // Register duress broadcast callback: engine router → quorum
185
+ if (instance.engine?.router && instance.quorum) {
186
+ const quorum = instance.quorum;
187
+ const engineGuid = instance.guid;
188
+ instance.engine.router.setDuressCallback((snapshot) => {
189
+ quorum.pub({
190
+ type: 'duress',
191
+ originator: engineGuid,
192
+ duress_score_ms: snapshot.score_ms,
193
+ throttle_ms: snapshot.throttle_ms,
194
+ level: snapshot.level,
195
+ });
196
+ });
197
+ }
184
198
  await Init.doWork(instance, config, instance.logger);
185
199
  return instance;
186
200
  }
@@ -111,6 +111,12 @@ class QuorumService {
111
111
  else if (message.type === 'cron') {
112
112
  self.engine.processTimeHooks();
113
113
  }
114
+ else if (message.type === 'duress') {
115
+ // Apply remote duress signal (skip our own broadcasts)
116
+ if (message.originator !== self.guid) {
117
+ self.engine.applyRemoteDuress(message.throttle_ms, message.level);
118
+ }
119
+ }
114
120
  else if (message.type === 'rollcall') {
115
121
  self.doRollCall(message);
116
122
  }
@@ -147,6 +153,13 @@ class QuorumService {
147
153
  reclaimCount: this.engine.router.reclaimCount,
148
154
  system: await (0, utils_1.getSystemHealth)(),
149
155
  };
156
+ // Include duress info if available (engine routers only)
157
+ const duressSnapshot = this.engine.router.getDuressSnapshot?.();
158
+ if (duressSnapshot) {
159
+ profile.duress_level = duressSnapshot.level;
160
+ profile.duress_score_ms = duressSnapshot.score_ms;
161
+ profile.duress_per_type = duressSnapshot.per_type;
162
+ }
150
163
  }
151
164
  this.subscribe.publish(hotmesh_1.KeyType.QUORUM, {
152
165
  type: 'pong',
@@ -1,4 +1,4 @@
1
- import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
1
+ import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, HMSH_DURESS_ALPHA, HMSH_DURESS_EVAL_INTERVAL, HMSH_DURESS_HEALTHY_CEILING_MS, HMSH_DURESS_MILD_CEILING_MS, HMSH_DURESS_MODERATE_CEILING_MS, HMSH_DURESS_BROADCAST_INTERVAL_MS, HMSH_DURESS_HYSTERESIS_COUNT } from '../../../modules/enums';
2
2
  import { RouterConfig } from '../../../types/stream';
3
3
  export declare class RouterConfigManager {
4
4
  static validateThrottle(delayInMillis: number): void;
@@ -8,4 +8,4 @@ export declare class RouterConfigManager {
8
8
  readonly: boolean;
9
9
  };
10
10
  }
11
- export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
11
+ export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, HMSH_DURESS_ALPHA, HMSH_DURESS_EVAL_INTERVAL, HMSH_DURESS_HEALTHY_CEILING_MS, HMSH_DURESS_MILD_CEILING_MS, HMSH_DURESS_MODERATE_CEILING_MS, HMSH_DURESS_BROADCAST_INTERVAL_MS, HMSH_DURESS_HYSTERESIS_COUNT, };
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
3
+ exports.HMSH_DURESS_HYSTERESIS_COUNT = exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = exports.HMSH_DURESS_MODERATE_CEILING_MS = exports.HMSH_DURESS_MILD_CEILING_MS = exports.HMSH_DURESS_HEALTHY_CEILING_MS = exports.HMSH_DURESS_EVAL_INTERVAL = exports.HMSH_DURESS_ALPHA = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
4
4
  const enums_1 = require("../../../modules/enums");
5
5
  Object.defineProperty(exports, "HMSH_BLOCK_TIME_MS", { enumerable: true, get: function () { return enums_1.HMSH_BLOCK_TIME_MS; } });
6
6
  Object.defineProperty(exports, "HMSH_MAX_RETRIES", { enumerable: true, get: function () { return enums_1.HMSH_MAX_RETRIES; } });
@@ -21,6 +21,13 @@ Object.defineProperty(exports, "MAX_STREAM_BACKOFF", { enumerable: true, get: fu
21
21
  Object.defineProperty(exports, "INITIAL_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.INITIAL_STREAM_BACKOFF; } });
22
22
  Object.defineProperty(exports, "MAX_STREAM_RETRIES", { enumerable: true, get: function () { return enums_1.MAX_STREAM_RETRIES; } });
23
23
  Object.defineProperty(exports, "HMSH_POISON_MESSAGE_THRESHOLD", { enumerable: true, get: function () { return enums_1.HMSH_POISON_MESSAGE_THRESHOLD; } });
24
+ Object.defineProperty(exports, "HMSH_DURESS_ALPHA", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_ALPHA; } });
25
+ Object.defineProperty(exports, "HMSH_DURESS_EVAL_INTERVAL", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_EVAL_INTERVAL; } });
26
+ Object.defineProperty(exports, "HMSH_DURESS_HEALTHY_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_HEALTHY_CEILING_MS; } });
27
+ Object.defineProperty(exports, "HMSH_DURESS_MILD_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_MILD_CEILING_MS; } });
28
+ Object.defineProperty(exports, "HMSH_DURESS_MODERATE_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_MODERATE_CEILING_MS; } });
29
+ Object.defineProperty(exports, "HMSH_DURESS_BROADCAST_INTERVAL_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_BROADCAST_INTERVAL_MS; } });
30
+ Object.defineProperty(exports, "HMSH_DURESS_HYSTERESIS_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_HYSTERESIS_COUNT; } });
24
31
  class RouterConfigManager {
25
32
  static validateThrottle(delayInMillis) {
26
33
  if (!Number.isInteger(delayInMillis) ||
@@ -3,6 +3,7 @@ import { StreamService } from '../../stream';
3
3
  import { ThrottleManager } from '../throttling';
4
4
  import { ErrorHandler } from '../error-handling';
5
5
  import { LifecycleManager } from '../lifecycle';
6
+ import { DuressManager, DuressSnapshot } from '../duress';
6
7
  import { StreamData, StreamDataResponse } from '../../../types/stream';
7
8
  import { ProviderClient, ProviderTransaction } from '../../../types/provider';
8
9
  export declare class ConsumptionManager<S extends StreamService<ProviderClient, ProviderTransaction>> {
@@ -26,6 +27,9 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
26
27
  private set hasReachedMaxBackoff(value);
27
28
  private router;
28
29
  private retry;
30
+ private duressManager?;
31
+ private onDuressChange?;
32
+ private messagesSinceLastEval;
29
33
  private adaptiveReservationTimeout;
30
34
  private adaptiveBatchSize;
31
35
  private lastDepthCheckAt;
@@ -33,7 +37,8 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
33
37
  private static readonly DEPTH_SCALE_UP_THRESHOLD;
34
38
  private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
35
39
  private static readonly LEASE_BUFFER_S;
36
- constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
40
+ constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy, duressManager?: DuressManager);
41
+ setDuressCallback(callback: (snapshot: DuressSnapshot) => void): void;
37
42
  /**
38
43
  * Adjusts reservation timeout based on stream depth. Called periodically
39
44
  * from the consume loop. When depth is high:
@@ -17,7 +17,8 @@ class ConsumptionManager {
17
17
  get counts() { return this.router.counts; }
18
18
  get hasReachedMaxBackoff() { return this.router.hasReachedMaxBackoff; }
19
19
  set hasReachedMaxBackoff(v) { this.router.hasReachedMaxBackoff = v; }
20
- constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry) {
20
+ constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry, duressManager) {
21
+ this.messagesSinceLastEval = 0;
21
22
  // Adaptive consumption pressure — scales reservation timeout AND batch
22
23
  // size based on stream depth. Under load: timeout grows (prevents
23
24
  // duplicate re-reservation) and batch size shrinks (reduces in-memory
@@ -37,6 +38,10 @@ class ConsumptionManager {
37
38
  this.role = role;
38
39
  this.router = router;
39
40
  this.retry = retry;
41
+ this.duressManager = duressManager;
42
+ }
43
+ setDuressCallback(callback) {
44
+ this.onDuressChange = callback;
40
45
  }
41
46
  /**
42
47
  * Adjusts reservation timeout based on stream depth. Called periodically
@@ -500,6 +505,7 @@ class ConsumptionManager {
500
505
  const deadlineMs = this.adaptiveReservationTimeout * 1000;
501
506
  let output;
502
507
  const telemetry = new telemetry_1.RouterTelemetry(this.appId);
508
+ const processingStart = Date.now();
503
509
  try {
504
510
  telemetry.startStreamSpan(input, this.role);
505
511
  let deadlineTimer;
@@ -549,6 +555,34 @@ class ConsumptionManager {
549
555
  telemetry.setStreamErrorFromException(err);
550
556
  output = this.errorHandler.structureUnhandledError(input, err instanceof Error ? err : new Error(String(err)));
551
557
  }
558
+ // Record processing latency for duress detection (engine routers only).
559
+ // This measures the actual time spent in execStreamLeg — the causal
560
+ // signal. The prior depth-based mechanism (adjustConsumptionPressure)
561
+ // responds to queue backlog; this responds to *why* the backlog exists.
562
+ // Evaluation is amortized over HMSH_DURESS_EVAL_INTERVAL messages to
563
+ // avoid per-message overhead.
564
+ if (this.duressManager && input.type) {
565
+ const processingDuration = Date.now() - processingStart;
566
+ this.duressManager.recordLatency(input.type, processingDuration);
567
+ if (++this.messagesSinceLastEval >= config_1.HMSH_DURESS_EVAL_INTERVAL) {
568
+ this.messagesSinceLastEval = 0;
569
+ const snapshot = this.duressManager.evaluate();
570
+ this.throttleManager.setDuressFloor(snapshot.throttle_ms);
571
+ if (snapshot.level !== 'healthy') {
572
+ this.logger.info('stream-duress-detected', {
573
+ stream,
574
+ level: snapshot.level,
575
+ score_ms: snapshot.score_ms,
576
+ throttle_ms: snapshot.throttle_ms,
577
+ per_type: snapshot.per_type,
578
+ });
579
+ }
580
+ if (this.duressManager.shouldBroadcast() && this.onDuressChange) {
581
+ this.duressManager.markBroadcast();
582
+ this.onDuressChange(snapshot);
583
+ }
584
+ }
585
+ }
552
586
  try {
553
587
  // When the ENGINE encounters an infrastructure error (schema not found,
554
588
  // subscription missing — code 598), the message is permanently unprocessable.
@@ -0,0 +1,91 @@
1
+ import { StreamDataType } from '../../../types/stream';
2
+ import { DuressLevel } from '../../../types/quorum';
3
+ export interface DuressSnapshot {
4
+ level: DuressLevel;
5
+ score_ms: number;
6
+ throttle_ms: number;
7
+ per_type: Record<string, number>;
8
+ }
9
+ /**
10
+ * Adaptive engine duress detection via processing latency.
11
+ *
12
+ * ## Why this exists
13
+ *
14
+ * Prior fixes responded to queue *depth* (a symptom) — doubling reservation
15
+ * timeouts and halving batch sizes when the stream backed up. A deep queue
16
+ * doesn't necessarily mean duress (it could be a burst of external triggers),
17
+ * and a shallow queue doesn't necessarily mean health. This module responds
18
+ * to the *cause*: actual processing latency per message type.
19
+ *
20
+ * ## How it works
21
+ *
22
+ * Each engine router tracks an exponential moving average (EMA) of how long
23
+ * each canonical message type (transition, timehook, webhook, worker response,
24
+ * etc.) takes to process. When healthy, these are sub-50ms. When the max EMA
25
+ * crosses configurable thresholds (200ms → mild, 1s → moderate, 5s → severe),
26
+ * the manager computes a proportional throttle delay that the ThrottleManager
27
+ * applies as a floor on engine consumption rate.
28
+ *
29
+ * ## Hysteresis (asymmetric by design)
30
+ *
31
+ * Escalation is immediate — if the engine suddenly enters duress, the throttle
32
+ * kicks in on the next evaluation. De-escalation requires `HYSTERESIS_COUNT`
33
+ * (default 3) consecutive improving evaluations before dropping a level. This
34
+ * prevents oscillation: throttle → drain → un-throttle → refill → throttle.
35
+ * The EMA already smooths individual outliers; hysteresis gates the recovery
36
+ * path specifically.
37
+ *
38
+ * ## Quorum coordination
39
+ *
40
+ * When a router detects a level change (or remains in duress), it broadcasts
41
+ * a `'duress'` message via the quorum. Peers adopt the signal only if it's
42
+ * worse than their local state, so the mesh converges on the worst-case
43
+ * throttle without coordination.
44
+ *
45
+ * ## What this does NOT do
46
+ *
47
+ * External messages (triggers, signalIn/webhooks from the outside world) are
48
+ * never throttled. They always enter `engine_streams`. Only the engine
49
+ * routers' pull rate slows down, giving the system breathing room.
50
+ */
51
+ export declare class DuressManager {
52
+ private emas;
53
+ private sampleCounts;
54
+ private currentLevel;
55
+ private belowThresholdCount;
56
+ private duressThrottle;
57
+ private lastBroadcastAt;
58
+ private lastBroadcastLevel;
59
+ /**
60
+ * Record a processing duration for a message type.
61
+ * Updates the exponential moving average for that type.
62
+ */
63
+ recordLatency(type: StreamDataType, durationMs: number): void;
64
+ /**
65
+ * Evaluate duress state from current EMAs.
66
+ * Returns a snapshot with level, score, recommended throttle,
67
+ * and per-type latencies.
68
+ */
69
+ evaluate(): DuressSnapshot;
70
+ getDuressThrottle(): number;
71
+ getCurrentLevel(): DuressLevel;
72
+ /**
73
+ * Apply a duress snapshot received from another engine via quorum.
74
+ * Adopts the remote signal only if it indicates worse duress than local.
75
+ */
76
+ applyRemoteDuress(throttleMs: number, level: DuressLevel): void;
77
+ /**
78
+ * Whether a quorum broadcast is warranted.
79
+ * Rate-limited and only fires when level changes or duress is active.
80
+ */
81
+ shouldBroadcast(): boolean;
82
+ markBroadcast(): void;
83
+ /**
84
+ * Returns a snapshot for inclusion in quorum rollcall profiles.
85
+ */
86
+ getSnapshot(): DuressSnapshot;
87
+ private scoreToLevel;
88
+ private scoreToThrottle;
89
+ private lerp;
90
+ private levelOrdinal;
91
+ }
@@ -0,0 +1,217 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DuressManager = void 0;
4
+ const config_1 = require("../config");
5
// Throttle band boundaries (ms). Each non-healthy duress level maps its
// latency range onto one of these bands by linear interpolation.
const MILD_THROTTLE_MIN = 100;
const MILD_THROTTLE_MAX = 500;
const MODERATE_THROTTLE_MIN = 500;
const MODERATE_THROTTLE_MAX = 2000;
const SEVERE_THROTTLE_MIN = 2000;
const SEVERE_THROTTLE_MAX = 5000;
/**
 * Adaptive engine duress detection via processing latency.
 *
 * ## Why this exists
 *
 * Prior fixes responded to queue *depth* (a symptom) — doubling reservation
 * timeouts and halving batch sizes when the stream backed up. A deep queue
 * doesn't necessarily mean duress (it could be a burst of external triggers),
 * and a shallow queue doesn't necessarily mean health. This module responds
 * to the *cause*: actual processing latency per message type.
 *
 * ## How it works
 *
 * Each engine router tracks an exponential moving average (EMA) of how long
 * each canonical message type (transition, timehook, webhook, worker response,
 * etc.) takes to process. When healthy, these are sub-50ms. When the max EMA
 * crosses configurable thresholds (200ms → mild, 1s → moderate, 5s → severe),
 * the manager computes a proportional throttle delay that the ThrottleManager
 * applies as a floor on engine consumption rate.
 *
 * ## Hysteresis (asymmetric by design)
 *
 * Escalation is immediate — if the engine suddenly enters duress, the throttle
 * kicks in on the next evaluation. De-escalation requires
 * `HMSH_DURESS_HYSTERESIS_COUNT` (default 3) consecutive improving evaluations
 * before dropping a level. This prevents oscillation:
 * throttle → drain → un-throttle → refill → throttle.
 * The EMA already smooths individual outliers; hysteresis gates the recovery
 * path specifically.
 *
 * ## Quorum coordination
 *
 * When a router detects a level change (or remains in duress), it broadcasts
 * a `'duress'` message via the quorum. Peers adopt the signal only if it's
 * worse than their local state, so the mesh converges on the worst-case
 * throttle without coordination.
 *
 * ## What this does NOT do
 *
 * External messages (triggers, signalIn/webhooks from the outside world) are
 * never throttled. They always enter `engine_streams`. Only the engine
 * routers' pull rate slows down, giving the system breathing room.
 */
class DuressManager {
    constructor() {
        // Per-message-type exponential moving averages (ms)
        this.emas = new Map();
        // Per-message-type count of accepted samples (0/absent = EMA not seeded yet)
        this.sampleCounts = new Map();
        // Hysteresis state
        this.currentLevel = 'healthy';
        this.belowThresholdCount = 0;
        // Computed duress throttle floor (ms)
        this.duressThrottle = 0;
        // Broadcast rate limiting
        this.lastBroadcastAt = 0;
        this.lastBroadcastLevel = 'healthy';
    }
    /**
     * Record a processing duration for a message type and fold it into that
     * type's exponential moving average.
     *
     * Samples that are not finite, non-negative numbers are ignored. A single
     * NaN would otherwise stick in the EMA forever (every later update is
     * computed from the previous value), and negative durations are meaningless.
     *
     * @param type message type the sample belongs to (used as the map key)
     * @param durationMs processing time in milliseconds
     */
    recordLatency(type, durationMs) {
        if (!Number.isFinite(durationMs) || durationMs < 0) {
            return;
        }
        const seen = this.sampleCounts.get(type) || 0;
        // First accepted sample seeds the EMA directly; later ones are blended in.
        const next = seen === 0
            ? durationMs
            : config_1.HMSH_DURESS_ALPHA * durationMs +
                (1 - config_1.HMSH_DURESS_ALPHA) * this.emas.get(type);
        this.emas.set(type, next);
        this.sampleCounts.set(type, seen + 1);
    }
    /**
     * Evaluate duress state from the current EMAs and update the level and
     * throttle held by this instance.
     *
     * The score is the maximum EMA across all tracked types. A raw level that
     * is the same as or worse than the current one is adopted immediately; a
     * better one is adopted only after `HMSH_DURESS_HYSTERESIS_COUNT`
     * consecutive evaluations. While a higher level is being held, the score
     * lies below that level's band, so the throttle clamps to the band minimum.
     *
     * @returns snapshot with level, score, recommended throttle and per-type latencies
     */
    evaluate() {
        const { maxEma, perType } = this.summarizeEmas();
        const rawLevel = this.scoreToLevel(maxEma);
        if (this.levelOrdinal(rawLevel) < this.levelOrdinal(this.currentLevel)) {
            // Improving: keep the current (higher) level until hysteresis clears
            this.belowThresholdCount++;
            if (this.belowThresholdCount >= config_1.HMSH_DURESS_HYSTERESIS_COUNT) {
                this.currentLevel = rawLevel;
                this.belowThresholdCount = 0;
            }
        }
        else {
            // Same or worse: reset hysteresis counter, adopt immediately
            this.belowThresholdCount = 0;
            this.currentLevel = rawLevel;
        }
        this.duressThrottle =
            this.currentLevel === 'healthy'
                ? 0
                : this.scoreToThrottle(maxEma, this.currentLevel);
        return {
            level: this.currentLevel,
            score_ms: Math.round(maxEma),
            throttle_ms: this.duressThrottle,
            per_type: perType,
        };
    }
    /** @returns the duress throttle (ms) last computed locally or adopted from a peer */
    getDuressThrottle() {
        return this.duressThrottle;
    }
    /** @returns the duress level currently in force */
    getCurrentLevel() {
        return this.currentLevel;
    }
    /**
     * Apply a duress signal received from another engine via quorum.
     * Adopts the remote signal only if its level is worse than the local one;
     * an unrecognized level has no ordinal and is therefore never adopted.
     *
     * A remote throttle that is not a finite, non-negative number is replaced
     * by the minimum of the adopted level's band so the stored throttle is
     * never NaN or negative.
     *
     * @param throttleMs throttle (ms) recommended by the remote engine
     * @param level duress level reported by the remote engine
     */
    applyRemoteDuress(throttleMs, level) {
        if (this.levelOrdinal(level) > this.levelOrdinal(this.currentLevel)) {
            this.currentLevel = level;
            this.duressThrottle =
                Number.isFinite(throttleMs) && throttleMs >= 0
                    ? throttleMs
                    : this.scoreToThrottle(0, level); // score 0 clamps to the band minimum
            this.belowThresholdCount = 0;
        }
    }
    /**
     * Whether a quorum broadcast is warranted.
     * Rate-limited by `HMSH_DURESS_BROADCAST_INTERVAL_MS`; within that limit it
     * fires when the level differs from the last broadcast level or whenever
     * the current level is not healthy.
     */
    shouldBroadcast() {
        const now = Date.now();
        if (now - this.lastBroadcastAt < config_1.HMSH_DURESS_BROADCAST_INTERVAL_MS) {
            return false;
        }
        return (this.currentLevel !== this.lastBroadcastLevel ||
            this.currentLevel !== 'healthy');
    }
    /** Record that a broadcast was just sent (time and level) for rate limiting. */
    markBroadcast() {
        this.lastBroadcastAt = Date.now();
        this.lastBroadcastLevel = this.currentLevel;
    }
    /**
     * Returns a snapshot for inclusion in quorum rollcall profiles.
     * Unlike `evaluate()`, this does not change the level, throttle or
     * hysteresis counter.
     */
    getSnapshot() {
        const { maxEma, perType } = this.summarizeEmas();
        return {
            level: this.currentLevel,
            score_ms: Math.round(maxEma),
            throttle_ms: this.duressThrottle,
            per_type: perType,
        };
    }
    // --- Private helpers ---
    /** Max EMA across all tracked types plus a rounded per-type view. */
    summarizeEmas() {
        let maxEma = 0;
        const perType = {};
        for (const [type, ema] of this.emas) {
            perType[type] = Math.round(ema);
            if (ema > maxEma)
                maxEma = ema;
        }
        return { maxEma, perType };
    }
    /** Map a score (ms) to a level; each ceiling is an exclusive upper bound. */
    scoreToLevel(ms) {
        if (ms < config_1.HMSH_DURESS_HEALTHY_CEILING_MS)
            return 'healthy';
        if (ms < config_1.HMSH_DURESS_MILD_CEILING_MS)
            return 'mild';
        if (ms < config_1.HMSH_DURESS_MODERATE_CEILING_MS)
            return 'moderate';
        return 'severe';
    }
    /** Linear interpolation of the score within the throttle band for `level`. */
    scoreToThrottle(ms, level) {
        switch (level) {
            case 'healthy':
                return 0;
            case 'mild':
                return this.lerp(ms, config_1.HMSH_DURESS_HEALTHY_CEILING_MS, config_1.HMSH_DURESS_MILD_CEILING_MS, MILD_THROTTLE_MIN, MILD_THROTTLE_MAX);
            case 'moderate':
                return this.lerp(ms, config_1.HMSH_DURESS_MILD_CEILING_MS, config_1.HMSH_DURESS_MODERATE_CEILING_MS, MODERATE_THROTTLE_MIN, MODERATE_THROTTLE_MAX);
            case 'severe':
                // Clamp to severe band max; beyond the ceiling is still severe
                return this.lerp(ms, config_1.HMSH_DURESS_MODERATE_CEILING_MS, config_1.HMSH_DURESS_MODERATE_CEILING_MS * 2, SEVERE_THROTTLE_MIN, SEVERE_THROTTLE_MAX);
        }
    }
    /**
     * Map `value` from [inMin, inMax] onto [outMin, outMax], clamped to the
     * output range and rounded. A 0/0 ratio (zero-width input band with the
     * value exactly on it) resolves to `outMin` instead of NaN.
     */
    lerp(value, inMin, inMax, outMin, outMax) {
        const ratio = (value - inMin) / (inMax - inMin);
        const t = Number.isNaN(ratio) ? 0 : Math.min(Math.max(ratio, 0), 1);
        return Math.round(outMin + t * (outMax - outMin));
    }
    /** Severity rank used for comparisons; unknown levels yield undefined. */
    levelOrdinal(level) {
        switch (level) {
            case 'healthy':
                return 0;
            case 'mild':
                return 1;
            case 'moderate':
                return 2;
            case 'severe':
                return 3;
        }
    }
}
217
+ exports.DuressManager = DuressManager;
@@ -2,7 +2,9 @@
2
2
  import { ILogger } from '../logger';
3
3
  import { StreamService } from '../stream';
4
4
  import { RouterConfig, StreamData, StreamDataResponse, StreamRole } from '../../types/stream';
5
+ import { DuressLevel } from '../../types/quorum';
5
6
  import { ProviderClient, ProviderTransaction } from '../../types/provider';
7
+ import { DuressSnapshot } from './duress';
6
8
  declare class Router<S extends StreamService<ProviderClient, ProviderTransaction>> {
7
9
  appId: string;
8
10
  guid: string;
@@ -29,6 +31,8 @@ declare class Router<S extends StreamService<ProviderClient, ProviderTransaction
29
31
  private errorHandler;
30
32
  private lifecycleManager;
31
33
  private consumptionManager;
34
+ private duressManager?;
35
+ private _pendingDuressSnapshot?;
32
36
  constructor(config: RouterConfig, stream: S, logger: ILogger);
33
37
  get throttle(): number;
34
38
  get shouldConsume(): boolean;
@@ -49,6 +53,9 @@ declare class Router<S extends StreamService<ProviderClient, ProviderTransaction
49
53
  structureUnhandledError(input: StreamData, err: Error): StreamDataResponse;
50
54
  structureUnacknowledgedError(input: StreamData): StreamDataResponse;
51
55
  structureError(input: StreamData, output: StreamDataResponse): StreamDataResponse;
56
+ setDuressCallback(callback: (snapshot: DuressSnapshot) => void): void;
57
+ applyRemoteDuress(throttleMs: number, level: DuressLevel): void;
58
+ getDuressSnapshot(): DuressSnapshot | undefined;
52
59
  static stopConsuming(): Promise<void>;
53
60
  stopConsuming(): Promise<void>;
54
61
  cancelThrottle(): void;
@@ -1,12 +1,14 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.Router = void 0;
4
+ const stream_1 = require("../../types/stream");
4
5
  // Import the new submodules
5
6
  const config_1 = require("./config");
6
7
  const throttling_1 = require("./throttling");
7
8
  const error_handling_1 = require("./error-handling");
8
9
  const lifecycle_1 = require("./lifecycle");
9
10
  const consumption_1 = require("./consumption");
11
+ const duress_1 = require("./duress");
10
12
  class Router {
11
13
  constructor(config, stream, logger) {
12
14
  // Legacy properties for backward compatibility
@@ -34,7 +36,11 @@ class Router {
34
36
  this.throttleManager = new throttling_1.ThrottleManager(enhancedConfig.throttle);
35
37
  this.errorHandler = new error_handling_1.ErrorHandler();
36
38
  this.lifecycleManager = new lifecycle_1.LifecycleManager(this.readonly, this.topic, this.logger, this.stream);
37
- this.consumptionManager = new consumption_1.ConsumptionManager(this.stream, this.logger, this.throttleManager, this.errorHandler, this.lifecycleManager, this.reclaimDelay, this.reclaimCount, this.appId, this.role, this, this.retry);
39
+ // Engine routers get duress detection; workers do not
40
+ if (this.role === stream_1.StreamRole.ENGINE) {
41
+ this.duressManager = new duress_1.DuressManager();
42
+ }
43
+ this.consumptionManager = new consumption_1.ConsumptionManager(this.stream, this.logger, this.throttleManager, this.errorHandler, this.lifecycleManager, this.reclaimDelay, this.reclaimCount, this.appId, this.role, this, this.retry, this.duressManager);
38
44
  this.resetThrottleState();
39
45
  }
40
46
  // Legacy compatibility methods
@@ -99,6 +105,17 @@ class Router {
99
105
  structureError(input, output) {
100
106
  return this.errorHandler.structureError(input, output);
101
107
  }
108
+ // Duress detection methods (engine routers only)
109
+ setDuressCallback(callback) { // hands the duress-snapshot callback to the consumption manager
110
+ this.consumptionManager.setDuressCallback(callback); // pure delegation; this method does not invoke the callback itself
111
+ }
112
+ applyRemoteDuress(throttleMs, level) {
113
+ this.duressManager?.applyRemoteDuress(throttleMs, level);
114
+ this.throttleManager.setDuressFloor(throttleMs);
115
+ }
116
+ getDuressSnapshot() { // current duress snapshot from the DuressManager
117
+ return this.duressManager?.getSnapshot(); // undefined when this router was built without a DuressManager
118
+ }
102
119
  // Static methods for instance management
103
120
  static async stopConsuming() {
104
121
  return lifecycle_1.InstanceRegistry.stopAll();
@@ -1,11 +1,39 @@
1
+ /**
2
+ * Elastic throttle with two independent inputs:
3
+ *
4
+ * 1. **User throttle** — set explicitly via quorum `throttle` command.
5
+ * Absolute value: 0 = resume, >0 = delay per message, -1 = pause.
6
+ *
7
+ * 2. **Duress floor** — set automatically by the DuressManager based on
8
+ * processing latency. The effective throttle is `max(user, duress)`,
9
+ * so duress never reduces below what the user set, and pause always
10
+ * takes precedence. When duress clears (floor returns to 0), the
11
+ * user's original throttle remains in effect.
12
+ *
13
+ * `customSleep()` uses the effective throttle, supports dynamic
14
+ * interruption (if the throttle decreases mid-sleep, the router wakes
15
+ * early), and handles pause via a bare promise with no timer.
16
+ */
1
17
  export declare class ThrottleManager {
2
18
  private throttle;
19
+ private duressFloor;
3
20
  private isSleeping;
4
21
  private sleepPromiseResolve;
5
22
  private innerPromiseResolve;
6
23
  private sleepTimeout;
7
24
  constructor(initialThrottle?: number);
8
25
  getThrottle(): number;
26
+ /**
27
+ * Set the duress-computed throttle floor. The effective throttle
28
+ * is max(userThrottle, duressFloor). Pause (throttle < 0) overrides.
29
+ */
30
+ setDuressFloor(delayMs: number): void;
31
+ getDuressFloor(): number;
32
+ /**
33
+ * Returns the effective throttle: max of user-set throttle and
34
+ * duress floor. Pause (negative) always takes precedence.
35
+ */
36
+ getEffectiveThrottle(): number;
9
37
  setThrottle(delayInMillis: number): void;
10
38
  isPaused(): boolean;
11
39
  /**
@@ -1,9 +1,26 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.ThrottleManager = void 0;
4
+ /**
5
+ * Elastic throttle with two independent inputs:
6
+ *
7
+ * 1. **User throttle** — set explicitly via quorum `throttle` command.
8
+ * Absolute value: 0 = resume, >0 = delay per message, -1 = pause.
9
+ *
10
+ * 2. **Duress floor** — set automatically by the DuressManager based on
11
+ * processing latency. The effective throttle is `max(user, duress)`,
12
+ * so duress never reduces below what the user set, and pause always
13
+ * takes precedence. When duress clears (floor returns to 0), the
14
+ * user's original throttle remains in effect.
15
+ *
16
+ * `customSleep()` uses the effective throttle, supports dynamic
17
+ * interruption (if the throttle decreases mid-sleep, the router wakes
18
+ * early; `setDuressFloor()` only stores the new floor, which is re-read when the current wait ends), and handles pause via a bare promise with no timer.
19
+ */
4
20
  class ThrottleManager {
5
21
  constructor(initialThrottle = 0) {
6
22
  this.throttle = 0;
23
+ this.duressFloor = 0;
7
24
  this.isSleeping = false;
8
25
  this.sleepPromiseResolve = null;
9
26
  this.innerPromiseResolve = null;
@@ -13,6 +30,25 @@ class ThrottleManager {
13
30
  getThrottle() {
14
31
  return this.throttle;
15
32
  }
33
+ /**
34
+ * Set the duress-computed throttle floor. The effective throttle
35
+ * is max(userThrottle, duressFloor). Pause (throttle < 0) overrides.
36
+ */
37
+ setDuressFloor(delayMs) {
38
+ this.duressFloor = Math.max(0, delayMs);
39
+ }
40
+ getDuressFloor() { // floor (ms) last stored by setDuressFloor(); 0 until one is set
41
+ return this.duressFloor;
42
+ }
43
+ /**
44
+ * Returns the effective throttle: max of user-set throttle and
45
+ * duress floor. Pause (negative) always takes precedence.
46
+ */
47
+ getEffectiveThrottle() {
48
+ if (this.throttle < 0)
49
+ return this.throttle; // pause overrides: a negative user throttle is returned as-is and the floor is ignored
50
+ return Math.max(this.throttle, this.duressFloor); // otherwise the larger of the user throttle and the duress floor
51
+ }
16
52
  setThrottle(delayInMillis) {
17
53
  const wasPaused = this.throttle < 0;
18
54
  const wasDecreased = delayInMillis < this.throttle;
@@ -45,12 +81,13 @@ class ThrottleManager {
45
81
  * setThrottle() is called with a non-negative value.
46
82
  */
47
83
  async customSleep() {
48
- if (this.throttle === 0)
84
+ const effective = this.getEffectiveThrottle();
85
+ if (effective === 0)
49
86
  return;
50
87
  if (this.isSleeping)
51
88
  return;
52
89
  this.isSleeping = true;
53
- if (this.throttle < 0) {
90
+ if (effective < 0) {
54
91
  // Paused: wait indefinitely until setThrottle interrupts
55
92
  await new Promise((resolve) => {
56
93
  this.innerPromiseResolve = resolve;
@@ -62,12 +99,14 @@ class ThrottleManager {
62
99
  await new Promise(async (outerResolve) => {
63
100
  this.sleepPromiseResolve = outerResolve;
64
101
  let elapsedTime = Date.now() - startTime;
65
- while (elapsedTime < this.throttle && this.throttle > 0) {
102
+ let target = this.getEffectiveThrottle();
103
+ while (elapsedTime < target && target > 0) {
66
104
  await new Promise((innerResolve) => {
67
105
  this.innerPromiseResolve = innerResolve;
68
- this.sleepTimeout = setTimeout(innerResolve, this.throttle - elapsedTime);
106
+ this.sleepTimeout = setTimeout(innerResolve, target - elapsedTime);
69
107
  });
70
108
  elapsedTime = Date.now() - startTime;
109
+ target = this.getEffectiveThrottle();
71
110
  }
72
111
  this.resetThrottleState();
73
112
  outerResolve();
@@ -23,28 +23,33 @@ const KVTables = (context) => ({
23
23
  client = transactionClient;
24
24
  }
25
25
  try {
26
- // First, check if tables already exist (no lock needed)
27
26
  const tablesExist = await this.checkIfTablesExist(client, appName);
28
- if (tablesExist) {
29
- // Tables exist; apply any pending migrations
30
- await this.migrate(client, appName);
31
- return;
32
- }
33
- // Tables don't exist, need to acquire lock and create them
27
+ // Acquire advisory lock for ALL DDL: table creation and
28
+ // migrations. CREATE INDEX IF NOT EXISTS is not atomic under
29
+ // concurrent transactions — two sessions can both see the
30
+ // index as absent and both attempt creation, causing a
31
+ // unique_violation on pg_class_relname_nsp_index.
34
32
  const lockId = this.getAdvisoryLockId(appName);
35
33
  const lockResult = await client.query('SELECT pg_try_advisory_lock($1) AS locked', [lockId]);
36
34
  if (lockResult.rows[0].locked) {
37
- // Begin transaction
38
- await client.query('BEGIN');
39
- // Double-check tables don't exist (race condition safety)
40
- const tablesStillMissing = !(await this.checkIfTablesExist(client, appName));
41
- if (tablesStillMissing) {
42
- await this.createTables(client, appName);
35
+ try {
36
+ if (!tablesExist) {
37
+ // Begin transaction
38
+ await client.query('BEGIN');
39
+ // Double-check tables don't exist (race condition safety)
40
+ const tablesStillMissing = !(await this.checkIfTablesExist(client, appName));
41
+ if (tablesStillMissing) {
42
+ await this.createTables(client, appName);
43
+ }
44
+ // Commit transaction
45
+ await client.query('COMMIT');
46
+ }
47
+ // Always run migrations under the lock
48
+ await this.migrate(client, appName);
49
+ }
50
+ finally {
51
+ await client.query('SELECT pg_advisory_unlock($1)', [lockId]);
43
52
  }
44
- // Commit transaction
45
- await client.query('COMMIT');
46
- // Release the lock
47
- await client.query('SELECT pg_advisory_unlock($1)', [lockId]);
48
53
  }
49
54
  else {
50
55
  // Release the client before waiting
@@ -172,6 +177,11 @@ const KVTables = (context) => ({
172
177
  const fullTableName = `${tableDef.schema}.${tableDef.name}`;
173
178
  switch (tableDef.type) {
174
179
  case 'relational_app':
180
+ // Public tables are shared across all appIds. Use a fixed
181
+ // advisory lock to prevent concurrent CREATE TABLE races
182
+ // from different appId deployments (each has its own
183
+ // per-appId lock, but those don't overlap).
184
+ await client.query('SELECT pg_advisory_xact_lock($1)', [0x484D5348]);
175
185
  await client.query(`
176
186
  CREATE TABLE IF NOT EXISTS ${fullTableName} (
177
187
  app_id TEXT PRIMARY KEY,
@@ -11,25 +11,28 @@ async function deploySchema(streamClient, appId, logger) {
11
11
  const releaseClient = isPool;
12
12
  try {
13
13
  const schemaName = appId.replace(/[^a-zA-Z0-9_]/g, '_');
14
- // First, check if tables already exist (no lock needed)
15
14
  const tablesExist = await checkIfTablesExist(client, schemaName);
16
- if (tablesExist) {
17
- await ensureIndexes(client, schemaName);
18
- return;
19
- }
20
- // Tables don't exist, need to acquire lock and create them
15
+ // Acquire advisory lock for ALL DDL: table creation, index
16
+ // migrations, and trigger setup. CREATE INDEX IF NOT EXISTS is
17
+ // not atomic under concurrent transactions — two sessions can
18
+ // both see the index as absent and both attempt creation,
19
+ // causing a unique_violation on pg_class_relname_nsp_index.
21
20
  const lockId = getAdvisoryLockId(appId);
22
21
  const lockResult = await client.query('SELECT pg_try_advisory_lock($1) AS locked', [lockId]);
23
22
  if (lockResult.rows[0].locked) {
24
23
  try {
25
- await client.query('BEGIN');
26
- // Double-check tables don't exist (race condition safety)
27
- const tablesStillMissing = !(await checkIfTablesExist(client, schemaName));
28
- if (tablesStillMissing) {
29
- await createTables(client, schemaName);
30
- await createNotificationTriggers(client, schemaName);
24
+ if (!tablesExist) {
25
+ await client.query('BEGIN');
26
+ // Double-check tables don't exist (race condition safety)
27
+ const tablesStillMissing = !(await checkIfTablesExist(client, schemaName));
28
+ if (tablesStillMissing) {
29
+ await createTables(client, schemaName);
30
+ await createNotificationTriggers(client, schemaName);
31
+ }
32
+ await client.query('COMMIT');
31
33
  }
32
- await client.query('COMMIT');
34
+ // Always run index migrations under the lock
35
+ await ensureIndexes(client, schemaName);
33
36
  }
34
37
  finally {
35
38
  await client.query('SELECT pg_advisory_unlock($1)', [lockId]);
@@ -1,5 +1,7 @@
1
1
  import { JobOutput } from './job';
2
2
  import { StringAnyType } from './serializer';
3
+ /** Duress severity level for adaptive engine throttling. */
4
+ export type DuressLevel = 'healthy' | 'mild' | 'moderate' | 'severe'; // listed from least to most severe
3
5
  export interface CPULoad {
4
6
  [cpu: string]: string;
5
7
  }
@@ -86,6 +88,12 @@ export interface QuorumProfile {
86
88
  system?: SystemHealth;
87
89
  /** Stringified worker callback function (only if `signature: true` in rollcall). */
88
90
  signature?: string;
91
+ /** Current duress level. Engine routers only. */
92
+ duress_level?: DuressLevel;
93
+ /** Current duress score in ms (max EMA across message types). Engine routers only. */
94
+ duress_score_ms?: number;
95
+ /** Per-message-type EMA latencies in ms. Engine routers only. */
96
+ duress_per_type?: Record<string, number>;
89
97
  }
90
98
  interface QuorumMessageBase {
91
99
  entity?: string;
@@ -138,6 +146,17 @@ export interface ThrottleMessage extends QuorumMessageBase {
138
146
  topic?: string;
139
147
  throttle: number;
140
148
  }
149
+ export interface DuressMessage extends QuorumMessageBase { // quorum message carrying one engine's duress state to its peers
150
+ type: 'duress'; // literal tag that distinguishes this member of the QuorumMessage union
151
+ /** GUID of the engine that detected duress */
152
+ originator: string;
153
+ /** Aggregate duress score (max EMA across message types) in ms */
154
+ duress_score_ms: number;
155
+ /** Recommended throttle delay in ms */
156
+ throttle_ms: number;
157
+ /** Duress severity level */
158
+ level: DuressLevel;
159
+ }
141
160
  export interface RollCallMessage extends QuorumMessageBase {
142
161
  type: 'rollcall';
143
162
  guid?: string;
@@ -169,5 +188,5 @@ export type SubscriptionOptions = {
169
188
  * These messages serve to coordinate the cache invalidation and switch-over
170
189
  * to the new version without any downtime and a coordinating parent server.
171
190
  */
172
- export type QuorumMessage = PingMessage | PongMessage | ActivateMessage | WorkMessage | JobMessage | ThrottleMessage | RollCallMessage | CronMessage | UserMessage;
191
+ export type QuorumMessage = PingMessage | PongMessage | ActivateMessage | WorkMessage | JobMessage | ThrottleMessage | DuressMessage | RollCallMessage | CronMessage | UserMessage;
173
192
  export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hotmeshio/hotmesh",
3
- "version": "0.19.3",
3
+ "version": "0.19.5",
4
4
  "description": "Durable Workflow",
5
5
  "main": "./build/index.js",
6
6
  "types": "./build/index.d.ts",