npm - @hotmeshio/hotmesh - Versions diffs - 0.19.4 → 0.20.0 - Mend

@hotmeshio/hotmesh 0.19.4 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/build/modules/enums.d.ts +38 -0
package/build/modules/enums.js +40 -1
package/build/package.json +1 -1
package/build/services/engine/index.d.ts +6 -0
package/build/services/engine/index.js +8 -0
package/build/services/hotmesh/index.js +14 -0
package/build/services/quorum/index.js +13 -0
package/build/services/router/config/index.d.ts +2 -2
package/build/services/router/config/index.js +8 -1
package/build/services/router/consumption/index.d.ts +6 -1
package/build/services/router/consumption/index.js +35 -1
package/build/services/router/duress/index.d.ts +91 -0
package/build/services/router/duress/index.js +217 -0
package/build/services/router/index.d.ts +7 -0
package/build/services/router/index.js +18 -1
package/build/services/router/throttling/index.d.ts +28 -0
package/build/services/router/throttling/index.js +43 -4
package/build/services/stream/providers/postgres/kvtables.js +74 -36
package/build/services/stream/providers/postgres/messages.js +16 -6
package/build/services/stream/providers/postgres/postgres.d.ts +7 -0
package/build/services/stream/providers/postgres/postgres.js +26 -3
package/build/services/stream/providers/postgres/procedures.js +10 -3
package/build/types/quorum.d.ts +20 -1
package/build/types/stream.d.ts +4 -0
package/package.json +1 -1

package/build/modules/enums.d.ts CHANGED Viewed

@@ -256,6 +256,44 @@ export declare const HMSH_GUID_SIZE: number;
  * Default task queue name used when no task queue is specified
  */
 export declare const DEFAULT_TASK_QUEUE = "default";
+/**
+ * EMA smoothing factor for duress latency tracking.
+ * Higher = faster response to spikes, lower = more stable.
+ * @default 0.3
+ */
+export declare const HMSH_DURESS_ALPHA: number;
+/**
+ * Number of messages between duress evaluations.
+ * @default 10
+ */
+export declare const HMSH_DURESS_EVAL_INTERVAL: number;
+/**
+ * Max EMA (ms) below which the engine is considered healthy. No throttle applied.
+ * @default 200
+ */
+export declare const HMSH_DURESS_HEALTHY_CEILING_MS: number;
+/**
+ * Max EMA (ms) below which duress is mild. Light throttle (100-500ms).
+ * @default 1000
+ */
+export declare const HMSH_DURESS_MILD_CEILING_MS: number;
+/**
+ * Max EMA (ms) below which duress is moderate. Moderate throttle (500-2000ms).
+ * Above this threshold, duress is severe (2000-5000ms throttle).
+ * @default 5000
+ */
+export declare const HMSH_DURESS_MODERATE_CEILING_MS: number;
+/**
+ * Minimum interval (ms) between quorum duress broadcasts.
+ * @default 5000
+ */
+export declare const HMSH_DURESS_BROADCAST_INTERVAL_MS: number;
+/**
+ * Number of consecutive improving evaluations required before
+ * dropping a duress level. Prevents oscillation.
+ * @default 3
+ */
+export declare const HMSH_DURESS_HYSTERESIS_COUNT: number;
 /**
  * PostgreSQL NOTIFY payload limit. If a job message exceeds this size,
  * a reference message is sent instead and the subscriber fetches via getState.

package/build/modules/enums.js CHANGED Viewed

@@ -1,7 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_ENGINE_CONCURRENCY = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_CYCLES = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
-exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = void 0;
+exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.HMSH_DURESS_HYSTERESIS_COUNT = exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = exports.HMSH_DURESS_MODERATE_CEILING_MS = exports.HMSH_DURESS_MILD_CEILING_MS = exports.HMSH_DURESS_HEALTHY_CEILING_MS = exports.HMSH_DURESS_EVAL_INTERVAL = exports.HMSH_DURESS_ALPHA = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = void 0;
 /**
  * Determines the log level for the application. The default is 'info'.
  */
@@ -288,6 +288,45 @@ exports.HMSH_GUID_SIZE = Math.min(parseInt(process.env.HMSH_GUID_SIZE, 10) || 22
  * Default task queue name used when no task queue is specified
  */
 exports.DEFAULT_TASK_QUEUE = 'default';
+// DURESS DETECTION — adaptive engine throttling based on processing latency
+/**
+ * EMA smoothing factor for duress latency tracking.
+ * Higher = faster response to spikes, lower = more stable.
+ * @default 0.3
+ */
+exports.HMSH_DURESS_ALPHA = parseFloat(process.env.HMSH_DURESS_ALPHA) || 0.3;
+/**
+ * Number of messages between duress evaluations.
+ * @default 10
+ */
+exports.HMSH_DURESS_EVAL_INTERVAL = parseInt(process.env.HMSH_DURESS_EVAL_INTERVAL, 10) || 10;
+/**
+ * Max EMA (ms) below which the engine is considered healthy. No throttle applied.
+ * @default 200
+ */
+exports.HMSH_DURESS_HEALTHY_CEILING_MS = parseInt(process.env.HMSH_DURESS_HEALTHY_CEILING_MS, 10) || 200;
+/**
+ * Max EMA (ms) below which duress is mild. Light throttle (100-500ms).
+ * @default 1000
+ */
+exports.HMSH_DURESS_MILD_CEILING_MS = parseInt(process.env.HMSH_DURESS_MILD_CEILING_MS, 10) || 1000;
+/**
+ * Max EMA (ms) below which duress is moderate. Moderate throttle (500-2000ms).
+ * Above this threshold, duress is severe (2000-5000ms throttle).
+ * @default 5000
+ */
+exports.HMSH_DURESS_MODERATE_CEILING_MS = parseInt(process.env.HMSH_DURESS_MODERATE_CEILING_MS, 10) || 5000;
+/**
+ * Minimum interval (ms) between quorum duress broadcasts.
+ * @default 5000
+ */
+exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = parseInt(process.env.HMSH_DURESS_BROADCAST_INTERVAL_MS, 10) || 5000;
+/**
+ * Number of consecutive improving evaluations required before
+ * dropping a duress level. Prevents oscillation.
+ * @default 3
+ */
+exports.HMSH_DURESS_HYSTERESIS_COUNT = parseInt(process.env.HMSH_DURESS_HYSTERESIS_COUNT, 10) || 3;
 /**
  * PostgreSQL NOTIFY payload limit. If a job message exceeds this size,
  * a reference message is sent instead and the subscriber fetches via getState.

package/build/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@hotmeshio/hotmesh",
-    "version": "0.19.4",
+    "version": "0.20.0",
     "description": "Durable Workflow",
     "main": "./build/index.js",
     "types": "./build/index.d.ts",

package/build/services/engine/index.d.ts CHANGED Viewed

@@ -167,6 +167,12 @@ declare class EngineService {
      * @private
      */
     throttle(delayInMillis: number): Promise<void>;
+    /**
+     * Apply a remote duress signal from the quorum.
+     * Delegates to the router's duress manager.
+     * @private
+     */
+    applyRemoteDuress(throttleMs: number, level: string): void;
     /**
      * @private
      */

package/build/services/engine/index.js CHANGED Viewed

@@ -267,6 +267,14 @@ class EngineService {
     async throttle(delayInMillis) {
         return Signal.throttle(this, delayInMillis);
     }
+    /**
+     * Apply a remote duress signal from the quorum.
+     * Delegates to the router's duress manager.
+     * @private
+     */
+    applyRemoteDuress(throttleMs, level) {
+        this.router?.applyRemoteDuress(throttleMs, level);
+    }
     // ═════════════════════════════════════════════════════════════════
     //  9. PUB/SUB — topic messaging, subscriptions, callbacks
     //     → see pubsub.ts

package/build/services/hotmesh/index.js CHANGED Viewed

@@ -181,6 +181,20 @@ class HotMesh {
         instance.logger = new logger_1.LoggerService(config.appId, instance.guid, config.name || '', config.logLevel);
         await Init.initEngine(instance, config, instance.logger);
         await Init.initQuorum(instance, config, instance.engine, instance.logger);
+        // Register duress broadcast callback: engine router → quorum
+        if (instance.engine?.router && instance.quorum) {
+            const quorum = instance.quorum;
+            const engineGuid = instance.guid;
+            instance.engine.router.setDuressCallback((snapshot) => {
+                quorum.pub({
+                    type: 'duress',
+                    originator: engineGuid,
+                    duress_score_ms: snapshot.score_ms,
+                    throttle_ms: snapshot.throttle_ms,
+                    level: snapshot.level,
+                });
+            });
+        }
         await Init.doWork(instance, config, instance.logger);
         return instance;
     }

package/build/services/quorum/index.js CHANGED Viewed

@@ -111,6 +111,12 @@ class QuorumService {
             else if (message.type === 'cron') {
                 self.engine.processTimeHooks();
             }
+            else if (message.type === 'duress') {
+                // Apply remote duress signal (skip our own broadcasts)
+                if (message.originator !== self.guid) {
+                    self.engine.applyRemoteDuress(message.throttle_ms, message.level);
+                }
+            }
             else if (message.type === 'rollcall') {
                 self.doRollCall(message);
             }
@@ -147,6 +153,13 @@ class QuorumService {
                 reclaimCount: this.engine.router.reclaimCount,
                 system: await (0, utils_1.getSystemHealth)(),
             };
+            // Include duress info if available (engine routers only)
+            const duressSnapshot = this.engine.router.getDuressSnapshot?.();
+            if (duressSnapshot) {
+                profile.duress_level = duressSnapshot.level;
+                profile.duress_score_ms = duressSnapshot.score_ms;
+                profile.duress_per_type = duressSnapshot.per_type;
+            }
         }
         this.subscribe.publish(hotmesh_1.KeyType.QUORUM, {
             type: 'pong',

package/build/services/router/config/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
+import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, HMSH_DURESS_ALPHA, HMSH_DURESS_EVAL_INTERVAL, HMSH_DURESS_HEALTHY_CEILING_MS, HMSH_DURESS_MILD_CEILING_MS, HMSH_DURESS_MODERATE_CEILING_MS, HMSH_DURESS_BROADCAST_INTERVAL_MS, HMSH_DURESS_HYSTERESIS_COUNT } from '../../../modules/enums';
 import { RouterConfig } from '../../../types/stream';
 export declare class RouterConfigManager {
     static validateThrottle(delayInMillis: number): void;
@@ -8,4 +8,4 @@ export declare class RouterConfigManager {
         readonly: boolean;
     };
 }
-export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
+export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_BATCH_SIZE_MIN, HMSH_RESERVATION_TIMEOUT_S, HMSH_RESERVATION_TIMEOUT_MAX_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, HMSH_DURESS_ALPHA, HMSH_DURESS_EVAL_INTERVAL, HMSH_DURESS_HEALTHY_CEILING_MS, HMSH_DURESS_MILD_CEILING_MS, HMSH_DURESS_MODERATE_CEILING_MS, HMSH_DURESS_BROADCAST_INTERVAL_MS, HMSH_DURESS_HYSTERESIS_COUNT, };

package/build/services/router/config/index.js CHANGED Viewed

@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
+exports.HMSH_DURESS_HYSTERESIS_COUNT = exports.HMSH_DURESS_BROADCAST_INTERVAL_MS = exports.HMSH_DURESS_MODERATE_CEILING_MS = exports.HMSH_DURESS_MILD_CEILING_MS = exports.HMSH_DURESS_HEALTHY_CEILING_MS = exports.HMSH_DURESS_EVAL_INTERVAL = exports.HMSH_DURESS_ALPHA = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
 const enums_1 = require("../../../modules/enums");
 Object.defineProperty(exports, "HMSH_BLOCK_TIME_MS", { enumerable: true, get: function () { return enums_1.HMSH_BLOCK_TIME_MS; } });
 Object.defineProperty(exports, "HMSH_MAX_RETRIES", { enumerable: true, get: function () { return enums_1.HMSH_MAX_RETRIES; } });
@@ -21,6 +21,13 @@ Object.defineProperty(exports, "MAX_STREAM_BACKOFF", { enumerable: true, get: fu
 Object.defineProperty(exports, "INITIAL_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.INITIAL_STREAM_BACKOFF; } });
 Object.defineProperty(exports, "MAX_STREAM_RETRIES", { enumerable: true, get: function () { return enums_1.MAX_STREAM_RETRIES; } });
 Object.defineProperty(exports, "HMSH_POISON_MESSAGE_THRESHOLD", { enumerable: true, get: function () { return enums_1.HMSH_POISON_MESSAGE_THRESHOLD; } });
+Object.defineProperty(exports, "HMSH_DURESS_ALPHA", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_ALPHA; } });
+Object.defineProperty(exports, "HMSH_DURESS_EVAL_INTERVAL", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_EVAL_INTERVAL; } });
+Object.defineProperty(exports, "HMSH_DURESS_HEALTHY_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_HEALTHY_CEILING_MS; } });
+Object.defineProperty(exports, "HMSH_DURESS_MILD_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_MILD_CEILING_MS; } });
+Object.defineProperty(exports, "HMSH_DURESS_MODERATE_CEILING_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_MODERATE_CEILING_MS; } });
+Object.defineProperty(exports, "HMSH_DURESS_BROADCAST_INTERVAL_MS", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_BROADCAST_INTERVAL_MS; } });
+Object.defineProperty(exports, "HMSH_DURESS_HYSTERESIS_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_DURESS_HYSTERESIS_COUNT; } });
 class RouterConfigManager {
     static validateThrottle(delayInMillis) {
         if (!Number.isInteger(delayInMillis) ||

package/build/services/router/consumption/index.d.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { StreamService } from '../../stream';
 import { ThrottleManager } from '../throttling';
 import { ErrorHandler } from '../error-handling';
 import { LifecycleManager } from '../lifecycle';
+import { DuressManager, DuressSnapshot } from '../duress';
 import { StreamData, StreamDataResponse } from '../../../types/stream';
 import { ProviderClient, ProviderTransaction } from '../../../types/provider';
 export declare class ConsumptionManager<S extends StreamService<ProviderClient, ProviderTransaction>> {
@@ -26,6 +27,9 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
     private set hasReachedMaxBackoff(value);
     private router;
     private retry;
+    private duressManager?;
+    private onDuressChange?;
+    private messagesSinceLastEval;
     private adaptiveReservationTimeout;
     private adaptiveBatchSize;
     private lastDepthCheckAt;
@@ -33,7 +37,8 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
     private static readonly DEPTH_SCALE_UP_THRESHOLD;
     private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
     private static readonly LEASE_BUFFER_S;
-    constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
+    constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy, duressManager?: DuressManager);
+    setDuressCallback(callback: (snapshot: DuressSnapshot) => void): void;
     /**
      * Adjusts reservation timeout based on stream depth. Called periodically
      * from the consume loop. When depth is high:

package/build/services/router/consumption/index.js CHANGED Viewed

@@ -17,7 +17,8 @@ class ConsumptionManager {
     get counts() { return this.router.counts; }
     get hasReachedMaxBackoff() { return this.router.hasReachedMaxBackoff; }
     set hasReachedMaxBackoff(v) { this.router.hasReachedMaxBackoff = v; }
-    constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry) {
+    constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry, duressManager) {
+        this.messagesSinceLastEval = 0;
         // Adaptive consumption pressure — scales reservation timeout AND batch
         // size based on stream depth. Under load: timeout grows (prevents
         // duplicate re-reservation) and batch size shrinks (reduces in-memory
@@ -37,6 +38,10 @@ class ConsumptionManager {
         this.role = role;
         this.router = router;
         this.retry = retry;
+        this.duressManager = duressManager;
+    }
+    setDuressCallback(callback) {
+        this.onDuressChange = callback;
     }
     /**
      * Adjusts reservation timeout based on stream depth. Called periodically
@@ -500,6 +505,7 @@ class ConsumptionManager {
         const deadlineMs = this.adaptiveReservationTimeout * 1000;
         let output;
         const telemetry = new telemetry_1.RouterTelemetry(this.appId);
+        const processingStart = Date.now();
         try {
             telemetry.startStreamSpan(input, this.role);
             let deadlineTimer;
@@ -549,6 +555,34 @@ class ConsumptionManager {
             telemetry.setStreamErrorFromException(err);
             output = this.errorHandler.structureUnhandledError(input, err instanceof Error ? err : new Error(String(err)));
         }
+        // Record processing latency for duress detection (engine routers only).
+        // This measures the actual time spent in execStreamLeg — the causal
+        // signal. The prior depth-based mechanism (adjustConsumptionPressure)
+        // responds to queue backlog; this responds to *why* the backlog exists.
+        // Evaluation is amortized over HMSH_DURESS_EVAL_INTERVAL messages to
+        // avoid per-message overhead.
+        if (this.duressManager && input.type) {
+            const processingDuration = Date.now() - processingStart;
+            this.duressManager.recordLatency(input.type, processingDuration);
+            if (++this.messagesSinceLastEval >= config_1.HMSH_DURESS_EVAL_INTERVAL) {
+                this.messagesSinceLastEval = 0;
+                const snapshot = this.duressManager.evaluate();
+                this.throttleManager.setDuressFloor(snapshot.throttle_ms);
+                if (snapshot.level !== 'healthy') {
+                    this.logger.info('stream-duress-detected', {
+                        stream,
+                        level: snapshot.level,
+                        score_ms: snapshot.score_ms,
+                        throttle_ms: snapshot.throttle_ms,
+                        per_type: snapshot.per_type,
+                    });
+                }
+                if (this.duressManager.shouldBroadcast() && this.onDuressChange) {
+                    this.duressManager.markBroadcast();
+                    this.onDuressChange(snapshot);
+                }
+            }
+        }
         try {
             // When the ENGINE encounters an infrastructure error (schema not found,
             // subscription missing — code 598), the message is permanently unprocessable.

package/build/services/router/duress/index.d.ts ADDED Viewed

@@ -0,0 +1,91 @@
+import { StreamDataType } from '../../../types/stream';
+import { DuressLevel } from '../../../types/quorum';
+export interface DuressSnapshot {
+    level: DuressLevel;
+    score_ms: number;
+    throttle_ms: number;
+    per_type: Record<string, number>;
+}
+/**
+ * Adaptive engine duress detection via processing latency.
+ *
+ * ## Why this exists
+ *
+ * Prior fixes responded to queue *depth* (a symptom) — doubling reservation
+ * timeouts and halving batch sizes when the stream backed up. A deep queue
+ * doesn't necessarily mean duress (it could be a burst of external triggers),
+ * and a shallow queue doesn't necessarily mean health. This module responds
+ * to the *cause*: actual processing latency per message type.
+ *
+ * ## How it works
+ *
+ * Each engine router tracks an exponential moving average (EMA) of how long
+ * each canonical message type (transition, timehook, webhook, worker response,
+ * etc.) takes to process. When healthy, these are sub-50ms. When the max EMA
+ * crosses configurable thresholds (200ms → mild, 1s → moderate, 5s → severe),
+ * the manager computes a proportional throttle delay that the ThrottleManager
+ * applies as a floor on engine consumption rate.
+ *
+ * ## Hysteresis (asymmetric by design)
+ *
+ * Escalation is immediate — if the engine suddenly enters duress, the throttle
+ * kicks in on the next evaluation. De-escalation requires `HYSTERESIS_COUNT`
+ * (default 3) consecutive improving evaluations before dropping a level. This
+ * prevents oscillation: throttle → drain → un-throttle → refill → throttle.
+ * The EMA already smooths individual outliers; hysteresis gates the recovery
+ * path specifically.
+ *
+ * ## Quorum coordination
+ *
+ * When a router detects a level change (or remains in duress), it broadcasts
+ * a `'duress'` message via the quorum. Peers adopt the signal only if it's
+ * worse than their local state, so the mesh converges on the worst-case
+ * throttle without coordination.
+ *
+ * ## What this does NOT do
+ *
+ * External messages (triggers, signalIn/webhooks from the outside world) are
+ * never throttled. They always enter `engine_streams`. Only the engine
+ * routers' pull rate slows down, giving the system breathing room.
+ */
+export declare class DuressManager {
+    private emas;
+    private sampleCounts;
+    private currentLevel;
+    private belowThresholdCount;
+    private duressThrottle;
+    private lastBroadcastAt;
+    private lastBroadcastLevel;
+    /**
+     * Record a processing duration for a message type.
+     * Updates the exponential moving average for that type.
+     */
+    recordLatency(type: StreamDataType, durationMs: number): void;
+    /**
+     * Evaluate duress state from current EMAs.
+     * Returns a snapshot with level, score, recommended throttle,
+     * and per-type latencies.
+     */
+    evaluate(): DuressSnapshot;
+    getDuressThrottle(): number;
+    getCurrentLevel(): DuressLevel;
+    /**
+     * Apply a duress snapshot received from another engine via quorum.
+     * Adopts the remote signal only if it indicates worse duress than local.
+     */
+    applyRemoteDuress(throttleMs: number, level: DuressLevel): void;
+    /**
+     * Whether a quorum broadcast is warranted.
+     * Rate-limited and only fires when level changes or duress is active.
+     */
+    shouldBroadcast(): boolean;
+    markBroadcast(): void;
+    /**
+     * Returns a snapshot for inclusion in quorum rollcall profiles.
+     */
+    getSnapshot(): DuressSnapshot;
+    private scoreToLevel;
+    private scoreToThrottle;
+    private lerp;
+    private levelOrdinal;
+}

package/build/services/router/duress/index.js ADDED Viewed

@@ -0,0 +1,217 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.DuressManager = void 0;
+const config_1 = require("../config");
+// Throttle band boundaries (ms): the per-level output ranges that scoreToThrottle() interpolates into
+const MILD_THROTTLE_MIN = 100; // throttle at the bottom of the mild latency band
+const MILD_THROTTLE_MAX = 500; // throttle at the top of the mild latency band
+const MODERATE_THROTTLE_MIN = 500; // equals MILD_THROTTLE_MAX, so the throttle is continuous across the boundary
+const MODERATE_THROTTLE_MAX = 2000;
+const SEVERE_THROTTLE_MIN = 2000; // equals MODERATE_THROTTLE_MAX
+const SEVERE_THROTTLE_MAX = 5000; // upper bound on any locally computed duress throttle
+/**
+ * Adaptive engine duress detection via processing latency.
+ *
+ * ## Why this exists
+ *
+ * Prior fixes responded to queue *depth* (a symptom) — doubling reservation
+ * timeouts and halving batch sizes when the stream backed up. A deep queue
+ * doesn't necessarily mean duress (it could be a burst of external triggers),
+ * and a shallow queue doesn't necessarily mean health. This module responds
+ * to the *cause*: actual processing latency per message type.
+ *
+ * ## How it works
+ *
+ * Each engine router tracks an exponential moving average (EMA) of how long
+ * each canonical message type (transition, timehook, webhook, worker response,
+ * etc.) takes to process. When healthy, these are sub-50ms. When the max EMA
+ * crosses configurable thresholds (200ms → mild, 1s → moderate, 5s → severe),
+ * the manager computes a proportional throttle delay that the ThrottleManager
+ * applies as a floor on engine consumption rate.
+ *
+ * ## Hysteresis (asymmetric by design)
+ *
+ * Escalation is immediate — if the engine suddenly enters duress, the throttle
+ * kicks in on the next evaluation. De-escalation requires `HYSTERESIS_COUNT`
+ * (default 3) consecutive improving evaluations; the level then drops directly
+ * to whatever the latest evaluation indicates (possibly several levels). This
+ * prevents oscillation: throttle → drain → un-throttle → refill → throttle.
+ * The EMA already smooths individual outliers; hysteresis gates the recovery
+ * path specifically.
+ *
+ * ## Quorum coordination
+ *
+ * When a router detects a level change (or remains in duress), it broadcasts
+ * a `'duress'` message via the quorum. Peers adopt the signal only if it's
+ * worse than their local state, so the mesh converges on the worst-case
+ * throttle without coordination.
+ *
+ * ## What this does NOT do
+ *
+ * External messages (triggers, signalIn/webhooks from the outside world) are
+ * never throttled. They always enter `engine_streams`. Only the engine
+ * routers' pull rate slows down, giving the system breathing room.
+ */
+class DuressManager {
+    constructor() {
+        // Per-message-type exponential moving averages
+        this.emas = new Map();
+        this.sampleCounts = new Map();
+        // Hysteresis state
+        this.currentLevel = 'healthy';
+        this.belowThresholdCount = 0;
+        // Computed duress throttle floor
+        this.duressThrottle = 0;
+        // Broadcast rate limiting
+        this.lastBroadcastAt = 0;
+        this.lastBroadcastLevel = 'healthy';
+    }
+    /**
+     * Record a processing duration for a message type.
+     * Updates the exponential moving average for that type.
+     */
+    recordLatency(type, durationMs) {
+        const key = type;
+        const count = this.sampleCounts.get(key) || 0;
+        if (count === 0) {
+            // First sample: seed the EMA directly
+            this.emas.set(key, durationMs);
+        }
+        else {
+            const prev = this.emas.get(key);
+            this.emas.set(key, config_1.HMSH_DURESS_ALPHA * durationMs + (1 - config_1.HMSH_DURESS_ALPHA) * prev);
+        }
+        this.sampleCounts.set(key, count + 1);
+    }
+    /**
+     * Evaluate duress state from current EMAs.
+     * Returns a snapshot with level, score, recommended throttle,
+     * and per-type latencies.
+     */
+    evaluate() {
+        // Aggregate: max EMA across all tracked types
+        let maxEma = 0;
+        const perType = {};
+        for (const [type, ema] of this.emas) {
+            perType[type] = Math.round(ema);
+            if (ema > maxEma)
+                maxEma = ema;
+        }
+        const rawLevel = this.scoreToLevel(maxEma);
+        // Hysteresis: only drop level after sustained improvement
+        if (this.levelOrdinal(rawLevel) < this.levelOrdinal(this.currentLevel)) {
+            this.belowThresholdCount++;
+            if (this.belowThresholdCount >= config_1.HMSH_DURESS_HYSTERESIS_COUNT) {
+                this.currentLevel = rawLevel;
+                this.belowThresholdCount = 0;
+            }
+            // Keep current (higher) level until hysteresis clears
+        }
+        else {
+            // Same or worse: reset hysteresis counter, adopt immediately
+            this.belowThresholdCount = 0;
+            this.currentLevel = rawLevel;
+        }
+        this.duressThrottle =
+            this.currentLevel === 'healthy'
+                ? 0
+                : this.scoreToThrottle(maxEma, this.currentLevel);
+        return {
+            level: this.currentLevel,
+            score_ms: Math.round(maxEma),
+            throttle_ms: this.duressThrottle,
+            per_type: perType,
+        };
+    }
+    getDuressThrottle() {
+        return this.duressThrottle;
+    }
+    getCurrentLevel() {
+        return this.currentLevel;
+    }
+    /**
+     * Apply a duress snapshot received from another engine via quorum.
+     * Adopts the remote signal only if it indicates worse duress than local.
+     */
+    applyRemoteDuress(throttleMs, level) {
+        if (this.levelOrdinal(level) > this.levelOrdinal(this.currentLevel)) {
+            this.currentLevel = level;
+            this.duressThrottle = throttleMs;
+            this.belowThresholdCount = 0;
+        }
+    }
+    /**
+     * Whether a quorum broadcast is warranted.
+     * Rate-limited and only fires when level changes or duress is active.
+     */
+    shouldBroadcast() {
+        const now = Date.now();
+        if (now - this.lastBroadcastAt < config_1.HMSH_DURESS_BROADCAST_INTERVAL_MS) {
+            return false;
+        }
+        return (this.currentLevel !== this.lastBroadcastLevel ||
+            this.currentLevel !== 'healthy');
+    }
+    markBroadcast() {
+        this.lastBroadcastAt = Date.now();
+        this.lastBroadcastLevel = this.currentLevel;
+    }
+    /**
+     * Returns a snapshot for inclusion in quorum rollcall profiles.
+     */
+    getSnapshot() {
+        let maxEma = 0;
+        const perType = {};
+        for (const [type, ema] of this.emas) {
+            perType[type] = Math.round(ema);
+            if (ema > maxEma)
+                maxEma = ema;
+        }
+        return {
+            level: this.currentLevel,
+            score_ms: Math.round(maxEma),
+            throttle_ms: this.duressThrottle,
+            per_type: perType,
+        };
+    }
+    // --- Private helpers ---
+    scoreToLevel(ms) {
+        if (ms < config_1.HMSH_DURESS_HEALTHY_CEILING_MS)
+            return 'healthy';
+        if (ms < config_1.HMSH_DURESS_MILD_CEILING_MS)
+            return 'mild';
+        if (ms < config_1.HMSH_DURESS_MODERATE_CEILING_MS)
+            return 'moderate';
+        return 'severe';
+    }
+    scoreToThrottle(ms, level) {
+        // Linear interpolation within the band for the given level
+        switch (level) {
+            case 'healthy':
+                return 0;
+            case 'mild':
+                return this.lerp(ms, config_1.HMSH_DURESS_HEALTHY_CEILING_MS, config_1.HMSH_DURESS_MILD_CEILING_MS, MILD_THROTTLE_MIN, MILD_THROTTLE_MAX);
+            case 'moderate':
+                return this.lerp(ms, config_1.HMSH_DURESS_MILD_CEILING_MS, config_1.HMSH_DURESS_MODERATE_CEILING_MS, MODERATE_THROTTLE_MIN, MODERATE_THROTTLE_MAX);
+            case 'severe':
+                // Clamp to severe band max; beyond the ceiling is still severe
+                return this.lerp(ms, config_1.HMSH_DURESS_MODERATE_CEILING_MS, config_1.HMSH_DURESS_MODERATE_CEILING_MS * 2, SEVERE_THROTTLE_MIN, SEVERE_THROTTLE_MAX);
+        }
+    }
+    lerp(value, inMin, inMax, outMin, outMax) {
+        const t = Math.min(Math.max((value - inMin) / (inMax - inMin), 0), 1);
+        return Math.round(outMin + t * (outMax - outMin));
+    }
+    levelOrdinal(level) {
+        switch (level) {
+            case 'healthy':
+                return 0;
+            case 'mild':
+                return 1;
+            case 'moderate':
+                return 2;
+            case 'severe':
+                return 3;
+        }
+    }
+}
+exports.DuressManager = DuressManager;

package/build/services/router/index.d.ts CHANGED Viewed

@@ -2,7 +2,9 @@
 import { ILogger } from '../logger';
 import { StreamService } from '../stream';
 import { RouterConfig, StreamData, StreamDataResponse, StreamRole } from '../../types/stream';
+import { DuressLevel } from '../../types/quorum';
 import { ProviderClient, ProviderTransaction } from '../../types/provider';
+import { DuressSnapshot } from './duress';
 declare class Router<S extends StreamService<ProviderClient, ProviderTransaction>> {
     appId: string;
     guid: string;
@@ -29,6 +31,8 @@ declare class Router<S extends StreamService<ProviderClient, ProviderTransaction
     private errorHandler;
     private lifecycleManager;
     private consumptionManager;
+    private duressManager?;
+    private _pendingDuressSnapshot?;
     constructor(config: RouterConfig, stream: S, logger: ILogger);
     get throttle(): number;
     get shouldConsume(): boolean;
@@ -49,6 +53,9 @@ declare class Router<S extends StreamService<ProviderClient, ProviderTransaction
     structureUnhandledError(input: StreamData, err: Error): StreamDataResponse;
     structureUnacknowledgedError(input: StreamData): StreamDataResponse;
     structureError(input: StreamData, output: StreamDataResponse): StreamDataResponse;
+    setDuressCallback(callback: (snapshot: DuressSnapshot) => void): void;
+    applyRemoteDuress(throttleMs: number, level: DuressLevel): void;
+    getDuressSnapshot(): DuressSnapshot | undefined;
     static stopConsuming(): Promise<void>;
     stopConsuming(): Promise<void>;
     cancelThrottle(): void;

package/build/services/router/index.js CHANGED Viewed

@@ -1,12 +1,14 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.Router = void 0;
+const stream_1 = require("../../types/stream");
 // Import the new submodules
 const config_1 = require("./config");
 const throttling_1 = require("./throttling");
 const error_handling_1 = require("./error-handling");
 const lifecycle_1 = require("./lifecycle");
 const consumption_1 = require("./consumption");
+const duress_1 = require("./duress");
 class Router {
     constructor(config, stream, logger) {
         // Legacy properties for backward compatibility
@@ -34,7 +36,11 @@ class Router {
         this.throttleManager = new throttling_1.ThrottleManager(enhancedConfig.throttle);
         this.errorHandler = new error_handling_1.ErrorHandler();
         this.lifecycleManager = new lifecycle_1.LifecycleManager(this.readonly, this.topic, this.logger, this.stream);
-        this.consumptionManager = new consumption_1.ConsumptionManager(this.stream, this.logger, this.throttleManager, this.errorHandler, this.lifecycleManager, this.reclaimDelay, this.reclaimCount, this.appId, this.role, this, this.retry);
+        // Engine routers get duress detection; workers do not
+        if (this.role === stream_1.StreamRole.ENGINE) {
+            this.duressManager = new duress_1.DuressManager();
+        }
+        this.consumptionManager = new consumption_1.ConsumptionManager(this.stream, this.logger, this.throttleManager, this.errorHandler, this.lifecycleManager, this.reclaimDelay, this.reclaimCount, this.appId, this.role, this, this.retry, this.duressManager);
         this.resetThrottleState();
     }
     // Legacy compatibility methods
@@ -99,6 +105,17 @@ class Router {
     structureError(input, output) {
         return this.errorHandler.structureError(input, output);
     }
+    // Duress detection methods (engine routers only)
+    setDuressCallback(callback) { // forwards the snapshot callback to the consumption manager
+        this.consumptionManager.setDuressCallback(callback);
+    }
+    applyRemoteDuress(throttleMs, level) {
+        this.duressManager?.applyRemoteDuress(throttleMs, level);
+        this.throttleManager.setDuressFloor(throttleMs);
+    }
+    getDuressSnapshot() { // undefined when this router has no DuressManager (only created for the ENGINE role)
+        return this.duressManager?.getSnapshot();
+    }
     // Static methods for instance management
     static async stopConsuming() {
         return lifecycle_1.InstanceRegistry.stopAll();

package/build/services/router/throttling/index.d.ts CHANGED Viewed

@@ -1,11 +1,39 @@
+/**
+ * Elastic throttle with two independent inputs:
+ *
+ * 1. **User throttle** — set explicitly via quorum `throttle` command.
+ *    Absolute value: 0 = resume, >0 = delay per message, -1 = pause.
+ *
+ * 2. **Duress floor** — set automatically by the DuressManager based on
+ *    processing latency. The effective throttle is `max(user, duress)`,
+ *    so duress never reduces below what the user set, and pause always
+ *    takes precedence. When duress clears (floor returns to 0), the
+ *    user's original throttle remains in effect.
+ *
+ * `customSleep()` uses the effective throttle, supports dynamic
+ * interruption (if the throttle decreases mid-sleep, the router wakes
+ * early), and handles pause via a bare promise with no timer.
+ */
 export declare class ThrottleManager {
     private throttle;
+    private duressFloor;
     private isSleeping;
     private sleepPromiseResolve;
     private innerPromiseResolve;
     private sleepTimeout;
     constructor(initialThrottle?: number);
     getThrottle(): number;
+    /**
+     * Set the duress-computed throttle floor. The effective throttle
+     * is max(userThrottle, duressFloor). Pause (throttle < 0) overrides.
+     */
+    setDuressFloor(delayMs: number): void;
+    getDuressFloor(): number;
+    /**
+     * Returns the effective throttle: max of user-set throttle and
+     * duress floor. Pause (negative) always takes precedence.
+     */
+    getEffectiveThrottle(): number;
     setThrottle(delayInMillis: number): void;
     isPaused(): boolean;
     /**

package/build/services/router/throttling/index.js CHANGED Viewed

@@ -1,9 +1,26 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ThrottleManager = void 0;
+/**
+ * Elastic throttle with two independent inputs:
+ *
+ * 1. **User throttle** — set explicitly via quorum `throttle` command.
+ *    Absolute value: 0 = resume, >0 = delay per message, -1 = pause.
+ *
+ * 2. **Duress floor** — set automatically by the DuressManager based on
+ *    processing latency. The effective throttle is `max(user, duress)`,
+ *    so duress never reduces below what the user set, and pause always
+ *    takes precedence. When duress clears (floor returns to 0), the
+ *    user's original throttle remains in effect.
+ *
+ * `customSleep()` uses the effective throttle, supports dynamic
+ * interruption (if the throttle decreases mid-sleep, the router wakes
+ * early), and handles pause via a bare promise with no timer.
+ */
 class ThrottleManager {
     constructor(initialThrottle = 0) {
         this.throttle = 0;
+        this.duressFloor = 0;
         this.isSleeping = false;
         this.sleepPromiseResolve = null;
         this.innerPromiseResolve = null;
@@ -13,6 +30,25 @@ class ThrottleManager {
     getThrottle() {
         return this.throttle;
     }
+    /**
+     * Set the duress-computed throttle floor. The effective throttle
+     * is max(userThrottle, duressFloor). Pause (throttle < 0) overrides.
+     */
+    setDuressFloor(delayMs) {
+        this.duressFloor = Math.max(0, delayMs);
+    }
+    getDuressFloor() { // current duress floor in ms, as stored by setDuressFloor()
+        return this.duressFloor;
+    }
+    /**
+     * Returns the effective throttle: max of user-set throttle and
+     * duress floor. Pause (negative) always takes precedence.
+     */
+    getEffectiveThrottle() {
+        if (this.throttle < 0)
+            return this.throttle; // pause overrides
+        return Math.max(this.throttle, this.duressFloor);
+    }
     setThrottle(delayInMillis) {
         const wasPaused = this.throttle < 0;
         const wasDecreased = delayInMillis < this.throttle;
@@ -45,12 +81,13 @@ class ThrottleManager {
      * setThrottle() is called with a non-negative value.
      */
     async customSleep() {
-        if (this.throttle === 0)
+        const effective = this.getEffectiveThrottle();
+        if (effective === 0)
             return;
         if (this.isSleeping)
             return;
         this.isSleeping = true;
-        if (this.throttle < 0) {
+        if (effective < 0) {
             // Paused: wait indefinitely until setThrottle interrupts
             await new Promise((resolve) => {
                 this.innerPromiseResolve = resolve;
@@ -62,12 +99,14 @@ class ThrottleManager {
         await new Promise(async (outerResolve) => {
             this.sleepPromiseResolve = outerResolve;
             let elapsedTime = Date.now() - startTime;
-            while (elapsedTime < this.throttle && this.throttle > 0) {
+            let target = this.getEffectiveThrottle();
+            while (elapsedTime < target && target > 0) {
                 await new Promise((innerResolve) => {
                     this.innerPromiseResolve = innerResolve;
-                    this.sleepTimeout = setTimeout(innerResolve, this.throttle - elapsedTime);
+                    this.sleepTimeout = setTimeout(innerResolve, target - elapsedTime);
                 });
                 elapsedTime = Date.now() - startTime;
+                target = this.getEffectiveThrottle();
             }
             this.resetThrottleState();
             outerResolve();

package/build/services/stream/providers/postgres/kvtables.js CHANGED Viewed

@@ -31,8 +31,10 @@ async function deploySchema(streamClient, appId, logger) {
                     }
                     await client.query('COMMIT');
                 }
-                // Always run index migrations under the lock
+                // Always run index, procedure, and trigger migrations under the lock
                 await ensureIndexes(client, schemaName);
+                await ensureProcedures(client, schemaName);
+                await ensureStatementLevelTriggers(client, schemaName);
             }
             finally {
                 await client.query('SELECT pg_advisory_unlock($1)', [lockId]);
@@ -129,7 +131,12 @@ async function waitForTablesCreation(streamClient, lockId, schemaName, logger) {
 async function ensureIndexes(client, schemaName) {
     const engineTable = `${schemaName}.engine_streams`;
     const workerTable = `${schemaName}.worker_streams`;
-    // Drop legacy indexes that don't include the priority column
+    // Drop legacy indexes that don't include the priority column, plus
+    // redundant ones: idx_*_expired_at duplicates the partial
+    // idx_*_processed_volume for the retention purge, and
+    // idx_*_stream_name_expired_at duplicates the leading column and
+    // predicate of idx_*_message_fetch. Every index here is maintained on
+    // each message's INSERT plus two non-HOT UPDATEs (reserve, ack).
     for (const idx of [
         'idx_engine_streams_dequeue',
         'idx_engine_streams_stale_reservations',
@@ -139,6 +146,10 @@ async function ensureIndexes(client, schemaName) {
         'idx_engine_streams_message_fetch',
         'idx_worker_streams_active_messages',
         'idx_worker_streams_message_fetch',
+        'idx_engine_streams_expired_at',
+        'idx_engine_stream_name_expired_at',
+        'idx_worker_streams_expired_at',
+        'idx_worker_stream_name_expired_at',
     ]) {
         await client.query(`DROP INDEX IF EXISTS ${schemaName}.${idx}`);
     }
@@ -148,9 +159,13 @@ async function ensureIndexes(client, schemaName) {
     ON ${engineTable} (stream_name, priority DESC, visible_at, id)
     WHERE reserved_at IS NULL AND expired_at IS NULL;
   `);
+    // message_fetch must match the dequeue ORDER BY (priority DESC, id)
+    // exactly — placing visible_at between them forces the claim query to
+    // fetch and sort the entire pending backlog instead of stopping at
+    // LIMIT. visible_at and stale-reservation checks are scan filters.
     await client.query(`
     CREATE INDEX IF NOT EXISTS idx_engine_streams_message_fetch
-    ON ${engineTable} (stream_name, priority DESC, visible_at, id)
+    ON ${engineTable} (stream_name, priority DESC, id)
     WHERE expired_at IS NULL;
   `);
     await client.query(`
@@ -160,7 +175,7 @@ async function ensureIndexes(client, schemaName) {
   `);
     await client.query(`
     CREATE INDEX IF NOT EXISTS idx_worker_streams_message_fetch
-    ON ${workerTable} (stream_name, priority DESC, visible_at, id)
+    ON ${workerTable} (stream_name, priority DESC, id)
     WHERE expired_at IS NULL;
   `);
     // v0.18.0: add jid column to engine_streams for job tracing
@@ -171,6 +186,35 @@ async function ensureIndexes(client, schemaName) {
     WHERE jid != '';
   `);
 }
+/**
+ * Re-deploy the SECURITY DEFINER stored procedures on existing
+ * databases so query changes (e.g., worker_dequeue) reach deployments
+ * created before the change. CREATE OR REPLACE preserves grants.
+ */
+async function ensureProcedures(client, schemaName) {
+    for (const sql of (0, procedures_1.getCreateProceduresSQL)(schemaName)) {
+        await client.query(sql);
+    }
+}
+/**
+ * Migrate pre-existing row-level notification triggers to the
+ * statement-level form. Recreating a trigger takes an ACCESS EXCLUSIVE
+ * lock on the table, so only do it when the installed trigger is still
+ * row-level (tgtype bit 0 set); subsequent boots are a no-op.
+ */
+async function ensureStatementLevelTriggers(client, schemaName) {
+    const result = await client.query(`SELECT count(*) AS row_level
+     FROM pg_trigger t
+     JOIN pg_class c ON c.oid = t.tgrelid
+     JOIN pg_namespace n ON n.oid = c.relnamespace
+     WHERE n.nspname = $1
+       AND c.relname IN ('engine_streams', 'worker_streams')
+       AND t.tgname IN ('notify_engine_stream_insert', 'notify_worker_stream_insert')
+       AND (t.tgtype & 1) = 1`, [schemaName]);
+    if (parseInt(result.rows[0].row_level, 10) > 0) {
+        await createNotificationTriggers(client, schemaName);
+    }
+}
 async function createTables(client, schemaName) {
     await client.query(`CREATE SCHEMA IF NOT EXISTS ${schemaName};`);
     // ---- ENGINE_STREAMS table ----
@@ -210,16 +254,7 @@ async function createTables(client, schemaName) {
   `);
     await client.query(`
     CREATE INDEX IF NOT EXISTS idx_engine_streams_message_fetch
-    ON ${engineTable} (stream_name, priority DESC, visible_at, id)
-    WHERE expired_at IS NULL;
-  `);
-    await client.query(`
-    CREATE INDEX IF NOT EXISTS idx_engine_streams_expired_at
-    ON ${engineTable} (expired_at);
-  `);
-    await client.query(`
-    CREATE INDEX IF NOT EXISTS idx_engine_stream_name_expired_at
-    ON ${engineTable} (stream_name)
+    ON ${engineTable} (stream_name, priority DESC, id)
     WHERE expired_at IS NULL;
   `);
     await client.query(`
@@ -280,16 +315,7 @@ async function createTables(client, schemaName) {
   `);
     await client.query(`
     CREATE INDEX IF NOT EXISTS idx_worker_streams_message_fetch
-    ON ${workerTable} (stream_name, priority DESC, visible_at, id)
-    WHERE expired_at IS NULL;
-  `);
-    await client.query(`
-    CREATE INDEX IF NOT EXISTS idx_worker_streams_expired_at
-    ON ${workerTable} (expired_at);
-  `);
-    await client.query(`
-    CREATE INDEX IF NOT EXISTS idx_worker_stream_name_expired_at
-    ON ${workerTable} (stream_name)
+    ON ${workerTable} (stream_name, priority DESC, id)
     WHERE expired_at IS NULL;
   `);
     await client.query(`
@@ -342,28 +368,35 @@ async function createNotificationTriggers(client, schemaName) {
     const engineTable = `${schemaName}.engine_streams`;
     const workerTable = `${schemaName}.worker_streams`;
     // ---- ENGINE notification trigger ----
+    // Statement-level with a transition table: one pg_notify per distinct
+    // stream_name per INSERT statement. Row-level triggers fire pg_notify
+    // per message, which both multiplies trigger overhead and serializes
+    // commits on the global notification queue lock at high insert rates.
     await client.query(`
     CREATE OR REPLACE FUNCTION ${schemaName}.notify_new_engine_stream_message()
     RETURNS TRIGGER AS $$
     DECLARE
+      rec RECORD;
       channel_name TEXT;
       payload JSON;
     BEGIN
-      IF NEW.visible_at <= NOW() THEN
-        channel_name := 'eng_' || NEW.stream_name;
+      FOR rec IN
+        SELECT DISTINCT stream_name FROM new_rows WHERE visible_at <= NOW()
+      LOOP
+        channel_name := 'eng_' || rec.stream_name;
         IF length(channel_name) > 63 THEN
           channel_name := left(channel_name, 63);
         END IF;
         payload := json_build_object(
-          'stream_name', NEW.stream_name,
+          'stream_name', rec.stream_name,
           'table_type', 'engine'
         );
         PERFORM pg_notify(channel_name, payload::text);
-      END IF;
+      END LOOP;
-      RETURN NEW;
+      RETURN NULL;
     END;
     $$ LANGUAGE plpgsql;
   `);
@@ -371,7 +404,8 @@ async function createNotificationTriggers(client, schemaName) {
     DROP TRIGGER IF EXISTS notify_engine_stream_insert ON ${engineTable};
     CREATE TRIGGER notify_engine_stream_insert
       AFTER INSERT ON ${engineTable}
-      FOR EACH ROW
+      REFERENCING NEW TABLE AS new_rows
+      FOR EACH STATEMENT
       EXECUTE FUNCTION ${schemaName}.notify_new_engine_stream_message();
   `);
     // ---- WORKER notification trigger ----
@@ -379,24 +413,27 @@ async function createNotificationTriggers(client, schemaName) {
     CREATE OR REPLACE FUNCTION ${schemaName}.notify_new_worker_stream_message()
     RETURNS TRIGGER AS $$
     DECLARE
+      rec RECORD;
       channel_name TEXT;
       payload JSON;
     BEGIN
-      IF NEW.visible_at <= NOW() THEN
-        channel_name := 'wrk_' || NEW.stream_name;
+      FOR rec IN
+        SELECT DISTINCT stream_name FROM new_rows WHERE visible_at <= NOW()
+      LOOP
+        channel_name := 'wrk_' || rec.stream_name;
         IF length(channel_name) > 63 THEN
           channel_name := left(channel_name, 63);
         END IF;
         payload := json_build_object(
-          'stream_name', NEW.stream_name,
+          'stream_name', rec.stream_name,
           'table_type', 'worker'
         );
         PERFORM pg_notify(channel_name, payload::text);
-      END IF;
+      END LOOP;
-      RETURN NEW;
+      RETURN NULL;
     END;
     $$ LANGUAGE plpgsql;
   `);
@@ -404,7 +441,8 @@ async function createNotificationTriggers(client, schemaName) {
     DROP TRIGGER IF EXISTS notify_worker_stream_insert ON ${workerTable};
     CREATE TRIGGER notify_worker_stream_insert
       AFTER INSERT ON ${workerTable}
-      FOR EACH ROW
+      REFERENCING NEW TABLE AS new_rows
+      FOR EACH STATEMENT
       EXECUTE FUNCTION ${schemaName}.notify_new_worker_stream_message();
   `);
     // ---- Visibility timeout notification function (queries both tables) ----

package/build/services/stream/providers/postgres/messages.js CHANGED Viewed

@@ -215,18 +215,24 @@ async function fetchMessages(client, tableName, streamName, isEngine, consumerNa
     const maxRetries = options?.maxRetries ?? 3;
     let backoff = initialBackoff;
     let retries = 0;
-    // Include workflow_name in RETURNING for worker streams
+    // Include workflow_name in RETURNING for worker streams. Columns are
+    // qualified with the update target's alias because the claim UPDATE
+    // joins a CTE that also exposes an id column.
     const returningClause = isEngine
-        ? 'id, message, max_retry_attempts, backoff_coefficient, maximum_interval_seconds, retry_attempt'
-        : 'id, message, workflow_name, max_retry_attempts, backoff_coefficient, maximum_interval_seconds, retry_attempt';
+        ? 't.id, t.message, t.max_retry_attempts, t.backoff_coefficient, t.maximum_interval_seconds, t.retry_attempt'
+        : 't.id, t.message, t.workflow_name, t.max_retry_attempts, t.backoff_coefficient, t.maximum_interval_seconds, t.retry_attempt';
     try {
         while (retries < maxRetries) {
             retries++;
             const batchSize = options?.batchSize || 1;
             const reservationTimeout = options?.reservationTimeout || (enums_1.HMSH_RESERVATION_TIMEOUT_S + 5);
-            const res = await client.query(`UPDATE ${tableName}
-         SET reserved_at = NOW(), reserved_by = $3
-         WHERE id IN (
+            // The locking SELECT must live in a MATERIALIZED CTE: as a plain IN
+            // subquery the planner may re-execute it per outer row (rows updated
+            // earlier in the same command are skipped as lock candidates), which
+            // reserves MORE rows than LIMIT. The UPDATE repeats stream_name so
+            // the planner prunes to a single hash partition and joins on the
+            // (stream_name, id) primary key.
+            const res = await client.query(`WITH candidates AS MATERIALIZED (
            SELECT id FROM ${tableName}
            WHERE stream_name = $1
              AND (reserved_at IS NULL OR reserved_at < NOW() - INTERVAL '${reservationTimeout} seconds')
@@ -236,6 +242,10 @@ async function fetchMessages(client, tableName, streamName, isEngine, consumerNa
            LIMIT $2
            FOR UPDATE SKIP LOCKED
          )
+         UPDATE ${tableName} t
+         SET reserved_at = NOW(), reserved_by = $3
+         FROM candidates
+         WHERE t.stream_name = $1 AND t.id = candidates.id
          RETURNING ${returningClause}`, [streamName, batchSize, consumerName]);
             const messages = res.rows.map((row) => {
                 const data = (0, utils_1.parseStreamMessage)(row.message);

package/build/services/stream/providers/postgres/postgres.d.ts CHANGED Viewed

@@ -36,6 +36,13 @@ declare class PostgresStreamService extends StreamService<PostgresClientType & P
     init(namespace: string, appId: string, logger: ILogger): Promise<void>;
     private isNotificationsEnabled;
     private checkForMissedMessages;
+    /**
+     * Notification-driven fetch with coalescing. NOTIFYs that arrive while
+     * a fetch is in flight set fetchPending instead of issuing concurrent
+     * claim queries (a burst of N inserts otherwise triggers N claims per
+     * consumer, most returning empty). The drain loop re-fetches while the
+     * batch came back full or a NOTIFY arrived mid-fetch.
+     */
     private fetchAndDeliverMessages;
     private getConsumerKey;
     /**

package/build/services/stream/providers/postgres/postgres.js CHANGED Viewed

@@ -82,11 +82,31 @@ class PostgresStreamService extends index_1.StreamService {
             return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: instance.reservationTimeout, enableBackoff: false, maxRetries: 1 });
         });
     }
+    /**
+     * Notification-driven fetch with coalescing. NOTIFYs that arrive while
+     * a fetch is in flight set fetchPending instead of issuing concurrent
+     * claim queries (a burst of N inserts otherwise triggers N claims per
+     * consumer, most returning empty). The drain loop re-fetches while the
+     * batch came back full or a NOTIFY arrived mid-fetch.
+     */
     async fetchAndDeliverMessages(consumer) {
+        if (consumer.fetchInFlight) {
+            consumer.fetchPending = true;
+            return;
+        }
+        consumer.fetchInFlight = true;
+        const batchSize = 10;
         try {
-            const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: this.reservationTimeout, enableBackoff: false, maxRetries: 1 });
-            if (messages.length > 0) {
-                consumer.callback(messages);
+            let drain = true;
+            while (drain && consumer.isListening !== false) {
+                consumer.fetchPending = false;
+                const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize, reservationTimeout: this.reservationTimeout, enableBackoff: false, maxRetries: 1 });
+                if (messages.length > 0) {
+                    consumer.callback(messages);
+                }
+                // Boolean() rather than === true: fetchPending is mutated by the
+                // notification handler across the await, which TS narrowing misses
+                drain = messages.length === batchSize || Boolean(consumer.fetchPending);
             }
         }
         catch (error) {
@@ -96,6 +116,9 @@ class PostgresStreamService extends index_1.StreamService {
                 error,
             });
         }
+        finally {
+            consumer.fetchInFlight = false;
+        }
     }
     getConsumerKey(streamName, groupName) {
         return `${streamName}:${groupName}`;

package/build/services/stream/providers/postgres/procedures.js CHANGED Viewed

@@ -54,10 +54,12 @@ function getCreateProceduresSQL(schemaName) {
     SET search_path = ${schemaName}, pg_temp
     AS $$
     ${STREAM_ACCESS_CHECK}
+      -- The locking SELECT must live in a MATERIALIZED CTE: as a plain IN
+      -- subquery the planner may re-execute it per outer row, reserving
+      -- MORE rows than p_batch_size. stream_name on the UPDATE prunes to
+      -- a single hash partition and joins on the primary key.
       RETURN QUERY
-      UPDATE ${workerTable} ws
-      SET reserved_at = NOW(), reserved_by = p_consumer_id
-      WHERE ws.id IN (
+      WITH candidates AS MATERIALIZED (
         SELECT ws2.id FROM ${workerTable} ws2
         WHERE ws2.stream_name = p_stream_name
           AND (ws2.reserved_at IS NULL OR ws2.reserved_at < NOW() - (p_reservation_timeout_sec || ' seconds')::INTERVAL)
@@ -67,6 +69,11 @@ function getCreateProceduresSQL(schemaName) {
         LIMIT p_batch_size
         FOR UPDATE SKIP LOCKED
       )
+      UPDATE ${workerTable} ws
+      SET reserved_at = NOW(), reserved_by = p_consumer_id
+      FROM candidates
+      WHERE ws.stream_name = p_stream_name
+        AND ws.id = candidates.id
       RETURNING ws.id, ws.message, ws.workflow_name, ws.max_retry_attempts,
                 ws.backoff_coefficient, ws.maximum_interval_seconds, ws.retry_attempt;
     END;

package/build/types/quorum.d.ts CHANGED Viewed

@@ -1,5 +1,7 @@
 import { JobOutput } from './job';
 import { StringAnyType } from './serializer';
+/** Duress severity level for adaptive engine throttling. */
+export type DuressLevel = 'healthy' | 'mild' | 'moderate' | 'severe';
 export interface CPULoad {
     [cpu: string]: string;
 }
@@ -86,6 +88,12 @@ export interface QuorumProfile {
     system?: SystemHealth;
     /** Stringified worker callback function (only if `signature: true` in rollcall). */
     signature?: string;
+    /** Current duress level. Engine routers only. */
+    duress_level?: DuressLevel;
+    /** Current duress score in ms (max EMA across message types). Engine routers only. */
+    duress_score_ms?: number;
+    /** Per-message-type EMA latencies in ms. Engine routers only. */
+    duress_per_type?: Record<string, number>;
 }
 interface QuorumMessageBase {
     entity?: string;
@@ -138,6 +146,17 @@ export interface ThrottleMessage extends QuorumMessageBase {
     topic?: string;
     throttle: number;
 }
+export interface DuressMessage extends QuorumMessageBase {
+    type: 'duress';
+    /** GUID of the engine that detected duress */
+    originator: string;
+    /** Aggregate duress score (max EMA across message types) in ms */
+    duress_score_ms: number;
+    /** Recommended throttle delay in ms */
+    throttle_ms: number;
+    /** Duress severity level */
+    level: DuressLevel;
+}
 export interface RollCallMessage extends QuorumMessageBase {
     type: 'rollcall';
     guid?: string;
@@ -169,5 +188,5 @@ export type SubscriptionOptions = {
  * These messages serve to coordinate the cache invalidation and switch-over
  * to the new version without any downtime and a coordinating parent server.
  */
-export type QuorumMessage = PingMessage | PongMessage | ActivateMessage | WorkMessage | JobMessage | ThrottleMessage | RollCallMessage | CronMessage | UserMessage;
+export type QuorumMessage = PingMessage | PongMessage | ActivateMessage | WorkMessage | JobMessage | ThrottleMessage | DuressMessage | RollCallMessage | CronMessage | UserMessage;
 export {};

package/build/types/stream.d.ts CHANGED Viewed

@@ -300,4 +300,8 @@ export interface NotificationConsumer {
     lastFallbackCheck: number;
     /** Service instance that owns this consumer (for fetchAndDeliverMessages dispatch) */
     serviceInstance?: any;
+    /** True while a notification-driven fetch is in flight (coalesces concurrent NOTIFYs) */
+    fetchInFlight?: boolean;
+    /** Set when a NOTIFY arrives mid-fetch; triggers one follow-up fetch */
+    fetchPending?: boolean;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hotmeshio/hotmesh",
-  "version": "0.19.4",
+  "version": "0.20.0",
   "description": "Durable Workflow",
   "main": "./build/index.js",
   "types": "./build/index.d.ts",