@hotmeshio/hotmesh 0.14.5 → 0.14.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/modules/enums.d.ts +36 -0
- package/build/modules/enums.js +38 -2
- package/build/package.json +3 -2
- package/build/services/activities/activity/process.js +31 -1
- package/build/services/router/config/index.d.ts +2 -2
- package/build/services/router/config/index.js +3 -1
- package/build/services/router/consumption/index.d.ts +13 -0
- package/build/services/router/consumption/index.js +58 -2
- package/build/services/stream/index.d.ts +2 -0
- package/build/services/stream/index.js +3 -0
- package/build/services/stream/providers/postgres/messages.js +2 -1
- package/build/services/stream/providers/postgres/postgres.js +2 -2
- package/build/services/stream/providers/postgres/secured.js +2 -1
- package/package.json +3 -2
package/build/modules/enums.d.ts
CHANGED
|
@@ -161,6 +161,42 @@ export declare const HMSH_BLOCK_TIME_MS: number;
|
|
|
161
161
|
export declare const HMSH_XCLAIM_DELAY_MS: number;
|
|
162
162
|
export declare const HMSH_XCLAIM_COUNT: number;
|
|
163
163
|
export declare const HMSH_XPENDING_COUNT: number;
|
|
164
|
+
export declare const HMSH_BATCH_SIZE: number;
|
|
165
|
+
/**
|
|
166
|
+
* Postgres stream reservation timeout in seconds (default: 30).
|
|
167
|
+
*
|
|
168
|
+
* This is the **starting** reservation timeout for the Postgres stream
|
|
169
|
+
* consumer. When a consumer reserves a message from the stream, it must
|
|
170
|
+
* acknowledge it within this window. If processing takes longer, the
|
|
171
|
+
* message becomes available to other consumers — causing duplicate
|
|
172
|
+
* delivery, collation errors, and wasted CPU.
|
|
173
|
+
*
|
|
174
|
+
* **Adaptive behavior:** The router automatically adjusts this timeout
|
|
175
|
+
* at runtime based on stream depth. When the queue backs up (depth > 100),
|
|
176
|
+
* the timeout doubles (up to 600s). When the queue drains (depth < 10),
|
|
177
|
+
* it halves back toward this configured default. This prevents duplicate
|
|
178
|
+
* delivery under burst load without manual intervention.
|
|
179
|
+
*
|
|
180
|
+
* **When to increase this value:** If you see `process-event-*-error`
|
|
181
|
+
* warnings at `warn` level or `stream-reservation-timeout-adjusted` logs
|
|
182
|
+
* scaling up frequently, your baseline is too low for your workload.
|
|
183
|
+
* Setting a higher default reduces how aggressively the system must
|
|
184
|
+
* adapt during load spikes.
|
|
185
|
+
*
|
|
186
|
+
* **Symptoms of a value that is too low:**
|
|
187
|
+
* - `collation-error` from `verifySyntheticInteger` (warn level)
|
|
188
|
+
* - `process-event-collation-rate-exceeded` warning with guidance
|
|
189
|
+
* - `stream-reservation-timeout-adjusted` logs showing rapid scaling
|
|
190
|
+
* - Workflow stalls or timeouts under sustained concurrent load
|
|
191
|
+
*
|
|
192
|
+
* @example
|
|
193
|
+
* // Production with sustained high concurrency
|
|
194
|
+
* HMSH_RESERVATION_TIMEOUT_S=120
|
|
195
|
+
*
|
|
196
|
+
* // Low-latency environments with fast processing
|
|
197
|
+
* HMSH_RESERVATION_TIMEOUT_S=30 (default)
|
|
198
|
+
*/
|
|
199
|
+
export declare const HMSH_RESERVATION_TIMEOUT_S: number;
|
|
164
200
|
export declare const HMSH_EXPIRE_DURATION: number;
|
|
165
201
|
export declare const HMSH_FIDELITY_SECONDS: number;
|
|
166
202
|
export declare const HMSH_SCOUT_INTERVAL_SECONDS: number;
|
package/build/modules/enums.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
4
|
-
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = void 0;
|
|
3
|
+
exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
|
|
4
|
+
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = void 0;
|
|
5
5
|
/**
|
|
6
6
|
* Determines the log level for the application. The default is 'info'.
|
|
7
7
|
*/
|
|
@@ -178,6 +178,42 @@ exports.HMSH_BLOCK_TIME_MS = process.env.HMSH_BLOCK_TIME_MS
|
|
|
178
178
|
exports.HMSH_XCLAIM_DELAY_MS = parseInt(process.env.HMSH_XCLAIM_DELAY_MS, 10) || 1000 * 60;
|
|
179
179
|
exports.HMSH_XCLAIM_COUNT = parseInt(process.env.HMSH_XCLAIM_COUNT, 10) || 3;
|
|
180
180
|
exports.HMSH_XPENDING_COUNT = parseInt(process.env.HMSH_XPENDING_COUNT, 10) || 10;
|
|
181
|
+
exports.HMSH_BATCH_SIZE = parseInt(process.env.HMSH_BATCH_SIZE, 10) || 10;
|
|
182
|
+
/**
|
|
183
|
+
* Postgres stream reservation timeout in seconds (default: 30).
|
|
184
|
+
*
|
|
185
|
+
* This is the **starting** reservation timeout for the Postgres stream
|
|
186
|
+
* consumer. When a consumer reserves a message from the stream, it must
|
|
187
|
+
* acknowledge it within this window. If processing takes longer, the
|
|
188
|
+
* message becomes available to other consumers — causing duplicate
|
|
189
|
+
* delivery, collation errors, and wasted CPU.
|
|
190
|
+
*
|
|
191
|
+
* **Adaptive behavior:** The router automatically adjusts this timeout
|
|
192
|
+
* at runtime based on stream depth. When the queue backs up (depth > 100),
|
|
193
|
+
* the timeout doubles (up to 600s). When the queue drains (depth < 10),
|
|
194
|
+
* it halves back toward this configured default. This prevents duplicate
|
|
195
|
+
* delivery under burst load without manual intervention.
|
|
196
|
+
*
|
|
197
|
+
* **When to increase this value:** If you see `process-event-*-error`
|
|
198
|
+
* warnings at `warn` level or `stream-reservation-timeout-adjusted` logs
|
|
199
|
+
* scaling up frequently, your baseline is too low for your workload.
|
|
200
|
+
* Setting a higher default reduces how aggressively the system must
|
|
201
|
+
* adapt during load spikes.
|
|
202
|
+
*
|
|
203
|
+
* **Symptoms of a value that is too low:**
|
|
204
|
+
* - `collation-error` from `verifySyntheticInteger` (warn level)
|
|
205
|
+
* - `process-event-collation-rate-exceeded` warning with guidance
|
|
206
|
+
* - `stream-reservation-timeout-adjusted` logs showing rapid scaling
|
|
207
|
+
* - Workflow stalls or timeouts under sustained concurrent load
|
|
208
|
+
*
|
|
209
|
+
* @example
|
|
210
|
+
* // Production with sustained high concurrency
|
|
211
|
+
* HMSH_RESERVATION_TIMEOUT_S=120
|
|
212
|
+
*
|
|
213
|
+
* // Low-latency environments with fast processing
|
|
214
|
+
* HMSH_RESERVATION_TIMEOUT_S=30 (default)
|
|
215
|
+
*/
|
|
216
|
+
exports.HMSH_RESERVATION_TIMEOUT_S = parseInt(process.env.HMSH_RESERVATION_TIMEOUT_S, 10) || 30;
|
|
181
217
|
// TASK WORKER
|
|
182
218
|
exports.HMSH_EXPIRE_DURATION = parseInt(process.env.HMSH_EXPIRE_DURATION, 10) || 1;
|
|
183
219
|
const BASE_FIDELITY_SECONDS = 5;
|
package/build/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hotmeshio/hotmesh",
|
|
3
|
-
"version": "0.14.5",
|
|
3
|
+
"version": "0.14.6",
|
|
4
4
|
"description": "Durable Workflow",
|
|
5
5
|
"main": "./build/index.js",
|
|
6
6
|
"types": "./build/index.d.ts",
|
|
7
|
-
"homepage": "https://
|
|
7
|
+
"homepage": "https://docs.hotmesh.io/",
|
|
8
8
|
"publishConfig": {
|
|
9
9
|
"access": "public"
|
|
10
10
|
},
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"test:durable:postgres": "HMSH_LOGLEVEL=info vitest run tests/durable",
|
|
31
31
|
"test:durable:basic": "HMSH_LOGLEVEL=info vitest run tests/durable/basic/postgres.test.ts",
|
|
32
32
|
"test:durable:collision": "vitest run tests/durable/collision/postgres.test.ts",
|
|
33
|
+
"test:durable:contention": "vitest run tests/durable/contention/postgres.test.ts",
|
|
33
34
|
"test:durable:fatal": "vitest run tests/durable/fatal",
|
|
34
35
|
"test:durable:goodbye": "HMSH_LOGLEVEL=debug vitest run tests/durable/goodbye/postgres.test.ts",
|
|
35
36
|
"test:durable:interceptor": "HMSH_LOGLEVEL=info vitest run tests/durable/interceptor/postgres.test.ts",
|
|
package/build/services/activities/activity/process.js
CHANGED
|
@@ -6,6 +6,11 @@ const errors_1 = require("../../../modules/errors");
|
|
|
6
6
|
const collator_1 = require("../../collator");
|
|
7
7
|
const telemetry_1 = require("../../telemetry");
|
|
8
8
|
const stream_1 = require("../../../types/stream");
|
|
9
|
+
// Per-instance collation error tracking for reservation timeout detection
|
|
10
|
+
let collationErrorCount = 0;
|
|
11
|
+
let collationWindowStart = Date.now();
|
|
12
|
+
const COLLATION_WARN_THRESHOLD = 10;
|
|
13
|
+
const COLLATION_WINDOW_MS = 60000;
|
|
9
14
|
async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, code = 200, type = 'output') {
|
|
10
15
|
instance.setLeg(2);
|
|
11
16
|
const jid = instance.context.metadata.jid;
|
|
@@ -66,7 +71,32 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
|
|
|
66
71
|
}
|
|
67
72
|
catch (error) {
|
|
68
73
|
if (error instanceof errors_1.CollationError) {
|
|
69
|
-
|
|
74
|
+
// INACTIVE is legitimate duplicate detection — the Postgres atomic
|
|
75
|
+
// CTE (collateLeg2Entry) serializes via row locks, so the GUID
|
|
76
|
+
// ledger value is correct. Silent ack is the right behavior:
|
|
77
|
+
// the work was already done by a prior delivery of this message.
|
|
78
|
+
const now = Date.now();
|
|
79
|
+
if (now - collationWindowStart > COLLATION_WINDOW_MS) {
|
|
80
|
+
collationErrorCount = 0;
|
|
81
|
+
collationWindowStart = now;
|
|
82
|
+
}
|
|
83
|
+
collationErrorCount++;
|
|
84
|
+
if (collationErrorCount === COLLATION_WARN_THRESHOLD) {
|
|
85
|
+
instance.logger.warn('process-event-collation-rate-exceeded', {
|
|
86
|
+
count: collationErrorCount,
|
|
87
|
+
windowMs: COLLATION_WINDOW_MS,
|
|
88
|
+
reservationTimeoutS: enums_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
89
|
+
message: `${COLLATION_WARN_THRESHOLD} collation errors in ${COLLATION_WINDOW_MS / 1000}s. ` +
|
|
90
|
+
`This typically means HMSH_RESERVATION_TIMEOUT_S (currently ${enums_1.HMSH_RESERVATION_TIMEOUT_S}s) ` +
|
|
91
|
+
`is too short for your workload — messages are being re-reserved before processing completes, ` +
|
|
92
|
+
`causing duplicate delivery. Increase HMSH_RESERVATION_TIMEOUT_S.`,
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
instance.logger.warn(`process-event-${error.fault}-error`, {
|
|
96
|
+
jid: instance.context.metadata.jid,
|
|
97
|
+
aid: instance.metadata.aid,
|
|
98
|
+
error,
|
|
99
|
+
});
|
|
70
100
|
return;
|
|
71
101
|
}
|
|
72
102
|
else if (error instanceof errors_1.InactiveJobError) {
|
|
package/build/services/router/config/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
|
|
1
|
+
import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_RESERVATION_TIMEOUT_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
|
|
2
2
|
import { RouterConfig } from '../../../types/stream';
|
|
3
3
|
export declare class RouterConfigManager {
|
|
4
4
|
static validateThrottle(delayInMillis: number): void;
|
|
@@ -8,4 +8,4 @@ export declare class RouterConfigManager {
|
|
|
8
8
|
readonly: boolean;
|
|
9
9
|
};
|
|
10
10
|
}
|
|
11
|
-
export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
|
|
11
|
+
export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_RESERVATION_TIMEOUT_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
|
|
package/build/services/router/config/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
|
|
3
|
+
exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
|
|
4
4
|
const enums_1 = require("../../../modules/enums");
|
|
5
5
|
Object.defineProperty(exports, "HMSH_BLOCK_TIME_MS", { enumerable: true, get: function () { return enums_1.HMSH_BLOCK_TIME_MS; } });
|
|
6
6
|
Object.defineProperty(exports, "HMSH_MAX_RETRIES", { enumerable: true, get: function () { return enums_1.HMSH_MAX_RETRIES; } });
|
|
@@ -12,6 +12,8 @@ Object.defineProperty(exports, "HMSH_STATUS_UNKNOWN", { enumerable: true, get: f
|
|
|
12
12
|
Object.defineProperty(exports, "HMSH_XCLAIM_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_XCLAIM_COUNT; } });
|
|
13
13
|
Object.defineProperty(exports, "HMSH_XCLAIM_DELAY_MS", { enumerable: true, get: function () { return enums_1.HMSH_XCLAIM_DELAY_MS; } });
|
|
14
14
|
Object.defineProperty(exports, "HMSH_XPENDING_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_XPENDING_COUNT; } });
|
|
15
|
+
Object.defineProperty(exports, "HMSH_BATCH_SIZE", { enumerable: true, get: function () { return enums_1.HMSH_BATCH_SIZE; } });
|
|
16
|
+
Object.defineProperty(exports, "HMSH_RESERVATION_TIMEOUT_S", { enumerable: true, get: function () { return enums_1.HMSH_RESERVATION_TIMEOUT_S; } });
|
|
15
17
|
Object.defineProperty(exports, "MAX_DELAY", { enumerable: true, get: function () { return enums_1.MAX_DELAY; } });
|
|
16
18
|
Object.defineProperty(exports, "MAX_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.MAX_STREAM_BACKOFF; } });
|
|
17
19
|
Object.defineProperty(exports, "INITIAL_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.INITIAL_STREAM_BACKOFF; } });
|
|
package/build/services/router/consumption/index.d.ts
CHANGED
|
@@ -26,7 +26,20 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
|
|
|
26
26
|
private set hasReachedMaxBackoff(value);
|
|
27
27
|
private router;
|
|
28
28
|
private retry;
|
|
29
|
+
private adaptiveReservationTimeout;
|
|
30
|
+
private lastDepthCheckAt;
|
|
31
|
+
private static readonly DEPTH_CHECK_INTERVAL_MS;
|
|
32
|
+
private static readonly DEPTH_SCALE_UP_THRESHOLD;
|
|
33
|
+
private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
|
|
34
|
+
private static readonly RESERVATION_TIMEOUT_MAX_S;
|
|
29
35
|
constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
|
|
36
|
+
/**
|
|
37
|
+
* Adjusts reservation timeout based on stream depth. Called periodically
|
|
38
|
+
* from the consume loop. When depth is high, messages take longer to
|
|
39
|
+
* process, so the reservation window must grow to prevent re-delivery.
|
|
40
|
+
* When depth drops, the timeout shrinks back toward the configured default.
|
|
41
|
+
*/
|
|
42
|
+
private adjustReservationTimeout;
|
|
30
43
|
createGroup(stream: string, group: string): Promise<void>;
|
|
31
44
|
publishMessage(topic: string, streamData: StreamData | StreamDataResponse, transaction?: ProviderTransaction): Promise<string | ProviderTransaction>;
|
|
32
45
|
consumeMessages(stream: string, group: string, consumer: string, callback: (streamData: StreamData) => Promise<StreamDataResponse | void>): Promise<void>;
|
|
package/build/services/router/consumption/index.js
CHANGED
|
@@ -17,6 +17,11 @@ class ConsumptionManager {
|
|
|
17
17
|
get hasReachedMaxBackoff() { return this.router.hasReachedMaxBackoff; }
|
|
18
18
|
set hasReachedMaxBackoff(v) { this.router.hasReachedMaxBackoff = v; }
|
|
19
19
|
constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry) {
|
|
20
|
+
// Adaptive reservation timeout — scales with stream depth to prevent
|
|
21
|
+
// duplicate message delivery under load. When the stream backs up,
|
|
22
|
+
// processing takes longer, so the reservation window must grow.
|
|
23
|
+
this.adaptiveReservationTimeout = config_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
24
|
+
this.lastDepthCheckAt = 0;
|
|
20
25
|
this.stream = stream;
|
|
21
26
|
this.logger = logger;
|
|
22
27
|
this.throttleManager = throttleManager;
|
|
@@ -29,6 +34,46 @@ class ConsumptionManager {
|
|
|
29
34
|
this.router = router;
|
|
30
35
|
this.retry = retry;
|
|
31
36
|
}
|
|
37
|
+
/**
|
|
38
|
+
* Adjusts reservation timeout based on stream depth. Called periodically
|
|
39
|
+
* from the consume loop. When depth is high, messages take longer to
|
|
40
|
+
* process, so the reservation window must grow to prevent re-delivery.
|
|
41
|
+
* When depth drops, the timeout shrinks back toward the configured default.
|
|
42
|
+
*/
|
|
43
|
+
async adjustReservationTimeout(stream) {
|
|
44
|
+
const now = Date.now();
|
|
45
|
+
if (now - this.lastDepthCheckAt < ConsumptionManager.DEPTH_CHECK_INTERVAL_MS) {
|
|
46
|
+
return;
|
|
47
|
+
}
|
|
48
|
+
this.lastDepthCheckAt = now;
|
|
49
|
+
try {
|
|
50
|
+
const depth = await this.stream.getStreamDepth(stream);
|
|
51
|
+
const prev = this.adaptiveReservationTimeout;
|
|
52
|
+
if (depth > ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD) {
|
|
53
|
+
// Scale up: double the timeout, capped at max
|
|
54
|
+
this.adaptiveReservationTimeout = Math.min(this.adaptiveReservationTimeout * 2, ConsumptionManager.RESERVATION_TIMEOUT_MAX_S);
|
|
55
|
+
}
|
|
56
|
+
else if (depth < ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD) {
|
|
57
|
+
// Scale down: halve toward the configured default
|
|
58
|
+
this.adaptiveReservationTimeout = Math.max(Math.floor(this.adaptiveReservationTimeout / 2), config_1.HMSH_RESERVATION_TIMEOUT_S);
|
|
59
|
+
}
|
|
60
|
+
if (this.adaptiveReservationTimeout !== prev) {
|
|
61
|
+
// Update the stream provider so notification-path fetches
|
|
62
|
+
// also use the adaptive timeout
|
|
63
|
+
this.stream.reservationTimeout = this.adaptiveReservationTimeout;
|
|
64
|
+
this.logger.info('stream-reservation-timeout-adjusted', {
|
|
65
|
+
stream,
|
|
66
|
+
depth,
|
|
67
|
+
previousTimeoutS: prev,
|
|
68
|
+
newTimeoutS: this.adaptiveReservationTimeout,
|
|
69
|
+
configuredDefaultS: config_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
catch {
|
|
74
|
+
// Stream depth check is best-effort; don't fail the consume loop
|
|
75
|
+
}
|
|
76
|
+
}
|
|
32
77
|
async createGroup(stream, group) {
|
|
33
78
|
try {
|
|
34
79
|
await this.stream.createConsumerGroup(stream, group);
|
|
@@ -107,6 +152,8 @@ class ConsumptionManager {
|
|
|
107
152
|
if (this.lifecycleManager.isStopped(group, consumer, stream)) {
|
|
108
153
|
return;
|
|
109
154
|
}
|
|
155
|
+
// Adapt reservation timeout based on stream depth
|
|
156
|
+
await this.adjustReservationTimeout(stream);
|
|
110
157
|
await this.throttleManager.customSleep(); // respect throttle
|
|
111
158
|
if (this.lifecycleManager.isStopped(group, consumer, stream) ||
|
|
112
159
|
this.throttleManager.isPaused()) {
|
|
@@ -183,6 +230,7 @@ class ConsumptionManager {
|
|
|
183
230
|
enableNotifications: true,
|
|
184
231
|
notificationCallback,
|
|
185
232
|
blockTimeout: config_1.HMSH_BLOCK_TIME_MS,
|
|
233
|
+
reservationTimeout: config_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
186
234
|
});
|
|
187
235
|
// Don't block here - let the worker initialization complete
|
|
188
236
|
// The notification system will handle message processing asynchronously
|
|
@@ -225,14 +273,17 @@ class ConsumptionManager {
|
|
|
225
273
|
const streamDuration = config_1.HMSH_BLOCK_TIME_MS + Math.round(config_1.HMSH_BLOCK_TIME_MS * Math.random());
|
|
226
274
|
try {
|
|
227
275
|
let messages = [];
|
|
276
|
+
// Adapt reservation timeout based on stream depth
|
|
277
|
+
await this.adjustReservationTimeout(stream);
|
|
228
278
|
if (!this.hasReachedMaxBackoff) {
|
|
229
279
|
// Normal mode: try with backoff and finite retries
|
|
230
280
|
const features = this.stream.getProviderSpecificFeatures();
|
|
231
281
|
const isPostgres = features.supportsParallelProcessing;
|
|
232
|
-
const batchSize = isPostgres ?
|
|
282
|
+
const batchSize = isPostgres ? config_1.HMSH_BATCH_SIZE : 1;
|
|
233
283
|
messages = await this.stream.consumeMessages(stream, group, consumer, {
|
|
234
284
|
blockTimeout: streamDuration,
|
|
235
285
|
batchSize,
|
|
286
|
+
reservationTimeout: this.adaptiveReservationTimeout,
|
|
236
287
|
enableBackoff: true,
|
|
237
288
|
initialBackoff: config_1.INITIAL_STREAM_BACKOFF,
|
|
238
289
|
maxBackoff: config_1.MAX_STREAM_BACKOFF,
|
|
@@ -243,10 +294,11 @@ class ConsumptionManager {
|
|
|
243
294
|
// Fallback mode: just try once, no backoff
|
|
244
295
|
const features = this.stream.getProviderSpecificFeatures();
|
|
245
296
|
const isPostgres = features.supportsParallelProcessing;
|
|
246
|
-
const batchSize = isPostgres ?
|
|
297
|
+
const batchSize = isPostgres ? config_1.HMSH_BATCH_SIZE : 1;
|
|
247
298
|
messages = await this.stream.consumeMessages(stream, group, consumer, {
|
|
248
299
|
blockTimeout: streamDuration,
|
|
249
300
|
batchSize,
|
|
301
|
+
reservationTimeout: this.adaptiveReservationTimeout,
|
|
250
302
|
enableBackoff: false,
|
|
251
303
|
maxRetries: 1,
|
|
252
304
|
});
|
|
@@ -542,4 +594,8 @@ class ConsumptionManager {
|
|
|
542
594
|
return Array.isArray(result) && Array.isArray(result[0]);
|
|
543
595
|
}
|
|
544
596
|
}
|
|
597
|
+
ConsumptionManager.DEPTH_CHECK_INTERVAL_MS = 10000;
|
|
598
|
+
ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD = 100;
|
|
599
|
+
ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD = 10;
|
|
600
|
+
ConsumptionManager.RESERVATION_TIMEOUT_MAX_S = 600;
|
|
545
601
|
exports.ConsumptionManager = ConsumptionManager;
|
|
package/build/services/stream/index.d.ts
CHANGED
|
@@ -22,6 +22,7 @@ export declare abstract class StreamService<ClientProvider extends ProviderClien
|
|
|
22
22
|
batchSize?: number;
|
|
23
23
|
blockTimeout?: number;
|
|
24
24
|
autoAck?: boolean;
|
|
25
|
+
reservationTimeout?: number;
|
|
25
26
|
enableBackoff?: boolean;
|
|
26
27
|
initialBackoff?: number;
|
|
27
28
|
maxBackoff?: number;
|
|
@@ -41,6 +42,7 @@ export declare abstract class StreamService<ClientProvider extends ProviderClien
|
|
|
41
42
|
maxRetries?: number;
|
|
42
43
|
limit?: number;
|
|
43
44
|
}): Promise<StreamMessage[]>;
|
|
45
|
+
reservationTimeout: number;
|
|
44
46
|
abstract getStreamStats(streamName: string): Promise<StreamStats>;
|
|
45
47
|
abstract getStreamDepth(streamName: string): Promise<number>;
|
|
46
48
|
abstract getStreamDepths(streamName: {
|
|
package/build/services/stream/index.js
CHANGED
|
@@ -3,6 +3,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.StreamService = void 0;
|
|
4
4
|
class StreamService {
|
|
5
5
|
constructor(streamClient, storeClient, config = {}) {
|
|
6
|
+
// Adaptive reservation timeout — set by the consumption manager
|
|
7
|
+
// based on stream depth. Providers read this when reserving messages.
|
|
8
|
+
this.reservationTimeout = 30;
|
|
6
9
|
this.streamClient = streamClient;
|
|
7
10
|
this.storeClient = storeClient;
|
|
8
11
|
this.config = config;
|
|
package/build/services/stream/providers/postgres/messages.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.retryMessages = exports.deadLetterMessages = exports.ackAndDelete = exports.deleteMessages = exports.acknowledgeMessages = exports.fetchMessages = exports.buildPublishSQL = exports.publishMessages = void 0;
|
|
4
|
+
const enums_1 = require("../../../../modules/enums");
|
|
4
5
|
const utils_1 = require("../../../../modules/utils");
|
|
5
6
|
/**
|
|
6
7
|
* Publish messages to a stream. Can be used within a transaction.
|
|
@@ -205,7 +206,7 @@ async function fetchMessages(client, tableName, streamName, isEngine, consumerNa
|
|
|
205
206
|
while (retries < maxRetries) {
|
|
206
207
|
retries++;
|
|
207
208
|
const batchSize = options?.batchSize || 1;
|
|
208
|
-
const reservationTimeout = options?.reservationTimeout ||
|
|
209
|
+
const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
209
210
|
const res = await client.query(`UPDATE ${tableName}
|
|
210
211
|
SET reserved_at = NOW(), reserved_by = $3
|
|
211
212
|
WHERE id IN (
|
|
package/build/services/stream/providers/postgres/postgres.js
CHANGED
|
@@ -79,12 +79,12 @@ class PostgresStreamService extends index_1.StreamService {
|
|
|
79
79
|
}
|
|
80
80
|
async checkForMissedMessages() {
|
|
81
81
|
await this.notificationManager.checkForMissedMessages(async (instance, consumer) => {
|
|
82
|
-
return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, enableBackoff: false, maxRetries: 1 });
|
|
82
|
+
return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: instance.reservationTimeout, enableBackoff: false, maxRetries: 1 });
|
|
83
83
|
});
|
|
84
84
|
}
|
|
85
85
|
async fetchAndDeliverMessages(consumer) {
|
|
86
86
|
try {
|
|
87
|
-
const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, enableBackoff: false, maxRetries: 1 });
|
|
87
|
+
const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: this.reservationTimeout, enableBackoff: false, maxRetries: 1 });
|
|
88
88
|
if (messages.length > 0) {
|
|
89
89
|
consumer.callback(messages);
|
|
90
90
|
}
|
|
package/build/services/stream/providers/postgres/secured.js
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
10
|
exports.publishMessagesSecured = exports.deadLetterMessagesSecured = exports.ackAndDeleteSecured = exports.fetchMessagesSecured = void 0;
|
|
11
|
+
const enums_1 = require("../../../../modules/enums");
|
|
11
12
|
const utils_1 = require("../../../../modules/utils");
|
|
12
13
|
const utils_2 = require("../../../../modules/utils");
|
|
13
14
|
/**
|
|
@@ -19,7 +20,7 @@ async function fetchMessagesSecured(client, schema, streamName, consumerName, op
|
|
|
19
20
|
const maxBackoff = options?.maxBackoff ?? 3000;
|
|
20
21
|
const maxRetries = options?.maxRetries ?? 3;
|
|
21
22
|
const batchSize = options?.batchSize || 1;
|
|
22
|
-
const reservationTimeout = options?.reservationTimeout ||
|
|
23
|
+
const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
23
24
|
let backoff = initialBackoff;
|
|
24
25
|
let retries = 0;
|
|
25
26
|
try {
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hotmeshio/hotmesh",
|
|
3
|
-
"version": "0.14.5",
|
|
3
|
+
"version": "0.14.6",
|
|
4
4
|
"description": "Durable Workflow",
|
|
5
5
|
"main": "./build/index.js",
|
|
6
6
|
"types": "./build/index.d.ts",
|
|
7
|
-
"homepage": "https://
|
|
7
|
+
"homepage": "https://docs.hotmesh.io/",
|
|
8
8
|
"publishConfig": {
|
|
9
9
|
"access": "public"
|
|
10
10
|
},
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"test:durable:postgres": "HMSH_LOGLEVEL=info vitest run tests/durable",
|
|
31
31
|
"test:durable:basic": "HMSH_LOGLEVEL=info vitest run tests/durable/basic/postgres.test.ts",
|
|
32
32
|
"test:durable:collision": "vitest run tests/durable/collision/postgres.test.ts",
|
|
33
|
+
"test:durable:contention": "vitest run tests/durable/contention/postgres.test.ts",
|
|
33
34
|
"test:durable:fatal": "vitest run tests/durable/fatal",
|
|
34
35
|
"test:durable:goodbye": "HMSH_LOGLEVEL=debug vitest run tests/durable/goodbye/postgres.test.ts",
|
|
35
36
|
"test:durable:interceptor": "HMSH_LOGLEVEL=info vitest run tests/durable/interceptor/postgres.test.ts",
|