@hotmeshio/hotmesh 0.14.4 → 0.14.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/modules/enums.d.ts +42 -0
- package/build/modules/enums.js +44 -2
- package/build/package.json +3 -2
- package/build/services/activities/activity/process.js +31 -1
- package/build/services/activities/hook.d.ts +10 -1
- package/build/services/activities/hook.js +45 -6
- package/build/services/dba/index.d.ts +1 -0
- package/build/services/dba/index.js +20 -3
- package/build/services/durable/client.js +13 -3
- package/build/services/durable/handle.d.ts +8 -1
- package/build/services/durable/handle.js +9 -1
- package/build/services/durable/workflow/signal.d.ts +1 -1
- package/build/services/durable/workflow/signal.js +2 -1
- package/build/services/router/config/index.d.ts +2 -2
- package/build/services/router/config/index.js +3 -1
- package/build/services/router/consumption/index.d.ts +13 -0
- package/build/services/router/consumption/index.js +58 -2
- package/build/services/store/index.d.ts +15 -2
- package/build/services/store/providers/postgres/kvtables.d.ts +1 -0
- package/build/services/store/providers/postgres/kvtables.js +46 -1
- package/build/services/store/providers/postgres/postgres.d.ts +25 -2
- package/build/services/store/providers/postgres/postgres.js +121 -4
- package/build/services/stream/index.d.ts +2 -0
- package/build/services/stream/index.js +3 -0
- package/build/services/stream/providers/postgres/messages.js +2 -1
- package/build/services/stream/providers/postgres/postgres.js +2 -2
- package/build/services/stream/providers/postgres/secured.js +2 -1
- package/build/services/task/index.d.ts +4 -1
- package/build/services/task/index.js +34 -6
- package/build/types/dba.d.ts +11 -0
- package/package.json +3 -2
package/build/modules/enums.d.ts
CHANGED
|
@@ -55,6 +55,12 @@ export declare const HMSH_TELEMETRY: "debug" | "info";
|
|
|
55
55
|
* Default cleanup time for signal in the db when its associated job is completed.
|
|
56
56
|
*/
|
|
57
57
|
export declare const HMSH_SIGNAL_EXPIRE = 3600;
|
|
58
|
+
/**
|
|
59
|
+
* Default TTL for pending signals (signals that arrived before the hook registered).
|
|
60
|
+
* The signaler can override this via the `$expire` field in the signal data
|
|
61
|
+
* using a natural-language duration (e.g., '1h', '24h').
|
|
62
|
+
*/
|
|
63
|
+
export declare const HMSH_PENDING_SIGNAL_EXPIRE = 600;
|
|
58
64
|
export declare const HMSH_CODE_SUCCESS = 200;
|
|
59
65
|
export declare const HMSH_CODE_PENDING = 202;
|
|
60
66
|
export declare const HMSH_CODE_NOTFOUND = 404;
|
|
@@ -155,6 +161,42 @@ export declare const HMSH_BLOCK_TIME_MS: number;
|
|
|
155
161
|
export declare const HMSH_XCLAIM_DELAY_MS: number;
|
|
156
162
|
export declare const HMSH_XCLAIM_COUNT: number;
|
|
157
163
|
export declare const HMSH_XPENDING_COUNT: number;
|
|
164
|
+
export declare const HMSH_BATCH_SIZE: number;
|
|
165
|
+
/**
|
|
166
|
+
* Postgres stream reservation timeout in seconds (default: 30).
|
|
167
|
+
*
|
|
168
|
+
* This is the **starting** reservation timeout for the Postgres stream
|
|
169
|
+
* consumer. When a consumer reserves a message from the stream, it must
|
|
170
|
+
* acknowledge it within this window. If processing takes longer, the
|
|
171
|
+
* message becomes available to other consumers — causing duplicate
|
|
172
|
+
* delivery, collation errors, and wasted CPU.
|
|
173
|
+
*
|
|
174
|
+
* **Adaptive behavior:** The router automatically adjusts this timeout
|
|
175
|
+
* at runtime based on stream depth. When the queue backs up (depth > 100),
|
|
176
|
+
* the timeout doubles (up to 600s). When the queue drains (depth < 10),
|
|
177
|
+
* it halves back toward this configured default. This prevents duplicate
|
|
178
|
+
* delivery under burst load without manual intervention.
|
|
179
|
+
*
|
|
180
|
+
* **When to increase this value:** If you see `process-event-*-error`
|
|
181
|
+
* warnings at `warn` level or `stream-reservation-timeout-adjusted` logs
|
|
182
|
+
* scaling up frequently, your baseline is too low for your workload.
|
|
183
|
+
* Setting a higher default reduces how aggressively the system must
|
|
184
|
+
* adapt during load spikes.
|
|
185
|
+
*
|
|
186
|
+
* **Symptoms of a value that is too low:**
|
|
187
|
+
* - `collation-error` from `verifySyntheticInteger` (warn level)
|
|
188
|
+
* - `process-event-collation-rate-exceeded` warning with guidance
|
|
189
|
+
* - `stream-reservation-timeout-adjusted` logs showing rapid scaling
|
|
190
|
+
* - Workflow stalls or timeouts under sustained concurrent load
|
|
191
|
+
*
|
|
192
|
+
* @example
|
|
193
|
+
* // Production with sustained high concurrency
|
|
194
|
+
* HMSH_RESERVATION_TIMEOUT_S=120
|
|
195
|
+
*
|
|
196
|
+
* // Low-latency environments with fast processing
|
|
197
|
+
* HMSH_RESERVATION_TIMEOUT_S=30 (default)
|
|
198
|
+
*/
|
|
199
|
+
export declare const HMSH_RESERVATION_TIMEOUT_S: number;
|
|
158
200
|
export declare const HMSH_EXPIRE_DURATION: number;
|
|
159
201
|
export declare const HMSH_FIDELITY_SECONDS: number;
|
|
160
202
|
export declare const HMSH_SCOUT_INTERVAL_SECONDS: number;
|
package/build/modules/enums.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
4
|
-
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = void 0;
|
|
3
|
+
exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
|
|
4
|
+
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = void 0;
|
|
5
5
|
/**
|
|
6
6
|
* Determines the log level for the application. The default is 'info'.
|
|
7
7
|
*/
|
|
@@ -58,6 +58,12 @@ exports.HMSH_TELEMETRY = process.env.HMSH_TELEMETRY || 'info';
|
|
|
58
58
|
* Default cleanup time for signal in the db when its associated job is completed.
|
|
59
59
|
*/
|
|
60
60
|
exports.HMSH_SIGNAL_EXPIRE = 3600; //seconds
|
|
61
|
+
/**
|
|
62
|
+
* Default TTL for pending signals (signals that arrived before the hook registered).
|
|
63
|
+
* The signaler can override this via the `$expire` field in the signal data
|
|
64
|
+
* using a natural-language duration (e.g., '1h', '24h').
|
|
65
|
+
*/
|
|
66
|
+
exports.HMSH_PENDING_SIGNAL_EXPIRE = 600; //seconds (10 minutes)
|
|
61
67
|
// HOTMESH STATUS CODES
|
|
62
68
|
exports.HMSH_CODE_SUCCESS = 200;
|
|
63
69
|
exports.HMSH_CODE_PENDING = 202;
|
|
@@ -172,6 +178,42 @@ exports.HMSH_BLOCK_TIME_MS = process.env.HMSH_BLOCK_TIME_MS
|
|
|
172
178
|
exports.HMSH_XCLAIM_DELAY_MS = parseInt(process.env.HMSH_XCLAIM_DELAY_MS, 10) || 1000 * 60;
|
|
173
179
|
exports.HMSH_XCLAIM_COUNT = parseInt(process.env.HMSH_XCLAIM_COUNT, 10) || 3;
|
|
174
180
|
exports.HMSH_XPENDING_COUNT = parseInt(process.env.HMSH_XPENDING_COUNT, 10) || 10;
|
|
181
|
+
exports.HMSH_BATCH_SIZE = parseInt(process.env.HMSH_BATCH_SIZE, 10) || 10;
|
|
182
|
+
/**
|
|
183
|
+
* Postgres stream reservation timeout in seconds (default: 30).
|
|
184
|
+
*
|
|
185
|
+
* This is the **starting** reservation timeout for the Postgres stream
|
|
186
|
+
* consumer. When a consumer reserves a message from the stream, it must
|
|
187
|
+
* acknowledge it within this window. If processing takes longer, the
|
|
188
|
+
* message becomes available to other consumers — causing duplicate
|
|
189
|
+
* delivery, collation errors, and wasted CPU.
|
|
190
|
+
*
|
|
191
|
+
* **Adaptive behavior:** The router automatically adjusts this timeout
|
|
192
|
+
* at runtime based on stream depth. When the queue backs up (depth > 100),
|
|
193
|
+
* the timeout doubles (up to 600s). When the queue drains (depth < 10),
|
|
194
|
+
* it halves back toward this configured default. This prevents duplicate
|
|
195
|
+
* delivery under burst load without manual intervention.
|
|
196
|
+
*
|
|
197
|
+
* **When to increase this value:** If you see `process-event-*-error`
|
|
198
|
+
* warnings at `warn` level or `stream-reservation-timeout-adjusted` logs
|
|
199
|
+
* scaling up frequently, your baseline is too low for your workload.
|
|
200
|
+
* Setting a higher default reduces how aggressively the system must
|
|
201
|
+
* adapt during load spikes.
|
|
202
|
+
*
|
|
203
|
+
* **Symptoms of a value that is too low:**
|
|
204
|
+
* - `collation-error` from `verifySyntheticInteger` (warn level)
|
|
205
|
+
* - `process-event-collation-rate-exceeded` warning with guidance
|
|
206
|
+
* - `stream-reservation-timeout-adjusted` logs showing rapid scaling
|
|
207
|
+
* - Workflow stalls or timeouts under sustained concurrent load
|
|
208
|
+
*
|
|
209
|
+
* @example
|
|
210
|
+
* // Production with sustained high concurrency
|
|
211
|
+
* HMSH_RESERVATION_TIMEOUT_S=120
|
|
212
|
+
*
|
|
213
|
+
* // Low-latency environments with fast processing
|
|
214
|
+
* HMSH_RESERVATION_TIMEOUT_S=30 (default)
|
|
215
|
+
*/
|
|
216
|
+
exports.HMSH_RESERVATION_TIMEOUT_S = parseInt(process.env.HMSH_RESERVATION_TIMEOUT_S, 10) || 30;
|
|
175
217
|
// TASK WORKER
|
|
176
218
|
exports.HMSH_EXPIRE_DURATION = parseInt(process.env.HMSH_EXPIRE_DURATION, 10) || 1;
|
|
177
219
|
const BASE_FIDELITY_SECONDS = 5;
|
package/build/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hotmeshio/hotmesh",
|
|
3
|
-
"version": "0.14.4",
|
|
3
|
+
"version": "0.14.6",
|
|
4
4
|
"description": "Durable Workflow",
|
|
5
5
|
"main": "./build/index.js",
|
|
6
6
|
"types": "./build/index.d.ts",
|
|
7
|
-
"homepage": "https://
|
|
7
|
+
"homepage": "https://docs.hotmesh.io/",
|
|
8
8
|
"publishConfig": {
|
|
9
9
|
"access": "public"
|
|
10
10
|
},
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"test:durable:postgres": "HMSH_LOGLEVEL=info vitest run tests/durable",
|
|
31
31
|
"test:durable:basic": "HMSH_LOGLEVEL=info vitest run tests/durable/basic/postgres.test.ts",
|
|
32
32
|
"test:durable:collision": "vitest run tests/durable/collision/postgres.test.ts",
|
|
33
|
+
"test:durable:contention": "vitest run tests/durable/contention/postgres.test.ts",
|
|
33
34
|
"test:durable:fatal": "vitest run tests/durable/fatal",
|
|
34
35
|
"test:durable:goodbye": "HMSH_LOGLEVEL=debug vitest run tests/durable/goodbye/postgres.test.ts",
|
|
35
36
|
"test:durable:interceptor": "HMSH_LOGLEVEL=info vitest run tests/durable/interceptor/postgres.test.ts",
|
|
package/build/services/activities/activity/process.js
CHANGED
|
@@ -6,6 +6,11 @@ const errors_1 = require("../../../modules/errors");
|
|
|
6
6
|
const collator_1 = require("../../collator");
|
|
7
7
|
const telemetry_1 = require("../../telemetry");
|
|
8
8
|
const stream_1 = require("../../../types/stream");
|
|
9
|
+
// Per-instance collation error tracking for reservation timeout detection
|
|
10
|
+
let collationErrorCount = 0;
|
|
11
|
+
let collationWindowStart = Date.now();
|
|
12
|
+
const COLLATION_WARN_THRESHOLD = 10;
|
|
13
|
+
const COLLATION_WINDOW_MS = 60000;
|
|
9
14
|
async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, code = 200, type = 'output') {
|
|
10
15
|
instance.setLeg(2);
|
|
11
16
|
const jid = instance.context.metadata.jid;
|
|
@@ -66,7 +71,32 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
|
|
|
66
71
|
}
|
|
67
72
|
catch (error) {
|
|
68
73
|
if (error instanceof errors_1.CollationError) {
|
|
69
|
-
|
|
74
|
+
// INACTIVE is legitimate duplicate detection — the Postgres atomic
|
|
75
|
+
// CTE (collateLeg2Entry) serializes via row locks, so the GUID
|
|
76
|
+
// ledger value is correct. Silent ack is the right behavior:
|
|
77
|
+
// the work was already done by a prior delivery of this message.
|
|
78
|
+
const now = Date.now();
|
|
79
|
+
if (now - collationWindowStart > COLLATION_WINDOW_MS) {
|
|
80
|
+
collationErrorCount = 0;
|
|
81
|
+
collationWindowStart = now;
|
|
82
|
+
}
|
|
83
|
+
collationErrorCount++;
|
|
84
|
+
if (collationErrorCount === COLLATION_WARN_THRESHOLD) {
|
|
85
|
+
instance.logger.warn('process-event-collation-rate-exceeded', {
|
|
86
|
+
count: collationErrorCount,
|
|
87
|
+
windowMs: COLLATION_WINDOW_MS,
|
|
88
|
+
reservationTimeoutS: enums_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
89
|
+
message: `${COLLATION_WARN_THRESHOLD} collation errors in ${COLLATION_WINDOW_MS / 1000}s. ` +
|
|
90
|
+
`This typically means HMSH_RESERVATION_TIMEOUT_S (currently ${enums_1.HMSH_RESERVATION_TIMEOUT_S}s) ` +
|
|
91
|
+
`is too short for your workload — messages are being re-reserved before processing completes, ` +
|
|
92
|
+
`causing duplicate delivery. Increase HMSH_RESERVATION_TIMEOUT_S.`,
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
instance.logger.warn(`process-event-${error.fault}-error`, {
|
|
96
|
+
jid: instance.context.metadata.jid,
|
|
97
|
+
aid: instance.metadata.aid,
|
|
98
|
+
error,
|
|
99
|
+
});
|
|
70
100
|
return;
|
|
71
101
|
}
|
|
72
102
|
else if (error instanceof errors_1.InactiveJobError) {
|
|
package/build/services/activities/hook.d.ts
CHANGED
|
@@ -152,9 +152,18 @@ declare class Hook extends Activity {
|
|
|
152
152
|
isConfiguredAsHook(): boolean;
|
|
153
153
|
doesHook(): boolean;
|
|
154
154
|
doHook(telemetry: TelemetryService): Promise<void>;
|
|
155
|
+
/**
|
|
156
|
+
* Re-publishes a pending signal as a WEBHOOK stream message so the
|
|
157
|
+
* normal leg2 dispatch path processes it. Called when leg1's
|
|
158
|
+
* setHookSignal atomically detected and consumed a pending signal.
|
|
159
|
+
*/
|
|
160
|
+
private redeliverPendingSignal;
|
|
155
161
|
doPassThrough(telemetry: TelemetryService): Promise<void>;
|
|
156
162
|
getHookRule(topic: string): Promise<HookRule | undefined>;
|
|
157
|
-
registerHook(transaction?: ProviderTransaction): Promise<
|
|
163
|
+
registerHook(transaction?: ProviderTransaction): Promise<{
|
|
164
|
+
jobId?: string;
|
|
165
|
+
pending?: string;
|
|
166
|
+
} | void>;
|
|
158
167
|
processWebHookEvent(status?: StreamStatus, code?: StreamCode): Promise<JobStatus | void>;
|
|
159
168
|
processTimeHookEvent(jobId: string): Promise<JobStatus | void>;
|
|
160
169
|
}
|
|
package/build/services/activities/hook.js
CHANGED
|
@@ -6,6 +6,7 @@ const pipe_1 = require("../pipe");
|
|
|
6
6
|
const task_1 = require("../task");
|
|
7
7
|
const telemetry_1 = require("../telemetry");
|
|
8
8
|
const stream_1 = require("../../types/stream");
|
|
9
|
+
const utils_1 = require("../../modules/utils");
|
|
9
10
|
const activity_1 = require("./activity");
|
|
10
11
|
/**
|
|
11
12
|
* A versatile pause/resume activity that supports three distinct patterns:
|
|
@@ -203,7 +204,7 @@ class Hook extends activity_1.Activity {
|
|
|
203
204
|
}
|
|
204
205
|
async doHook(telemetry) {
|
|
205
206
|
const transaction = this.store.transact();
|
|
206
|
-
await this.registerHook(transaction);
|
|
207
|
+
const hookResult = await this.registerHook(transaction);
|
|
207
208
|
this.mapOutputData();
|
|
208
209
|
this.mapJobData();
|
|
209
210
|
await this.setState(transaction);
|
|
@@ -211,6 +212,38 @@ class Hook extends activity_1.Activity {
|
|
|
211
212
|
await this.setStatus(0, transaction);
|
|
212
213
|
await transaction.exec();
|
|
213
214
|
telemetry.mapActivityAttributes();
|
|
215
|
+
//if a pending signal was detected (signal arrived before hook
|
|
216
|
+
//registered), re-publish the WEBHOOK so leg2 processes it
|
|
217
|
+
//now that the hook signal is committed and state is saved
|
|
218
|
+
if (hookResult && hookResult.pending) {
|
|
219
|
+
await this.redeliverPendingSignal(hookResult.pending);
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Re-publishes a pending signal as a WEBHOOK stream message so the
|
|
224
|
+
* normal leg2 dispatch path processes it. Called when leg1's
|
|
225
|
+
* setHookSignal atomically detected and consumed a pending signal.
|
|
226
|
+
*/
|
|
227
|
+
async redeliverPendingSignal(pendingJson) {
|
|
228
|
+
const data = JSON.parse(pendingJson);
|
|
229
|
+
const hookRule = await this.getHookRule(this.config.hook.topic);
|
|
230
|
+
this.logger.warn('hook-pending-signal-redelivery', {
|
|
231
|
+
topic: this.config.hook.topic,
|
|
232
|
+
aid: hookRule?.to || this.metadata.aid,
|
|
233
|
+
jid: this.context.metadata.jid,
|
|
234
|
+
});
|
|
235
|
+
const streamData = {
|
|
236
|
+
type: stream_1.StreamDataType.WEBHOOK,
|
|
237
|
+
status: stream_1.StreamStatus.SUCCESS,
|
|
238
|
+
code: 200,
|
|
239
|
+
metadata: {
|
|
240
|
+
guid: (0, utils_1.guid)(),
|
|
241
|
+
aid: hookRule?.to || this.metadata.aid,
|
|
242
|
+
topic: this.config.hook.topic,
|
|
243
|
+
},
|
|
244
|
+
data,
|
|
245
|
+
};
|
|
246
|
+
await this.engine.router?.publishMessage(null, streamData);
|
|
214
247
|
}
|
|
215
248
|
async doPassThrough(telemetry) {
|
|
216
249
|
this.adjacencyList = await this.filterAdjacent();
|
|
@@ -225,19 +258,25 @@ class Hook extends activity_1.Activity {
|
|
|
225
258
|
return rules?.[topic]?.[0];
|
|
226
259
|
}
|
|
227
260
|
async registerHook(transaction) {
|
|
228
|
-
let
|
|
261
|
+
let jobId;
|
|
262
|
+
let pending;
|
|
229
263
|
if (this.config.hook?.topic) {
|
|
230
|
-
|
|
264
|
+
//hook signal is set standalone (not in the transaction) so the
|
|
265
|
+
//single CTE query can atomically detect a pending signal collision
|
|
266
|
+
const hookResult = await this.engine.taskService.registerWebHook(this.config.hook.topic, this.context, this.resolveDad(), this.context.metadata.expire);
|
|
267
|
+
jobId = hookResult.jobId;
|
|
268
|
+
pending = hookResult.pending;
|
|
231
269
|
}
|
|
232
270
|
if (this.config.sleep) {
|
|
233
271
|
const duration = pipe_1.Pipe.resolve(this.config.sleep, this.context);
|
|
234
272
|
if (!isNaN(duration) && Number(duration) > 0) {
|
|
235
273
|
await this.engine.taskService.registerTimeHook(this.context.metadata.jid, this.context.metadata.gid, `${this.metadata.aid}${this.metadata.dad || ''}`, 'sleep', duration, this.metadata.dad || '', transaction);
|
|
236
|
-
if (!
|
|
237
|
-
|
|
274
|
+
if (!jobId)
|
|
275
|
+
jobId = this.context.metadata.jid;
|
|
238
276
|
}
|
|
239
277
|
}
|
|
240
|
-
|
|
278
|
+
if (jobId)
|
|
279
|
+
return { jobId, pending };
|
|
241
280
|
}
|
|
242
281
|
async processWebHookEvent(status = stream_1.StreamStatus.SUCCESS, code = 200) {
|
|
243
282
|
this.logger.debug('hook-process-web-hook-event', {
|
|
package/build/services/dba/index.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ import { PostgresClientType } from '../../types/postgres';
|
|
|
14
14
|
* | `{appId}.jobs_attributes` | Execution artifacts (`adata`, `hmark`, `status`, `other`) that are only needed during workflow execution |
|
|
15
15
|
* | `{appId}.engine_streams` | Processed engine stream messages with `expired_at` set |
|
|
16
16
|
* | `{appId}.worker_streams` | Processed worker stream messages with `expired_at` set |
|
|
17
|
+
* | `{appId}.signal_registry` | Consumed hook signals and stale pending signals with `expiry` set |
|
|
17
18
|
*
|
|
18
19
|
* The `DBA` service addresses this with two methods:
|
|
19
20
|
*
|
|
package/build/services/dba/index.js
CHANGED
|
@@ -17,6 +17,7 @@ const postgres_1 = require("../connector/providers/postgres");
|
|
|
17
17
|
* | `{appId}.jobs_attributes` | Execution artifacts (`adata`, `hmark`, `status`, `other`) that are only needed during workflow execution |
|
|
18
18
|
* | `{appId}.engine_streams` | Processed engine stream messages with `expired_at` set |
|
|
19
19
|
* | `{appId}.worker_streams` | Processed worker stream messages with `expired_at` set |
|
|
20
|
+
* | `{appId}.signal_registry` | Consumed hook signals and stale pending signals with `expiry` set |
|
|
20
21
|
*
|
|
21
22
|
* The `DBA` service addresses this with two methods:
|
|
22
23
|
*
|
|
@@ -186,7 +187,8 @@ class DBA {
|
|
|
186
187
|
prune_engine_streams BOOLEAN DEFAULT NULL,
|
|
187
188
|
prune_worker_streams BOOLEAN DEFAULT NULL,
|
|
188
189
|
engine_streams_retention INTERVAL DEFAULT NULL,
|
|
189
|
-
worker_streams_retention INTERVAL DEFAULT NULL
|
|
190
|
+
worker_streams_retention INTERVAL DEFAULT NULL,
|
|
191
|
+
prune_signals BOOLEAN DEFAULT TRUE
|
|
190
192
|
)
|
|
191
193
|
RETURNS TABLE(
|
|
192
194
|
deleted_jobs BIGINT,
|
|
@@ -195,7 +197,8 @@ class DBA {
|
|
|
195
197
|
deleted_worker_streams BIGINT,
|
|
196
198
|
stripped_attributes BIGINT,
|
|
197
199
|
deleted_transient BIGINT,
|
|
198
|
-
marked_pruned BIGINT
|
|
200
|
+
marked_pruned BIGINT,
|
|
201
|
+
deleted_signals BIGINT
|
|
199
202
|
)
|
|
200
203
|
LANGUAGE plpgsql
|
|
201
204
|
AS $$
|
|
@@ -206,6 +209,7 @@ class DBA {
|
|
|
206
209
|
v_stripped_attributes BIGINT := 0;
|
|
207
210
|
v_deleted_transient BIGINT := 0;
|
|
208
211
|
v_marked_pruned BIGINT := 0;
|
|
212
|
+
v_deleted_signals BIGINT := 0;
|
|
209
213
|
v_do_engine BOOLEAN;
|
|
210
214
|
v_do_worker BOOLEAN;
|
|
211
215
|
v_engine_retention INTERVAL;
|
|
@@ -287,6 +291,15 @@ class DBA {
|
|
|
287
291
|
GET DIAGNOSTICS v_marked_pruned = ROW_COUNT;
|
|
288
292
|
END IF;
|
|
289
293
|
|
|
294
|
+
-- 6. Hard-delete expired signal_registry rows.
|
|
295
|
+
-- Includes consumed hook signals and stale pending signals.
|
|
296
|
+
IF prune_signals THEN
|
|
297
|
+
DELETE FROM ${schema}.signal_registry
|
|
298
|
+
WHERE expiry IS NOT NULL
|
|
299
|
+
AND expiry <= NOW();
|
|
300
|
+
GET DIAGNOSTICS v_deleted_signals = ROW_COUNT;
|
|
301
|
+
END IF;
|
|
302
|
+
|
|
290
303
|
deleted_jobs := v_deleted_jobs;
|
|
291
304
|
deleted_streams := v_deleted_engine_streams + v_deleted_worker_streams;
|
|
292
305
|
deleted_engine_streams := v_deleted_engine_streams;
|
|
@@ -294,6 +307,7 @@ class DBA {
|
|
|
294
307
|
stripped_attributes := v_stripped_attributes;
|
|
295
308
|
deleted_transient := v_deleted_transient;
|
|
296
309
|
marked_pruned := v_marked_pruned;
|
|
310
|
+
deleted_signals := v_deleted_signals;
|
|
297
311
|
RETURN NEXT;
|
|
298
312
|
END;
|
|
299
313
|
$$;
|
|
@@ -391,12 +405,14 @@ class DBA {
|
|
|
391
405
|
const workerStreams = options.workerStreams ?? null;
|
|
392
406
|
const engineStreamsExpire = options.engineStreamsExpire ?? null;
|
|
393
407
|
const workerStreamsExpire = options.workerStreamsExpire ?? null;
|
|
408
|
+
const signals = options.signals ?? true;
|
|
394
409
|
await DBA.deploy(options.connection, options.appId);
|
|
395
410
|
const { client, release } = await DBA.getClient(options.connection);
|
|
396
411
|
try {
|
|
397
|
-
const result = await client.query(`SELECT * FROM ${schema}.prune($1::interval, $2::boolean, $3::boolean, $4::boolean, $5::text[], $6::boolean, $7::boolean, $8::boolean, $9::boolean, $10::interval, $11::interval)`, [
|
|
412
|
+
const result = await client.query(`SELECT * FROM ${schema}.prune($1::interval, $2::boolean, $3::boolean, $4::boolean, $5::text[], $6::boolean, $7::boolean, $8::boolean, $9::boolean, $10::interval, $11::interval, $12::boolean)`, [
|
|
398
413
|
expire, jobs, streams, attributes, entities, pruneTransient, keepHmark,
|
|
399
414
|
engineStreams, workerStreams, engineStreamsExpire, workerStreamsExpire,
|
|
415
|
+
signals,
|
|
400
416
|
]);
|
|
401
417
|
const row = result.rows[0];
|
|
402
418
|
return {
|
|
@@ -407,6 +423,7 @@ class DBA {
|
|
|
407
423
|
attributes: Number(row.stripped_attributes),
|
|
408
424
|
transient: Number(row.deleted_transient),
|
|
409
425
|
marked: Number(row.marked_pruned),
|
|
426
|
+
signals: Number(row.deleted_signals),
|
|
410
427
|
};
|
|
411
428
|
}
|
|
412
429
|
finally {
|
|
package/build/services/durable/client.js
CHANGED
|
@@ -156,11 +156,21 @@ class ClientService {
|
|
|
156
156
|
return new handle_1.WorkflowHandleService(hotMeshClient, workflowTopic, jobId);
|
|
157
157
|
},
|
|
158
158
|
/**
|
|
159
|
-
* Sends a message payload to a running workflow that is paused and awaiting the signal
|
|
159
|
+
* Sends a message payload to a running workflow that is paused and awaiting the signal.
|
|
160
|
+
*
|
|
161
|
+
* If the signal arrives before the workflow has registered its hook
|
|
162
|
+
* (race condition under load), it is buffered as a pending signal
|
|
163
|
+
* for up to `expire` (default 10 minutes). Use a longer duration
|
|
164
|
+
* when signaling "early on purpose" (e.g., depositing a payload
|
|
165
|
+
* hours before the workflow starts).
|
|
160
166
|
*/
|
|
161
|
-
signal: async (signalId, data, namespace) => {
|
|
167
|
+
signal: async (signalId, data, namespace, expire) => {
|
|
162
168
|
const topic = `${namespace ?? factory_1.APP_ID}.wfs.signal`;
|
|
163
|
-
return await (await this.getHotMeshClient(topic, namespace)).signal(topic, {
|
|
169
|
+
return await (await this.getHotMeshClient(topic, namespace)).signal(topic, {
|
|
170
|
+
id: signalId,
|
|
171
|
+
data,
|
|
172
|
+
...(expire ? { $expire: expire } : {}),
|
|
173
|
+
});
|
|
164
174
|
},
|
|
165
175
|
/**
|
|
166
176
|
* Spawns an a new, isolated execution cycle within the same job.
|
|
package/build/services/durable/handle.d.ts
CHANGED
|
@@ -57,10 +57,17 @@ export declare class WorkflowHandleService {
|
|
|
57
57
|
* on `Durable.workflow.condition(signalId)`, it resumes with the
|
|
58
58
|
* provided data.
|
|
59
59
|
*
|
|
60
|
+
* If the signal arrives before the workflow has registered its hook
|
|
61
|
+
* (race condition under load), it is buffered as a pending signal
|
|
62
|
+
* for up to `expire` (default 10 minutes). Use a longer duration
|
|
63
|
+
* when signaling "early on purpose" (e.g., depositing a payload
|
|
64
|
+
* hours before the workflow starts).
|
|
65
|
+
*
|
|
60
66
|
* @param signalId - Matches the `signalId` passed to `condition()`.
|
|
61
67
|
* @param data - Payload delivered to the waiting workflow.
|
|
68
|
+
* @param expire - Optional pending signal TTL (e.g., '1h', '30d'). Default '10m'.
|
|
62
69
|
*/
|
|
63
|
-
signal(signalId: string, data: Record<any, any>): Promise<void>;
|
|
70
|
+
signal(signalId: string, data: Record<any, any>, expire?: string): Promise<void>;
|
|
64
71
|
/**
|
|
65
72
|
* Returns the current workflow state. For a completed workflow this
|
|
66
73
|
* is the final output; for a running workflow it reflects the latest
|
|
package/build/services/durable/handle.js
CHANGED
|
@@ -58,13 +58,21 @@ class WorkflowHandleService {
|
|
|
58
58
|
* on `Durable.workflow.condition(signalId)`, it resumes with the
|
|
59
59
|
* provided data.
|
|
60
60
|
*
|
|
61
|
+
* If the signal arrives before the workflow has registered its hook
|
|
62
|
+
* (race condition under load), it is buffered as a pending signal
|
|
63
|
+
* for up to `expire` (default 10 minutes). Use a longer duration
|
|
64
|
+
* when signaling "early on purpose" (e.g., depositing a payload
|
|
65
|
+
* hours before the workflow starts).
|
|
66
|
+
*
|
|
61
67
|
* @param signalId - Matches the `signalId` passed to `condition()`.
|
|
62
68
|
* @param data - Payload delivered to the waiting workflow.
|
|
69
|
+
* @param expire - Optional pending signal TTL (e.g., '1h', '30d'). Default '10m'.
|
|
63
70
|
*/
|
|
64
|
-
async signal(signalId, data) {
|
|
71
|
+
async signal(signalId, data, expire) {
|
|
65
72
|
await this.hotMesh.signal(`${this.hotMesh.appId}.wfs.signal`, {
|
|
66
73
|
id: signalId,
|
|
67
74
|
data,
|
|
75
|
+
...(expire ? { $expire: expire } : {}),
|
|
68
76
|
});
|
|
69
77
|
}
|
|
70
78
|
/**
|
|
package/build/services/durable/workflow/signal.d.ts
CHANGED
|
@@ -55,4 +55,4 @@
|
|
|
55
55
|
* @param {Record<any, any>} data - The payload to deliver to the waiting workflow.
|
|
56
56
|
* @returns {Promise<string>} The resulting hook/stream ID.
|
|
57
57
|
*/
|
|
58
|
-
export declare function signal(signalId: string, data: Record<any, any>): Promise<string>;
|
|
58
|
+
export declare function signal(signalId: string, data: Record<any, any>, expire?: string): Promise<string>;
|
|
package/build/services/durable/workflow/signal.js
CHANGED
|
@@ -60,7 +60,7 @@ const isSideEffectAllowed_1 = require("./isSideEffectAllowed");
|
|
|
60
60
|
* @param {Record<any, any>} data - The payload to deliver to the waiting workflow.
|
|
61
61
|
* @returns {Promise<string>} The resulting hook/stream ID.
|
|
62
62
|
*/
|
|
63
|
-
async function signal(signalId, data) {
|
|
63
|
+
async function signal(signalId, data, expire) {
|
|
64
64
|
const store = common_1.asyncLocalStorage.getStore();
|
|
65
65
|
const workflowTopic = store.get('workflowTopic');
|
|
66
66
|
const connection = store.get('connection');
|
|
@@ -73,6 +73,7 @@ async function signal(signalId, data) {
|
|
|
73
73
|
return await hotMeshClient.signal(`${namespace}.wfs.signal`, {
|
|
74
74
|
id: signalId,
|
|
75
75
|
data,
|
|
76
|
+
...(expire ? { $expire: expire } : {}),
|
|
76
77
|
});
|
|
77
78
|
}
|
|
78
79
|
}
|
|
package/build/services/router/config/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
|
|
1
|
+
import { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_RESERVATION_TIMEOUT_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD } from '../../../modules/enums';
|
|
2
2
|
import { RouterConfig } from '../../../types/stream';
|
|
3
3
|
export declare class RouterConfigManager {
|
|
4
4
|
static validateThrottle(delayInMillis: number): void;
|
|
@@ -8,4 +8,4 @@ export declare class RouterConfigManager {
|
|
|
8
8
|
readonly: boolean;
|
|
9
9
|
};
|
|
10
10
|
}
|
|
11
|
-
export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
|
|
11
|
+
export { HMSH_BLOCK_TIME_MS, HMSH_MAX_RETRIES, HMSH_MAX_TIMEOUT_MS, HMSH_GRADUATED_INTERVAL_MS, HMSH_CODE_UNACKED, HMSH_CODE_UNKNOWN, HMSH_STATUS_UNKNOWN, HMSH_XCLAIM_COUNT, HMSH_XCLAIM_DELAY_MS, HMSH_XPENDING_COUNT, HMSH_BATCH_SIZE, HMSH_RESERVATION_TIMEOUT_S, MAX_DELAY, MAX_STREAM_BACKOFF, INITIAL_STREAM_BACKOFF, MAX_STREAM_RETRIES, HMSH_POISON_MESSAGE_THRESHOLD, };
|
|
package/build/services/router/config/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
|
|
3
|
+
exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.MAX_DELAY = exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_XCLAIM_COUNT = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_UNACKED = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_RETRIES = exports.HMSH_BLOCK_TIME_MS = exports.RouterConfigManager = void 0;
|
|
4
4
|
const enums_1 = require("../../../modules/enums");
|
|
5
5
|
Object.defineProperty(exports, "HMSH_BLOCK_TIME_MS", { enumerable: true, get: function () { return enums_1.HMSH_BLOCK_TIME_MS; } });
|
|
6
6
|
Object.defineProperty(exports, "HMSH_MAX_RETRIES", { enumerable: true, get: function () { return enums_1.HMSH_MAX_RETRIES; } });
|
|
@@ -12,6 +12,8 @@ Object.defineProperty(exports, "HMSH_STATUS_UNKNOWN", { enumerable: true, get: f
|
|
|
12
12
|
Object.defineProperty(exports, "HMSH_XCLAIM_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_XCLAIM_COUNT; } });
|
|
13
13
|
Object.defineProperty(exports, "HMSH_XCLAIM_DELAY_MS", { enumerable: true, get: function () { return enums_1.HMSH_XCLAIM_DELAY_MS; } });
|
|
14
14
|
Object.defineProperty(exports, "HMSH_XPENDING_COUNT", { enumerable: true, get: function () { return enums_1.HMSH_XPENDING_COUNT; } });
|
|
15
|
+
Object.defineProperty(exports, "HMSH_BATCH_SIZE", { enumerable: true, get: function () { return enums_1.HMSH_BATCH_SIZE; } });
|
|
16
|
+
Object.defineProperty(exports, "HMSH_RESERVATION_TIMEOUT_S", { enumerable: true, get: function () { return enums_1.HMSH_RESERVATION_TIMEOUT_S; } });
|
|
15
17
|
Object.defineProperty(exports, "MAX_DELAY", { enumerable: true, get: function () { return enums_1.MAX_DELAY; } });
|
|
16
18
|
Object.defineProperty(exports, "MAX_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.MAX_STREAM_BACKOFF; } });
|
|
17
19
|
Object.defineProperty(exports, "INITIAL_STREAM_BACKOFF", { enumerable: true, get: function () { return enums_1.INITIAL_STREAM_BACKOFF; } });
|
|
@@ -26,7 +26,20 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
|
|
|
26
26
|
private set hasReachedMaxBackoff(value);
|
|
27
27
|
private router;
|
|
28
28
|
private retry;
|
|
29
|
+
private adaptiveReservationTimeout;
|
|
30
|
+
private lastDepthCheckAt;
|
|
31
|
+
private static readonly DEPTH_CHECK_INTERVAL_MS;
|
|
32
|
+
private static readonly DEPTH_SCALE_UP_THRESHOLD;
|
|
33
|
+
private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
|
|
34
|
+
private static readonly RESERVATION_TIMEOUT_MAX_S;
|
|
29
35
|
constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
|
|
36
|
+
/**
|
|
37
|
+
* Adjusts reservation timeout based on stream depth. Called periodically
|
|
38
|
+
* from the consume loop. When depth is high, messages take longer to
|
|
39
|
+
* process, so the reservation window must grow to prevent re-delivery.
|
|
40
|
+
* When depth drops, the timeout shrinks back toward the configured default.
|
|
41
|
+
*/
|
|
42
|
+
private adjustReservationTimeout;
|
|
30
43
|
createGroup(stream: string, group: string): Promise<void>;
|
|
31
44
|
publishMessage(topic: string, streamData: StreamData | StreamDataResponse, transaction?: ProviderTransaction): Promise<string | ProviderTransaction>;
|
|
32
45
|
consumeMessages(stream: string, group: string, consumer: string, callback: (streamData: StreamData) => Promise<StreamDataResponse | void>): Promise<void>;
|
|
@@ -17,6 +17,11 @@ class ConsumptionManager {
|
|
|
17
17
|
get hasReachedMaxBackoff() { return this.router.hasReachedMaxBackoff; }
|
|
18
18
|
set hasReachedMaxBackoff(v) { this.router.hasReachedMaxBackoff = v; }
|
|
19
19
|
constructor(stream, logger, throttleManager, errorHandler, lifecycleManager, reclaimDelay, reclaimCount, appId, role, router, retry) {
|
|
20
|
+
// Adaptive reservation timeout — scales with stream depth to prevent
|
|
21
|
+
// duplicate message delivery under load. When the stream backs up,
|
|
22
|
+
// processing takes longer, so the reservation window must grow.
|
|
23
|
+
this.adaptiveReservationTimeout = config_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
24
|
+
this.lastDepthCheckAt = 0;
|
|
20
25
|
this.stream = stream;
|
|
21
26
|
this.logger = logger;
|
|
22
27
|
this.throttleManager = throttleManager;
|
|
@@ -29,6 +34,46 @@ class ConsumptionManager {
|
|
|
29
34
|
this.router = router;
|
|
30
35
|
this.retry = retry;
|
|
31
36
|
}
|
|
37
|
+
/**
|
|
38
|
+
* Adjusts reservation timeout based on stream depth. Called periodically
|
|
39
|
+
* from the consume loop. When depth is high, messages take longer to
|
|
40
|
+
* process, so the reservation window must grow to prevent re-delivery.
|
|
41
|
+
* When depth drops, the timeout shrinks back toward the configured default.
|
|
42
|
+
*/
|
|
43
|
+
async adjustReservationTimeout(stream) {
|
|
44
|
+
const now = Date.now();
|
|
45
|
+
if (now - this.lastDepthCheckAt < ConsumptionManager.DEPTH_CHECK_INTERVAL_MS) {
|
|
46
|
+
return;
|
|
47
|
+
}
|
|
48
|
+
this.lastDepthCheckAt = now;
|
|
49
|
+
try {
|
|
50
|
+
const depth = await this.stream.getStreamDepth(stream);
|
|
51
|
+
const prev = this.adaptiveReservationTimeout;
|
|
52
|
+
if (depth > ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD) {
|
|
53
|
+
// Scale up: double the timeout, capped at max
|
|
54
|
+
this.adaptiveReservationTimeout = Math.min(this.adaptiveReservationTimeout * 2, ConsumptionManager.RESERVATION_TIMEOUT_MAX_S);
|
|
55
|
+
}
|
|
56
|
+
else if (depth < ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD) {
|
|
57
|
+
// Scale down: halve toward the configured default
|
|
58
|
+
this.adaptiveReservationTimeout = Math.max(Math.floor(this.adaptiveReservationTimeout / 2), config_1.HMSH_RESERVATION_TIMEOUT_S);
|
|
59
|
+
}
|
|
60
|
+
if (this.adaptiveReservationTimeout !== prev) {
|
|
61
|
+
// Update the stream provider so notification-path fetches
|
|
62
|
+
// also use the adaptive timeout
|
|
63
|
+
this.stream.reservationTimeout = this.adaptiveReservationTimeout;
|
|
64
|
+
this.logger.info('stream-reservation-timeout-adjusted', {
|
|
65
|
+
stream,
|
|
66
|
+
depth,
|
|
67
|
+
previousTimeoutS: prev,
|
|
68
|
+
newTimeoutS: this.adaptiveReservationTimeout,
|
|
69
|
+
configuredDefaultS: config_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
catch {
|
|
74
|
+
// Stream depth check is best-effort; don't fail the consume loop
|
|
75
|
+
}
|
|
76
|
+
}
|
|
32
77
|
async createGroup(stream, group) {
|
|
33
78
|
try {
|
|
34
79
|
await this.stream.createConsumerGroup(stream, group);
|
|
@@ -107,6 +152,8 @@ class ConsumptionManager {
|
|
|
107
152
|
if (this.lifecycleManager.isStopped(group, consumer, stream)) {
|
|
108
153
|
return;
|
|
109
154
|
}
|
|
155
|
+
// Adapt reservation timeout based on stream depth
|
|
156
|
+
await this.adjustReservationTimeout(stream);
|
|
110
157
|
await this.throttleManager.customSleep(); // respect throttle
|
|
111
158
|
if (this.lifecycleManager.isStopped(group, consumer, stream) ||
|
|
112
159
|
this.throttleManager.isPaused()) {
|
|
@@ -183,6 +230,7 @@ class ConsumptionManager {
|
|
|
183
230
|
enableNotifications: true,
|
|
184
231
|
notificationCallback,
|
|
185
232
|
blockTimeout: config_1.HMSH_BLOCK_TIME_MS,
|
|
233
|
+
reservationTimeout: config_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
186
234
|
});
|
|
187
235
|
// Don't block here - let the worker initialization complete
|
|
188
236
|
// The notification system will handle message processing asynchronously
|
|
@@ -225,14 +273,17 @@ class ConsumptionManager {
|
|
|
225
273
|
const streamDuration = config_1.HMSH_BLOCK_TIME_MS + Math.round(config_1.HMSH_BLOCK_TIME_MS * Math.random());
|
|
226
274
|
try {
|
|
227
275
|
let messages = [];
|
|
276
|
+
// Adapt reservation timeout based on stream depth
|
|
277
|
+
await this.adjustReservationTimeout(stream);
|
|
228
278
|
if (!this.hasReachedMaxBackoff) {
|
|
229
279
|
// Normal mode: try with backoff and finite retries
|
|
230
280
|
const features = this.stream.getProviderSpecificFeatures();
|
|
231
281
|
const isPostgres = features.supportsParallelProcessing;
|
|
232
|
-
const batchSize = isPostgres ?
|
|
282
|
+
const batchSize = isPostgres ? config_1.HMSH_BATCH_SIZE : 1;
|
|
233
283
|
messages = await this.stream.consumeMessages(stream, group, consumer, {
|
|
234
284
|
blockTimeout: streamDuration,
|
|
235
285
|
batchSize,
|
|
286
|
+
reservationTimeout: this.adaptiveReservationTimeout,
|
|
236
287
|
enableBackoff: true,
|
|
237
288
|
initialBackoff: config_1.INITIAL_STREAM_BACKOFF,
|
|
238
289
|
maxBackoff: config_1.MAX_STREAM_BACKOFF,
|
|
@@ -243,10 +294,11 @@ class ConsumptionManager {
|
|
|
243
294
|
// Fallback mode: just try once, no backoff
|
|
244
295
|
const features = this.stream.getProviderSpecificFeatures();
|
|
245
296
|
const isPostgres = features.supportsParallelProcessing;
|
|
246
|
-
const batchSize = isPostgres ?
|
|
297
|
+
const batchSize = isPostgres ? config_1.HMSH_BATCH_SIZE : 1;
|
|
247
298
|
messages = await this.stream.consumeMessages(stream, group, consumer, {
|
|
248
299
|
blockTimeout: streamDuration,
|
|
249
300
|
batchSize,
|
|
301
|
+
reservationTimeout: this.adaptiveReservationTimeout,
|
|
250
302
|
enableBackoff: false,
|
|
251
303
|
maxRetries: 1,
|
|
252
304
|
});
|
|
@@ -542,4 +594,8 @@ class ConsumptionManager {
|
|
|
542
594
|
return Array.isArray(result) && Array.isArray(result[0]);
|
|
543
595
|
}
|
|
544
596
|
}
|
|
597
|
+
ConsumptionManager.DEPTH_CHECK_INTERVAL_MS = 10000;
|
|
598
|
+
ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD = 100;
|
|
599
|
+
ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD = 10;
|
|
600
|
+
ConsumptionManager.RESERVATION_TIMEOUT_MAX_S = 600;
|
|
545
601
|
exports.ConsumptionManager = ConsumptionManager;
|
|
@@ -64,8 +64,21 @@ declare abstract class StoreService<Provider extends ProviderClient, Transaction
|
|
|
64
64
|
abstract setHookRules(hookRules: Record<string, HookRule[]>): Promise<any>;
|
|
65
65
|
abstract getHookRules(): Promise<Record<string, HookRule[]>>;
|
|
66
66
|
abstract getAllSymbols(): Promise<Symbols>;
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
/**
|
|
68
|
+
* Leg1: Attempts to set the hook signal. If a pending signal occupies
|
|
69
|
+
* the key (race condition), overwrites it and returns the pending data.
|
|
70
|
+
* When called with a transaction, queues the setnxex (no pending detection).
|
|
71
|
+
*/
|
|
72
|
+
abstract setHookSignal(hook: HookSignal, transaction?: TransactionProvider): Promise<{
|
|
73
|
+
success: boolean;
|
|
74
|
+
pendingData?: string;
|
|
75
|
+
}>;
|
|
76
|
+
/**
|
|
77
|
+
* Leg2: Atomically gets the hook signal OR inserts a pending signal
|
|
78
|
+
* if no hook is registered yet (early signal). Returns the hook
|
|
79
|
+
* signal value, or undefined if we stored a pending signal instead.
|
|
80
|
+
*/
|
|
81
|
+
abstract getHookSignal(topic: string, resolved: string, pendingData?: string, pendingExpire?: number): Promise<string | undefined>;
|
|
69
82
|
abstract deleteHookSignal(topic: string, resolved: string): Promise<number | undefined>;
|
|
70
83
|
abstract addTaskQueues(keys: string[]): Promise<void>;
|
|
71
84
|
abstract getActiveTaskQueue(): Promise<string | null>;
|
|
@@ -10,6 +10,7 @@ export declare const KVTables: (context: PostgresStoreService) => {
|
|
|
10
10
|
hashStringToInt(str: string): number;
|
|
11
11
|
waitForTablesCreation(lockId: number, appName: string): Promise<void>;
|
|
12
12
|
checkIfTablesExist(client: PostgresClientType, appName: string): Promise<boolean>;
|
|
13
|
+
migrate(client: PostgresClientType | PostgresPoolClientType, appName: string): Promise<void>;
|
|
13
14
|
createTables(client: PostgresClientType | PostgresPoolClientType, appName: string): Promise<void>;
|
|
14
15
|
getTableNames(appName: string): string[];
|
|
15
16
|
getTableDefinitions(appName: string): Array<{
|
|
@@ -26,7 +26,8 @@ const KVTables = (context) => ({
|
|
|
26
26
|
// First, check if tables already exist (no lock needed)
|
|
27
27
|
const tablesExist = await this.checkIfTablesExist(client, appName);
|
|
28
28
|
if (tablesExist) {
|
|
29
|
-
// Tables
|
|
29
|
+
// Tables exist; apply any pending migrations
|
|
30
|
+
await this.migrate(client, appName);
|
|
30
31
|
return;
|
|
31
32
|
}
|
|
32
33
|
// Tables don't exist, need to acquire lock and create them
|
|
@@ -128,6 +129,31 @@ const KVTables = (context) => ({
|
|
|
128
129
|
const results = await Promise.all(checkTablePromises);
|
|
129
130
|
return results.every((res) => res.rows[0].table !== null);
|
|
130
131
|
},
|
|
132
|
+
async migrate(client, appName) {
|
|
133
|
+
const schemaName = context.storeClient.safeName(appName);
|
|
134
|
+
const jobsTable = `${schemaName}.jobs`;
|
|
135
|
+
// v0.14.5: track updated_at on job status changes
|
|
136
|
+
const { rows } = await client.query(`SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_jobs_updated_at' LIMIT 1`);
|
|
137
|
+
if (rows.length === 0) {
|
|
138
|
+
await client.query(`
|
|
139
|
+
CREATE OR REPLACE FUNCTION ${schemaName}.update_jobs_updated_at()
|
|
140
|
+
RETURNS TRIGGER AS $$
|
|
141
|
+
BEGIN
|
|
142
|
+
IF NEW.status <> OLD.status THEN
|
|
143
|
+
NEW.updated_at = NOW();
|
|
144
|
+
END IF;
|
|
145
|
+
RETURN NEW;
|
|
146
|
+
END;
|
|
147
|
+
$$ LANGUAGE plpgsql;
|
|
148
|
+
`);
|
|
149
|
+
await client.query(`
|
|
150
|
+
DROP TRIGGER IF EXISTS trg_update_jobs_updated_at ON ${jobsTable};
|
|
151
|
+
CREATE TRIGGER trg_update_jobs_updated_at
|
|
152
|
+
BEFORE UPDATE ON ${jobsTable}
|
|
153
|
+
FOR EACH ROW EXECUTE FUNCTION ${schemaName}.update_jobs_updated_at();
|
|
154
|
+
`);
|
|
155
|
+
}
|
|
156
|
+
},
|
|
131
157
|
async createTables(client, appName) {
|
|
132
158
|
try {
|
|
133
159
|
await client.query('BEGIN');
|
|
@@ -302,6 +328,25 @@ const KVTables = (context) => ({
|
|
|
302
328
|
CREATE TRIGGER trg_enforce_live_job_uniqueness
|
|
303
329
|
BEFORE INSERT OR UPDATE ON ${fullTableName}
|
|
304
330
|
FOR EACH ROW EXECUTE PROCEDURE ${schemaName}.enforce_live_job_uniqueness();
|
|
331
|
+
`);
|
|
332
|
+
// Create function to update updated_at on status changes
|
|
333
|
+
await client.query(`
|
|
334
|
+
CREATE OR REPLACE FUNCTION ${schemaName}.update_jobs_updated_at()
|
|
335
|
+
RETURNS TRIGGER AS $$
|
|
336
|
+
BEGIN
|
|
337
|
+
IF NEW.status <> OLD.status THEN
|
|
338
|
+
NEW.updated_at = NOW();
|
|
339
|
+
END IF;
|
|
340
|
+
RETURN NEW;
|
|
341
|
+
END;
|
|
342
|
+
$$ LANGUAGE plpgsql;
|
|
343
|
+
`);
|
|
344
|
+
// Create trigger for updated_at on job status changes
|
|
345
|
+
await client.query(`
|
|
346
|
+
DROP TRIGGER IF EXISTS trg_update_jobs_updated_at ON ${fullTableName};
|
|
347
|
+
CREATE TRIGGER trg_update_jobs_updated_at
|
|
348
|
+
BEFORE UPDATE ON ${fullTableName}
|
|
349
|
+
FOR EACH ROW EXECUTE FUNCTION ${schemaName}.update_jobs_updated_at();
|
|
305
350
|
`);
|
|
306
351
|
// Create the attributes table with partitioning
|
|
307
352
|
const attributesTableName = `${fullTableName}_attributes`;
|
|
@@ -115,8 +115,31 @@ declare class PostgresStoreService extends StoreService<ProviderClient, Provider
|
|
|
115
115
|
getTransitions(appVersion: AppVID): Promise<Transitions>;
|
|
116
116
|
setHookRules(hookRules: Record<string, HookRule[]>): Promise<any>;
|
|
117
117
|
getHookRules(): Promise<Record<string, HookRule[]>>;
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
/**
|
|
119
|
+
* Leg1: set hook signal, atomically detecting a pending signal.
|
|
120
|
+
*
|
|
121
|
+
* Standalone (no transaction): single CTE query that reads any existing
|
|
122
|
+
* pending value, then inserts the hook signal (overwriting pending or
|
|
123
|
+
* expired entries). Returns `{success, pendingData}` in one round trip.
|
|
124
|
+
*
|
|
125
|
+
* In a transaction: queues the setnxex; pending detection deferred.
|
|
126
|
+
*/
|
|
127
|
+
setHookSignal(hook: HookSignal, transaction?: ProviderTransaction): Promise<{
|
|
128
|
+
success: boolean;
|
|
129
|
+
pendingData?: string;
|
|
130
|
+
}>;
|
|
131
|
+
/**
|
|
132
|
+
* Leg2: get hook signal OR atomically set a pending signal.
|
|
133
|
+
*
|
|
134
|
+
* When `pendingData` is provided and no hook signal exists, the
|
|
135
|
+
* pending value is inserted in the SAME SQL statement — no second
|
|
136
|
+
* round trip. This is the transactional edge that prevents the
|
|
137
|
+
* signal from being lost: by the time the query returns, the
|
|
138
|
+
* pending key is already visible to leg1's setnxex.
|
|
139
|
+
*
|
|
140
|
+
* When `pendingData` is omitted, behaves as a plain read.
|
|
141
|
+
*/
|
|
142
|
+
getHookSignal(topic: string, resolved: string, pendingData?: string, pendingExpire?: number): Promise<string | undefined>;
|
|
120
143
|
deleteHookSignal(topic: string, resolved: string): Promise<number | undefined>;
|
|
121
144
|
addTaskQueues(keys: string[]): Promise<void>;
|
|
122
145
|
getActiveTaskQueue(): Promise<string | null>;
|
|
@@ -754,16 +754,133 @@ class PostgresStoreService extends __1.StoreService {
|
|
|
754
754
|
return patterns;
|
|
755
755
|
}
|
|
756
756
|
}
|
|
757
|
+
/**
|
|
758
|
+
* Leg1: set hook signal, atomically detecting a pending signal.
|
|
759
|
+
*
|
|
760
|
+
* Standalone (no transaction): single CTE query that reads any existing
|
|
761
|
+
* pending value, then inserts the hook signal (overwriting pending or
|
|
762
|
+
* expired entries). Returns `{success, pendingData}` in one round trip.
|
|
763
|
+
*
|
|
764
|
+
* In a transaction: queues the setnxex; pending detection deferred.
|
|
765
|
+
*/
|
|
757
766
|
async setHookSignal(hook, transaction) {
|
|
758
767
|
const key = this.mintKey(key_1.KeyType.SIGNALS, { appId: this.appId });
|
|
759
768
|
const { topic, resolved, jobId } = hook;
|
|
760
769
|
const signalKey = `${topic}:${resolved}`;
|
|
761
|
-
|
|
770
|
+
const fullKey = `${key}:${signalKey}`;
|
|
771
|
+
const delay = Math.max(hook.expire, enums_1.HMSH_SIGNAL_EXPIRE);
|
|
772
|
+
if (transaction) {
|
|
773
|
+
await this.kvsql(transaction).setnxex(fullKey, jobId, delay);
|
|
774
|
+
return { success: true };
|
|
775
|
+
}
|
|
776
|
+
const kv = this.kvsql();
|
|
777
|
+
const tableName = kv.tableForKey(fullKey);
|
|
778
|
+
const storedKey = kv.storageKey(fullKey);
|
|
779
|
+
const sql = `
|
|
780
|
+
WITH pre AS (
|
|
781
|
+
SELECT value FROM ${tableName}
|
|
782
|
+
WHERE key = $1 AND (expiry IS NULL OR expiry > NOW())
|
|
783
|
+
),
|
|
784
|
+
ins AS (
|
|
785
|
+
INSERT INTO ${tableName} (key, value, expiry)
|
|
786
|
+
VALUES ($1, $2, NOW() + INTERVAL '${delay} seconds')
|
|
787
|
+
ON CONFLICT (key) DO UPDATE
|
|
788
|
+
SET value = EXCLUDED.value, expiry = EXCLUDED.expiry
|
|
789
|
+
WHERE ${tableName}.expiry IS NULL
|
|
790
|
+
OR ${tableName}.expiry <= NOW()
|
|
791
|
+
OR ${tableName}.value LIKE '$pending::%'
|
|
792
|
+
RETURNING true as success
|
|
793
|
+
)
|
|
794
|
+
SELECT
|
|
795
|
+
COALESCE((SELECT success FROM ins), false) as success,
|
|
796
|
+
(SELECT value FROM pre) as existing_value
|
|
797
|
+
`;
|
|
798
|
+
try {
|
|
799
|
+
const res = await this.pgClient.query(sql, [storedKey, jobId]);
|
|
800
|
+
const row = res.rows[0] || {};
|
|
801
|
+
const success = row.success === true;
|
|
802
|
+
const existing = row.existing_value;
|
|
803
|
+
if (success && existing?.startsWith('$pending::')) {
|
|
804
|
+
return {
|
|
805
|
+
success: true,
|
|
806
|
+
pendingData: existing.slice('$pending::'.length),
|
|
807
|
+
};
|
|
808
|
+
}
|
|
809
|
+
return { success };
|
|
810
|
+
}
|
|
811
|
+
catch (error) {
|
|
812
|
+
if (error?.message?.includes('closed') ||
|
|
813
|
+
error?.message?.includes('queryable')) {
|
|
814
|
+
return { success: false };
|
|
815
|
+
}
|
|
816
|
+
throw error;
|
|
817
|
+
}
|
|
762
818
|
}
|
|
763
|
-
|
|
819
|
+
/**
|
|
820
|
+
* Leg2: get hook signal OR atomically set a pending signal.
|
|
821
|
+
*
|
|
822
|
+
* When `pendingData` is provided and no hook signal exists, the
|
|
823
|
+
* pending value is inserted in the SAME SQL statement — no second
|
|
824
|
+
* round trip. This is the transactional edge that prevents the
|
|
825
|
+
* signal from being lost: by the time the query returns, the
|
|
826
|
+
* pending key is already visible to leg1's setnxex.
|
|
827
|
+
*
|
|
828
|
+
* When `pendingData` is omitted, behaves as a plain read.
|
|
829
|
+
*/
|
|
830
|
+
async getHookSignal(topic, resolved, pendingData, pendingExpire) {
|
|
764
831
|
const key = this.mintKey(key_1.KeyType.SIGNALS, { appId: this.appId });
|
|
765
|
-
const
|
|
766
|
-
|
|
832
|
+
const fullKey = `${key}:${topic}:${resolved}`;
|
|
833
|
+
if (!pendingData) {
|
|
834
|
+
//plain read (used by deleteWebHookSignal path, tests, etc.)
|
|
835
|
+
const response = await this.kvsql().get(fullKey);
|
|
836
|
+
if (!response)
|
|
837
|
+
return undefined;
|
|
838
|
+
const value = response.toString();
|
|
839
|
+
if (value.startsWith('$pending::'))
|
|
840
|
+
return undefined;
|
|
841
|
+
return value;
|
|
842
|
+
}
|
|
843
|
+
//atomic get-or-set-pending: one round trip
|
|
844
|
+
const kv = this.kvsql();
|
|
845
|
+
const tableName = kv.tableForKey(fullKey);
|
|
846
|
+
const storedKey = kv.storageKey(fullKey);
|
|
847
|
+
const expire = pendingExpire || enums_1.HMSH_PENDING_SIGNAL_EXPIRE;
|
|
848
|
+
const pendingValue = `$pending::${pendingData}`;
|
|
849
|
+
const sql = `
|
|
850
|
+
WITH existing AS (
|
|
851
|
+
SELECT value FROM ${tableName}
|
|
852
|
+
WHERE key = $1 AND (expiry IS NULL OR expiry > NOW())
|
|
853
|
+
),
|
|
854
|
+
pending AS (
|
|
855
|
+
INSERT INTO ${tableName} (key, value, expiry)
|
|
856
|
+
SELECT $1, $2, NOW() + INTERVAL '${expire} seconds'
|
|
857
|
+
WHERE NOT EXISTS (SELECT 1 FROM existing)
|
|
858
|
+
ON CONFLICT (key) DO UPDATE
|
|
859
|
+
SET value = EXCLUDED.value, expiry = EXCLUDED.expiry
|
|
860
|
+
WHERE ${tableName}.expiry IS NULL OR ${tableName}.expiry <= NOW()
|
|
861
|
+
RETURNING true as inserted
|
|
862
|
+
)
|
|
863
|
+
SELECT
|
|
864
|
+
(SELECT value FROM existing) as hook_value,
|
|
865
|
+
(SELECT inserted FROM pending) as pending_inserted
|
|
866
|
+
`;
|
|
867
|
+
try {
|
|
868
|
+
const res = await this.pgClient.query(sql, [storedKey, pendingValue]);
|
|
869
|
+
const row = res.rows[0] || {};
|
|
870
|
+
const hookValue = row.hook_value;
|
|
871
|
+
if (hookValue && !hookValue.startsWith('$pending::')) {
|
|
872
|
+
return hookValue;
|
|
873
|
+
}
|
|
874
|
+
//no hook signal; pending was inserted (or already existed)
|
|
875
|
+
return undefined;
|
|
876
|
+
}
|
|
877
|
+
catch (error) {
|
|
878
|
+
if (error?.message?.includes('closed') ||
|
|
879
|
+
error?.message?.includes('queryable')) {
|
|
880
|
+
return undefined;
|
|
881
|
+
}
|
|
882
|
+
throw error;
|
|
883
|
+
}
|
|
767
884
|
}
|
|
768
885
|
async deleteHookSignal(topic, resolved) {
|
|
769
886
|
const key = this.mintKey(key_1.KeyType.SIGNALS, { appId: this.appId });
|
|
@@ -22,6 +22,7 @@ export declare abstract class StreamService<ClientProvider extends ProviderClien
|
|
|
22
22
|
batchSize?: number;
|
|
23
23
|
blockTimeout?: number;
|
|
24
24
|
autoAck?: boolean;
|
|
25
|
+
reservationTimeout?: number;
|
|
25
26
|
enableBackoff?: boolean;
|
|
26
27
|
initialBackoff?: number;
|
|
27
28
|
maxBackoff?: number;
|
|
@@ -41,6 +42,7 @@ export declare abstract class StreamService<ClientProvider extends ProviderClien
|
|
|
41
42
|
maxRetries?: number;
|
|
42
43
|
limit?: number;
|
|
43
44
|
}): Promise<StreamMessage[]>;
|
|
45
|
+
reservationTimeout: number;
|
|
44
46
|
abstract getStreamStats(streamName: string): Promise<StreamStats>;
|
|
45
47
|
abstract getStreamDepth(streamName: string): Promise<number>;
|
|
46
48
|
abstract getStreamDepths(streamName: {
|
|
@@ -3,6 +3,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.StreamService = void 0;
|
|
4
4
|
class StreamService {
|
|
5
5
|
constructor(streamClient, storeClient, config = {}) {
|
|
6
|
+
// Adaptive reservation timeout — set by the consumption manager
|
|
7
|
+
// based on stream depth. Providers read this when reserving messages.
|
|
8
|
+
this.reservationTimeout = 30;
|
|
6
9
|
this.streamClient = streamClient;
|
|
7
10
|
this.storeClient = storeClient;
|
|
8
11
|
this.config = config;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.retryMessages = exports.deadLetterMessages = exports.ackAndDelete = exports.deleteMessages = exports.acknowledgeMessages = exports.fetchMessages = exports.buildPublishSQL = exports.publishMessages = void 0;
|
|
4
|
+
const enums_1 = require("../../../../modules/enums");
|
|
4
5
|
const utils_1 = require("../../../../modules/utils");
|
|
5
6
|
/**
|
|
6
7
|
* Publish messages to a stream. Can be used within a transaction.
|
|
@@ -205,7 +206,7 @@ async function fetchMessages(client, tableName, streamName, isEngine, consumerNa
|
|
|
205
206
|
while (retries < maxRetries) {
|
|
206
207
|
retries++;
|
|
207
208
|
const batchSize = options?.batchSize || 1;
|
|
208
|
-
const reservationTimeout = options?.reservationTimeout ||
|
|
209
|
+
const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
209
210
|
const res = await client.query(`UPDATE ${tableName}
|
|
210
211
|
SET reserved_at = NOW(), reserved_by = $3
|
|
211
212
|
WHERE id IN (
|
|
@@ -79,12 +79,12 @@ class PostgresStreamService extends index_1.StreamService {
|
|
|
79
79
|
}
|
|
80
80
|
async checkForMissedMessages() {
|
|
81
81
|
await this.notificationManager.checkForMissedMessages(async (instance, consumer) => {
|
|
82
|
-
return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, enableBackoff: false, maxRetries: 1 });
|
|
82
|
+
return await instance.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: instance.reservationTimeout, enableBackoff: false, maxRetries: 1 });
|
|
83
83
|
});
|
|
84
84
|
}
|
|
85
85
|
async fetchAndDeliverMessages(consumer) {
|
|
86
86
|
try {
|
|
87
|
-
const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, enableBackoff: false, maxRetries: 1 });
|
|
87
|
+
const messages = await this.fetchMessages(consumer.streamName, consumer.groupName, consumer.consumerName, { batchSize: 10, reservationTimeout: this.reservationTimeout, enableBackoff: false, maxRetries: 1 });
|
|
88
88
|
if (messages.length > 0) {
|
|
89
89
|
consumer.callback(messages);
|
|
90
90
|
}
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
10
|
exports.publishMessagesSecured = exports.deadLetterMessagesSecured = exports.ackAndDeleteSecured = exports.fetchMessagesSecured = void 0;
|
|
11
|
+
const enums_1 = require("../../../../modules/enums");
|
|
11
12
|
const utils_1 = require("../../../../modules/utils");
|
|
12
13
|
const utils_2 = require("../../../../modules/utils");
|
|
13
14
|
/**
|
|
@@ -19,7 +20,7 @@ async function fetchMessagesSecured(client, schema, streamName, consumerName, op
|
|
|
19
20
|
const maxBackoff = options?.maxBackoff ?? 3000;
|
|
20
21
|
const maxRetries = options?.maxRetries ?? 3;
|
|
21
22
|
const batchSize = options?.batchSize || 1;
|
|
22
|
-
const reservationTimeout = options?.reservationTimeout ||
|
|
23
|
+
const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
23
24
|
let backoff = initialBackoff;
|
|
24
25
|
let retries = 0;
|
|
25
26
|
try {
|
|
@@ -29,7 +29,10 @@ declare class TaskService {
|
|
|
29
29
|
processTimeHooks(timeEventCallback: (jobId: string, gId: string, activityId: string, type: WorkListTaskType) => Promise<void>, listKey?: string): Promise<void>;
|
|
30
30
|
cancelCleanup(): void;
|
|
31
31
|
getHookRule(topic: string): Promise<HookRule | undefined>;
|
|
32
|
-
registerWebHook(topic: string, context: JobState, dad: string, expire: number
|
|
32
|
+
registerWebHook(topic: string, context: JobState, dad: string, expire: number): Promise<{
|
|
33
|
+
jobId: string;
|
|
34
|
+
pending?: string;
|
|
35
|
+
}>;
|
|
33
36
|
processWebHookSignal(topic: string, data: Record<string, unknown>): Promise<[string, string, string, string] | undefined>;
|
|
34
37
|
deleteWebHookSignal(topic: string, data: Record<string, unknown>): Promise<number>;
|
|
35
38
|
/**
|
|
@@ -134,7 +134,7 @@ class TaskService {
|
|
|
134
134
|
const rules = await this.store.getHookRules();
|
|
135
135
|
return rules?.[topic]?.[0];
|
|
136
136
|
}
|
|
137
|
-
async registerWebHook(topic, context, dad, expire
|
|
137
|
+
async registerWebHook(topic, context, dad, expire) {
|
|
138
138
|
const hookRule = await this.getHookRule(topic);
|
|
139
139
|
if (hookRule) {
|
|
140
140
|
const mapExpression = hookRule.conditions.match[0].expected;
|
|
@@ -150,8 +150,27 @@ class TaskService {
|
|
|
150
150
|
jobId: compositeJobKey,
|
|
151
151
|
expire,
|
|
152
152
|
};
|
|
153
|
-
|
|
154
|
-
return
|
|
153
|
+
//called standalone (no transaction) so the single CTE query can
|
|
154
|
+
//atomically detect and return pending signal data on collision
|
|
155
|
+
const result = await this.store.setHookSignal(hook);
|
|
156
|
+
if (result.pendingData) {
|
|
157
|
+
this.logger.warn('task-signal-race-pending-consumed', {
|
|
158
|
+
topic,
|
|
159
|
+
resolved,
|
|
160
|
+
jobId,
|
|
161
|
+
});
|
|
162
|
+
return { jobId, pending: result.pendingData };
|
|
163
|
+
}
|
|
164
|
+
if (!result.success) {
|
|
165
|
+
//setnxex failed but no pending signal; likely a retry where
|
|
166
|
+
//our own hook signal was already set. continue normally.
|
|
167
|
+
this.logger.debug('task-signal-hook-already-set', {
|
|
168
|
+
topic,
|
|
169
|
+
resolved,
|
|
170
|
+
jobId,
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
return { jobId };
|
|
155
174
|
}
|
|
156
175
|
else {
|
|
157
176
|
throw new Error('signaler.registerWebHook:error: hook rule not found');
|
|
@@ -165,10 +184,19 @@ class TaskService {
|
|
|
165
184
|
const context = { $self: { hook: { data } }, $hook: { data } };
|
|
166
185
|
const mapExpression = hookRule.conditions.match[0].actual;
|
|
167
186
|
const resolved = pipe_1.Pipe.resolve(mapExpression, context);
|
|
168
|
-
|
|
187
|
+
//resolve $expire override from the signal data (e.g., '1h', '30d')
|
|
188
|
+
const pendingExpire = typeof data.$expire === 'string'
|
|
189
|
+
? (0, utils_1.s)(data.$expire)
|
|
190
|
+
: enums_1.HMSH_PENDING_SIGNAL_EXPIRE;
|
|
191
|
+
//atomic: returns the hook signal, or stores a pending signal
|
|
192
|
+
//in the same SQL statement if no hook is registered yet
|
|
193
|
+
const hookSignalId = await this.store.getHookSignal(topic, resolved, JSON.stringify(data), pendingExpire);
|
|
169
194
|
if (!hookSignalId) {
|
|
170
|
-
|
|
171
|
-
|
|
195
|
+
this.logger.warn('task-signal-race-pending-stored', {
|
|
196
|
+
topic,
|
|
197
|
+
resolved,
|
|
198
|
+
expire: pendingExpire,
|
|
199
|
+
});
|
|
172
200
|
return undefined;
|
|
173
201
|
}
|
|
174
202
|
//`aid` is part of composite key, but the hook `topic` is its public interface;
|
package/build/types/dba.d.ts
CHANGED
|
@@ -103,6 +103,15 @@ export interface PruneOptions {
|
|
|
103
103
|
* @default false
|
|
104
104
|
*/
|
|
105
105
|
keepHmark?: boolean;
|
|
106
|
+
/**
|
|
107
|
+
* If true, hard-deletes expired rows from `signal_registry`.
|
|
108
|
+
* These include consumed hook signals and stale pending signals
|
|
109
|
+
* (signals that arrived before hook registration but were never
|
|
110
|
+
* claimed). All signal_registry entries have a natural `expiry`
|
|
111
|
+
* column; this operation removes rows whose expiry has passed.
|
|
112
|
+
* @default true
|
|
113
|
+
*/
|
|
114
|
+
signals?: boolean;
|
|
106
115
|
}
|
|
107
116
|
/**
|
|
108
117
|
* Result returned by `DBA.prune()`, providing deletion
|
|
@@ -123,4 +132,6 @@ export interface PruneResult {
|
|
|
123
132
|
transient: number;
|
|
124
133
|
/** Number of jobs marked as pruned (pruned_at set) */
|
|
125
134
|
marked: number;
|
|
135
|
+
/** Number of expired signal_registry rows hard-deleted */
|
|
136
|
+
signals: number;
|
|
126
137
|
}
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hotmeshio/hotmesh",
|
|
3
|
-
"version": "0.14.
|
|
3
|
+
"version": "0.14.6",
|
|
4
4
|
"description": "Durable Workflow",
|
|
5
5
|
"main": "./build/index.js",
|
|
6
6
|
"types": "./build/index.d.ts",
|
|
7
|
-
"homepage": "https://
|
|
7
|
+
"homepage": "https://docs.hotmesh.io/",
|
|
8
8
|
"publishConfig": {
|
|
9
9
|
"access": "public"
|
|
10
10
|
},
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"test:durable:postgres": "HMSH_LOGLEVEL=info vitest run tests/durable",
|
|
31
31
|
"test:durable:basic": "HMSH_LOGLEVEL=info vitest run tests/durable/basic/postgres.test.ts",
|
|
32
32
|
"test:durable:collision": "vitest run tests/durable/collision/postgres.test.ts",
|
|
33
|
+
"test:durable:contention": "vitest run tests/durable/contention/postgres.test.ts",
|
|
33
34
|
"test:durable:fatal": "vitest run tests/durable/fatal",
|
|
34
35
|
"test:durable:goodbye": "HMSH_LOGLEVEL=debug vitest run tests/durable/goodbye/postgres.test.ts",
|
|
35
36
|
"test:durable:interceptor": "HMSH_LOGLEVEL=info vitest run tests/durable/interceptor/postgres.test.ts",
|