@hotmeshio/hotmesh 0.19.0 → 0.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/modules/enums.d.ts +1 -0
- package/build/modules/enums.js +3 -2
- package/build/modules/errors.d.ts +27 -1
- package/build/modules/errors.js +58 -1
- package/build/package.json +1 -1
- package/build/services/activities/activity/process.js +14 -2
- package/build/services/collator/index.js +8 -0
- package/build/services/engine/dispatch.js +20 -1
- package/build/services/router/consumption/index.d.ts +1 -0
- package/build/services/router/consumption/index.js +58 -9
- package/build/services/stream/index.js +3 -1
- package/build/services/stream/providers/postgres/kvtables.js +6 -11
- package/build/services/stream/providers/postgres/messages.js +1 -1
- package/build/services/stream/providers/postgres/secured.js +1 -1
- package/package.json +1 -1
package/build/modules/enums.d.ts
CHANGED
|
@@ -133,6 +133,7 @@ export declare const MAX_STREAM_RETRIES: number;
|
|
|
133
133
|
export declare const MAX_DELAY = 2147483647;
|
|
134
134
|
export declare const HMSH_MAX_RETRIES: number;
|
|
135
135
|
export declare const HMSH_POISON_MESSAGE_THRESHOLD: number;
|
|
136
|
+
export declare const HMSH_MAX_CYCLES: number;
|
|
136
137
|
export declare const HMSH_MAX_TIMEOUT_MS: number;
|
|
137
138
|
export declare const HMSH_GRADUATED_INTERVAL_MS: number;
|
|
138
139
|
/**
|
package/build/modules/enums.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
4
|
-
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = void 0;
|
|
3
|
+
exports.HMSH_RESERVATION_TIMEOUT_S = exports.HMSH_ENGINE_CONCURRENCY = exports.HMSH_BATCH_SIZE_MIN = exports.HMSH_BATCH_SIZE = exports.HMSH_XPENDING_COUNT = exports.HMSH_XCLAIM_COUNT = exports.HMSH_XCLAIM_DELAY_MS = exports.HMSH_BLOCK_TIME_MS = exports.HMSH_DURABLE_INITIAL_INTERVAL = exports.HMSH_DURABLE_EXP_BACKOFF = exports.HMSH_DURABLE_MAX_INTERVAL = exports.HMSH_DURABLE_MAX_ATTEMPTS = exports.HMSH_GRADUATED_INTERVAL_MS = exports.HMSH_MAX_TIMEOUT_MS = exports.HMSH_MAX_CYCLES = exports.HMSH_POISON_MESSAGE_THRESHOLD = exports.HMSH_MAX_RETRIES = exports.MAX_DELAY = exports.MAX_STREAM_RETRIES = exports.INITIAL_STREAM_BACKOFF = exports.MAX_STREAM_BACKOFF = exports.HMSH_EXPIRE_JOB_SECONDS = exports.HMSH_OTT_WAIT_TIME = exports.HMSH_DEPLOYMENT_PAUSE = exports.HMSH_DEPLOYMENT_DELAY = exports.HMSH_ACTIVATION_MAX_RETRY = exports.HMSH_QUORUM_DELAY_MS = exports.HMSH_QUORUM_ROLLCALL_CYCLES = exports.HMSH_STATUS_UNKNOWN = exports.HMSH_CODE_DURABLE_RETRYABLE = exports.HMSH_CODE_DURABLE_FATAL = exports.HMSH_CODE_DURABLE_MAXED = exports.HMSH_CODE_DURABLE_TIMEOUT = exports.HMSH_CODE_DURABLE_WAIT = exports.HMSH_CODE_DURABLE_CONTINUE = exports.HMSH_CODE_DURABLE_PROXY = exports.HMSH_CODE_DURABLE_CHILD = exports.HMSH_CODE_DURABLE_ALL = exports.HMSH_CODE_DURABLE_SLEEP = exports.HMSH_CODE_UNACKED = exports.HMSH_CODE_TIMEOUT = exports.HMSH_CODE_UNKNOWN = exports.HMSH_CODE_INTERRUPT = exports.HMSH_CODE_NOTFOUND = exports.HMSH_CODE_PENDING = exports.HMSH_CODE_SUCCESS = exports.HMSH_PENDING_SIGNAL_EXPIRE = exports.HMSH_SIGNAL_EXPIRE = exports.HMSH_TELEMETRY = exports.HMSH_LOGLEVEL = void 0;
|
|
4
|
+
exports.HMSH_ROUTER_POLL_FALLBACK_INTERVAL = exports.HMSH_NOTIFY_PAYLOAD_LIMIT = exports.DEFAULT_TASK_QUEUE = exports.HMSH_GUID_SIZE = exports.HMSH_ROUTER_SCOUT_INTERVAL_MS = exports.HMSH_ROUTER_SCOUT_INTERVAL_SECONDS = exports.HMSH_SCOUT_INTERVAL_SECONDS = exports.HMSH_FIDELITY_SECONDS = exports.HMSH_EXPIRE_DURATION = exports.HMSH_RESERVATION_TIMEOUT_MAX_S = void 0;
|
|
5
5
|
/**
|
|
6
6
|
* Determines the log level for the application. The default is 'info'.
|
|
7
7
|
*/
|
|
@@ -143,6 +143,7 @@ exports.MAX_STREAM_RETRIES = parseInt(process.env.MAX_STREAM_RETRIES, 10) || 2;
|
|
|
143
143
|
exports.MAX_DELAY = 2147483647; // Maximum allowed delay in milliseconds for setTimeout
|
|
144
144
|
exports.HMSH_MAX_RETRIES = parseInt(process.env.HMSH_MAX_RETRIES, 10) || 3;
|
|
145
145
|
exports.HMSH_POISON_MESSAGE_THRESHOLD = parseInt(process.env.HMSH_POISON_MESSAGE_THRESHOLD, 10) || 5;
|
|
146
|
+
exports.HMSH_MAX_CYCLES = parseInt(process.env.HMSH_MAX_CYCLES, 10) || 10000;
|
|
146
147
|
exports.HMSH_MAX_TIMEOUT_MS = parseInt(process.env.HMSH_MAX_TIMEOUT_MS, 10) || 60000;
|
|
147
148
|
exports.HMSH_GRADUATED_INTERVAL_MS = parseInt(process.env.HMSH_GRADUATED_INTERVAL_MS, 10) || 5000;
|
|
148
149
|
// DURABLE
|
|
package/build/modules/errors.d.ts
CHANGED
|
@@ -1,6 +1,25 @@
|
|
|
1
1
|
import { ActivityDuplex } from '../types/activity';
|
|
2
2
|
import { CollationFaultType, CollationStage } from '../types/collator';
|
|
3
3
|
import { DurableChildErrorType, DurableContinueAsNewErrorType, DurableProxyErrorType, DurableSleepErrorType, DurableWaitForAllErrorType, DurableWaitForErrorType } from '../types/error';
|
|
4
|
+
/**
|
|
5
|
+
* Error classification for dispatcher logging.
|
|
6
|
+
*
|
|
7
|
+
* FATAL — lease expired, invariant violation, corrupt state.
|
|
8
|
+
* The activity must stop immediately; message is NOT acked.
|
|
9
|
+
* RETRYABLE — transient infrastructure error (DB timeout, network).
|
|
10
|
+
* Normal retry/backoff applies.
|
|
11
|
+
* TERMINAL — permanent failure (user code threw, max retries exceeded).
|
|
12
|
+
* Message is acked; job is marked failed.
|
|
13
|
+
* COLLATION — duplicate delivery detected via GUID ledger.
|
|
14
|
+
* Silent ack; no work needed.
|
|
15
|
+
*/
|
|
16
|
+
export declare enum ErrorCategory {
|
|
17
|
+
FATAL = "fatal",
|
|
18
|
+
RETRYABLE = "retryable",
|
|
19
|
+
TERMINAL = "terminal",
|
|
20
|
+
COLLATION = "collation"
|
|
21
|
+
}
|
|
22
|
+
export declare function classifyError(error: unknown): ErrorCategory;
|
|
4
23
|
declare class GetStateError extends Error {
|
|
5
24
|
jobId: string;
|
|
6
25
|
code: number;
|
|
@@ -139,6 +158,13 @@ declare class GenerationalError extends Error {
|
|
|
139
158
|
declare class ExecActivityError extends Error {
|
|
140
159
|
constructor();
|
|
141
160
|
}
|
|
161
|
+
declare class LeaseExpiredError extends Error {
|
|
162
|
+
code: number;
|
|
163
|
+
type: string;
|
|
164
|
+
deadlineMs: number;
|
|
165
|
+
reservationTimeoutS: number;
|
|
166
|
+
constructor(deadlineMs: number, reservationTimeoutS: number);
|
|
167
|
+
}
|
|
142
168
|
declare class CollationError extends Error {
|
|
143
169
|
status: number;
|
|
144
170
|
leg: ActivityDuplex;
|
|
@@ -146,4 +172,4 @@ declare class CollationError extends Error {
|
|
|
146
172
|
fault: CollationFaultType;
|
|
147
173
|
constructor(status: number, leg: ActivityDuplex, stage: CollationStage, fault?: CollationFaultType);
|
|
148
174
|
}
|
|
149
|
-
export { CollationError, DurableChildError, DurableContinueAsNewError, DurableFatalError, DurableMaxedError, DurableProxyError, DurableRetryError, DurableSleepError, DurableTimeoutError, DurableWaitForAllError, DurableWaitForError, DuplicateJobError, ExecActivityError, GenerationalError, GetStateError, InactiveJobError, MapDataError, RegisterTimeoutError, SetStateError, };
|
|
175
|
+
export { CollationError, DurableChildError, DurableContinueAsNewError, DurableFatalError, DurableMaxedError, DurableProxyError, DurableRetryError, DurableSleepError, DurableTimeoutError, DurableWaitForAllError, DurableWaitForError, DuplicateJobError, ExecActivityError, GenerationalError, GetStateError, InactiveJobError, LeaseExpiredError, MapDataError, RegisterTimeoutError, SetStateError, };
|
package/build/modules/errors.js
CHANGED
|
@@ -1,7 +1,53 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.SetStateError = exports.RegisterTimeoutError = exports.MapDataError = exports.InactiveJobError = exports.GetStateError = exports.GenerationalError = exports.ExecActivityError = exports.DuplicateJobError = exports.DurableWaitForError = exports.DurableWaitForAllError = exports.DurableTimeoutError = exports.DurableSleepError = exports.DurableRetryError = exports.DurableProxyError = exports.DurableMaxedError = exports.DurableFatalError = exports.DurableContinueAsNewError = exports.DurableChildError = exports.CollationError = void 0;
|
|
3
|
+
exports.SetStateError = exports.RegisterTimeoutError = exports.MapDataError = exports.LeaseExpiredError = exports.InactiveJobError = exports.GetStateError = exports.GenerationalError = exports.ExecActivityError = exports.DuplicateJobError = exports.DurableWaitForError = exports.DurableWaitForAllError = exports.DurableTimeoutError = exports.DurableSleepError = exports.DurableRetryError = exports.DurableProxyError = exports.DurableMaxedError = exports.DurableFatalError = exports.DurableContinueAsNewError = exports.DurableChildError = exports.CollationError = exports.classifyError = exports.ErrorCategory = void 0;
|
|
4
4
|
const enums_1 = require("./enums");
|
|
5
|
+
/**
|
|
6
|
+
* Error classification for dispatcher logging.
|
|
7
|
+
*
|
|
8
|
+
* FATAL — lease expired, invariant violation, corrupt state.
|
|
9
|
+
* The activity must stop immediately; message is NOT acked.
|
|
10
|
+
* RETRYABLE — transient infrastructure error (DB timeout, network).
|
|
11
|
+
* Normal retry/backoff applies.
|
|
12
|
+
* TERMINAL — permanent failure (user code threw, max retries exceeded).
|
|
13
|
+
* Message is acked; job is marked failed.
|
|
14
|
+
* COLLATION — duplicate delivery detected via GUID ledger.
|
|
15
|
+
* Silent ack; no work needed.
|
|
16
|
+
*/
|
|
17
|
+
var ErrorCategory;
|
|
18
|
+
(function (ErrorCategory) {
|
|
19
|
+
ErrorCategory["FATAL"] = "fatal";
|
|
20
|
+
ErrorCategory["RETRYABLE"] = "retryable";
|
|
21
|
+
ErrorCategory["TERMINAL"] = "terminal";
|
|
22
|
+
ErrorCategory["COLLATION"] = "collation";
|
|
23
|
+
})(ErrorCategory = exports.ErrorCategory || (exports.ErrorCategory = {}));
|
|
24
|
+
function classifyError(error) {
|
|
25
|
+
if (error instanceof LeaseExpiredError) {
|
|
26
|
+
return ErrorCategory.FATAL;
|
|
27
|
+
}
|
|
28
|
+
if (error instanceof CollationError || error instanceof DuplicateJobError) {
|
|
29
|
+
return ErrorCategory.COLLATION;
|
|
30
|
+
}
|
|
31
|
+
if (error instanceof DurableRetryError) {
|
|
32
|
+
return ErrorCategory.RETRYABLE;
|
|
33
|
+
}
|
|
34
|
+
if (error instanceof DurableTimeoutError) {
|
|
35
|
+
return ErrorCategory.RETRYABLE;
|
|
36
|
+
}
|
|
37
|
+
if (error instanceof DurableFatalError ||
|
|
38
|
+
error instanceof DurableMaxedError) {
|
|
39
|
+
return ErrorCategory.TERMINAL;
|
|
40
|
+
}
|
|
41
|
+
if (error instanceof InactiveJobError ||
|
|
42
|
+
error instanceof GenerationalError ||
|
|
43
|
+
error instanceof GetStateError) {
|
|
44
|
+
return ErrorCategory.TERMINAL;
|
|
45
|
+
}
|
|
46
|
+
// Unknown errors default to retryable — the retry budget
|
|
47
|
+
// will promote them to terminal if they persist.
|
|
48
|
+
return ErrorCategory.RETRYABLE;
|
|
49
|
+
}
|
|
50
|
+
exports.classifyError = classifyError;
|
|
5
51
|
class GetStateError extends Error {
|
|
6
52
|
constructor(jobId) {
|
|
7
53
|
super(`${jobId} Not Found`);
|
|
@@ -207,6 +253,17 @@ class ExecActivityError extends Error {
|
|
|
207
253
|
}
|
|
208
254
|
}
|
|
209
255
|
exports.ExecActivityError = ExecActivityError;
|
|
256
|
+
class LeaseExpiredError extends Error {
|
|
257
|
+
constructor(deadlineMs, reservationTimeoutS) {
|
|
258
|
+
super(`Activity exceeded lease deadline (${deadlineMs}ms of ${reservationTimeoutS}s reservation). ` +
|
|
259
|
+
`Aborting to prevent unauthorized writes after lease expiry.`);
|
|
260
|
+
this.type = 'LeaseExpiredError';
|
|
261
|
+
this.code = enums_1.HMSH_CODE_DURABLE_FATAL;
|
|
262
|
+
this.deadlineMs = deadlineMs;
|
|
263
|
+
this.reservationTimeoutS = reservationTimeoutS;
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
exports.LeaseExpiredError = LeaseExpiredError;
|
|
210
267
|
class CollationError extends Error {
|
|
211
268
|
constructor(status, leg, stage, fault) {
|
|
212
269
|
super('collation-error');
|
package/build/package.json
CHANGED
|
package/build/services/activities/activity/process.js
CHANGED
|
@@ -71,6 +71,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
|
|
|
71
71
|
telemetry.setActivityAttributes({});
|
|
72
72
|
}
|
|
73
73
|
catch (error) {
|
|
74
|
+
const category = (0, errors_1.classifyError)(error);
|
|
74
75
|
if (error instanceof errors_1.CollationError) {
|
|
75
76
|
//FORBIDDEN: Leg1 not complete — should not occur after the fix
|
|
76
77
|
//that moved setHookSignal to post-commit. If seen, it indicates
|
|
@@ -78,6 +79,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
|
|
|
78
79
|
//retry in processWebHookEvent can attempt recovery.
|
|
79
80
|
if (error.fault === collator_1.CollationFaultType.FORBIDDEN) {
|
|
80
81
|
instance.logger.warn('process-event-forbidden-retry', {
|
|
82
|
+
category,
|
|
81
83
|
jid: instance.context.metadata.jid,
|
|
82
84
|
aid: instance.metadata.aid,
|
|
83
85
|
message: 'Leg1 not committed yet; rethrowing for stream retry',
|
|
@@ -98,6 +100,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
|
|
|
98
100
|
collationErrorCount++;
|
|
99
101
|
if (collationErrorCount === COLLATION_WARN_THRESHOLD) {
|
|
100
102
|
instance.logger.warn('process-event-collation-rate-exceeded', {
|
|
103
|
+
category,
|
|
101
104
|
count: collationErrorCount,
|
|
102
105
|
windowMs: COLLATION_WINDOW_MS,
|
|
103
106
|
reservationTimeoutS: enums_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
@@ -108,6 +111,7 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
|
|
|
108
111
|
});
|
|
109
112
|
}
|
|
110
113
|
instance.logger.warn(`process-event-${error.fault}-error`, {
|
|
114
|
+
category,
|
|
111
115
|
jid: instance.context.metadata.jid,
|
|
112
116
|
aid: instance.metadata.aid,
|
|
113
117
|
error,
|
|
@@ -115,20 +119,28 @@ async function processEvent(instance, status = stream_1.StreamStatus.SUCCESS, co
|
|
|
115
119
|
return;
|
|
116
120
|
}
|
|
117
121
|
else if (error instanceof errors_1.InactiveJobError) {
|
|
118
|
-
instance.logger.info('process-event-inactive-job-error', {
|
|
122
|
+
instance.logger.info('process-event-inactive-job-error', {
|
|
123
|
+
category,
|
|
124
|
+
error,
|
|
125
|
+
});
|
|
119
126
|
return;
|
|
120
127
|
}
|
|
121
128
|
else if (error instanceof errors_1.GenerationalError) {
|
|
122
129
|
instance.logger.info('process-event-generational-job-error', {
|
|
130
|
+
category,
|
|
123
131
|
error,
|
|
124
132
|
});
|
|
125
133
|
return;
|
|
126
134
|
}
|
|
127
135
|
else if (error instanceof errors_1.GetStateError) {
|
|
128
|
-
instance.logger.info('process-event-get-job-error', {
|
|
136
|
+
instance.logger.info('process-event-get-job-error', {
|
|
137
|
+
category,
|
|
138
|
+
error,
|
|
139
|
+
});
|
|
129
140
|
return;
|
|
130
141
|
}
|
|
131
142
|
instance.logger.error('activity-process-event-error', {
|
|
143
|
+
category,
|
|
132
144
|
error,
|
|
133
145
|
message: error.message,
|
|
134
146
|
stack: error.stack,
|
|
package/build/services/collator/index.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.CollatorService = void 0;
|
|
4
4
|
const errors_1 = require("../../modules/errors");
|
|
5
|
+
const enums_1 = require("../../modules/enums");
|
|
5
6
|
const collator_1 = require("../../types/collator");
|
|
6
7
|
class CollatorService {
|
|
7
8
|
/**
|
|
@@ -38,6 +39,13 @@ class CollatorService {
|
|
|
38
39
|
const ancestors = activity.config.ancestors;
|
|
39
40
|
const ancestorIndex = ancestors.indexOf(targetActivityId);
|
|
40
41
|
const dimensions = activity.metadata.dad.split(','); //e.g., `,0,0,1,0`
|
|
42
|
+
// Safety cap: prevent infinite cycle loops
|
|
43
|
+
const currentCycle = parseInt(dimensions[ancestorIndex] || '0', 10);
|
|
44
|
+
if (currentCycle >= enums_1.HMSH_MAX_CYCLES) {
|
|
45
|
+
throw new Error(`Cycle limit exceeded for job ${activity.context.metadata.jid} ` +
|
|
46
|
+
`(${currentCycle} >= HMSH_MAX_CYCLES=${enums_1.HMSH_MAX_CYCLES}) at DAD ${activity.metadata.dad}. ` +
|
|
47
|
+
`Set HMSH_MAX_CYCLES env var to increase the limit.`);
|
|
48
|
+
}
|
|
41
49
|
dimensions.length = ancestorIndex + 1;
|
|
42
50
|
dimensions.push('0');
|
|
43
51
|
return dimensions.join(',');
|
|
package/build/services/engine/dispatch.js
CHANGED
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
*/
|
|
17
17
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
18
18
|
exports.processStreamMessage = void 0;
|
|
19
|
+
const errors_1 = require("../../modules/errors");
|
|
19
20
|
const stream_1 = require("../../types/stream");
|
|
20
21
|
async function processStreamMessage(instance, streamData) {
|
|
21
22
|
instance.logger.debug('engine-process', {
|
|
@@ -86,7 +87,25 @@ async function dispatchAwait(instance, streamData, context) {
|
|
|
86
87
|
spn: streamData.metadata.spn,
|
|
87
88
|
};
|
|
88
89
|
const handler = (await instance.initActivity(streamData.metadata.topic, streamData.data, context));
|
|
89
|
-
|
|
90
|
+
try {
|
|
91
|
+
await handler.process();
|
|
92
|
+
}
|
|
93
|
+
catch (error) {
|
|
94
|
+
if (error instanceof errors_1.DuplicateJobError) {
|
|
95
|
+
// The child workflow already exists from a prior spawn attempt
|
|
96
|
+
// (crash recovery). This AWAIT message is a replay — the child
|
|
97
|
+
// will deliver its RESULT back to the parent via the normal path.
|
|
98
|
+
// Acknowledge the message so it doesn't loop.
|
|
99
|
+
instance.logger.info('dispatch-await-child-exists', {
|
|
100
|
+
category: (0, errors_1.classifyError)(error),
|
|
101
|
+
childJobId: error.jobId,
|
|
102
|
+
parentJobId: streamData.metadata.jid,
|
|
103
|
+
parentDad: streamData.metadata.dad,
|
|
104
|
+
});
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
throw error;
|
|
108
|
+
}
|
|
90
109
|
}
|
|
91
110
|
async function dispatchResult(instance, streamData, context) {
|
|
92
111
|
const handler = (await instance.initActivity(`.${context.metadata.aid}`, streamData.data, context));
|
|
package/build/services/router/consumption/index.d.ts
CHANGED
|
@@ -32,6 +32,7 @@ export declare class ConsumptionManager<S extends StreamService<ProviderClient,
|
|
|
32
32
|
private static readonly DEPTH_CHECK_INTERVAL_MS;
|
|
33
33
|
private static readonly DEPTH_SCALE_UP_THRESHOLD;
|
|
34
34
|
private static readonly DEPTH_SCALE_DOWN_THRESHOLD;
|
|
35
|
+
private static readonly LEASE_BUFFER_S;
|
|
35
36
|
constructor(stream: S, logger: ILogger, throttleManager: ThrottleManager, errorHandler: ErrorHandler, lifecycleManager: LifecycleManager<S>, reclaimDelay: number, reclaimCount: number, appId: string, role: any, router: any, retry?: import('../../../types/stream').RetryPolicy);
|
|
36
37
|
/**
|
|
37
38
|
* Adjusts reservation timeout based on stream depth. Called periodically
|
|
package/build/services/router/consumption/index.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ConsumptionManager = void 0;
|
|
4
4
|
const utils_1 = require("../../../modules/utils");
|
|
5
|
+
const errors_1 = require("../../../modules/errors");
|
|
5
6
|
const telemetry_1 = require("../telemetry");
|
|
6
7
|
const config_1 = require("../config");
|
|
7
8
|
const stream_1 = require("../../../types/stream");
|
|
@@ -65,7 +66,8 @@ class ConsumptionManager {
|
|
|
65
66
|
this.adaptiveBatchSize = Math.min(this.adaptiveBatchSize * 2, config_1.HMSH_BATCH_SIZE);
|
|
66
67
|
}
|
|
67
68
|
if (this.adaptiveReservationTimeout !== prevTimeout) {
|
|
68
|
-
this.stream.reservationTimeout =
|
|
69
|
+
this.stream.reservationTimeout =
|
|
70
|
+
this.adaptiveReservationTimeout + ConsumptionManager.LEASE_BUFFER_S;
|
|
69
71
|
this.logger.info('stream-reservation-timeout-adjusted', {
|
|
70
72
|
stream,
|
|
71
73
|
depth,
|
|
@@ -244,7 +246,7 @@ class ConsumptionManager {
|
|
|
244
246
|
enableNotifications: true,
|
|
245
247
|
notificationCallback,
|
|
246
248
|
blockTimeout: config_1.HMSH_BLOCK_TIME_MS,
|
|
247
|
-
reservationTimeout: config_1.HMSH_RESERVATION_TIMEOUT_S,
|
|
249
|
+
reservationTimeout: config_1.HMSH_RESERVATION_TIMEOUT_S + ConsumptionManager.LEASE_BUFFER_S,
|
|
248
250
|
});
|
|
249
251
|
// Don't block here - let the worker initialization complete
|
|
250
252
|
// The notification system will handle message processing asynchronously
|
|
@@ -297,7 +299,7 @@ class ConsumptionManager {
|
|
|
297
299
|
messages = await this.stream.consumeMessages(stream, group, consumer, {
|
|
298
300
|
blockTimeout: streamDuration,
|
|
299
301
|
batchSize,
|
|
300
|
-
reservationTimeout: this.adaptiveReservationTimeout,
|
|
302
|
+
reservationTimeout: this.adaptiveReservationTimeout + ConsumptionManager.LEASE_BUFFER_S,
|
|
301
303
|
enableBackoff: true,
|
|
302
304
|
initialBackoff: config_1.INITIAL_STREAM_BACKOFF,
|
|
303
305
|
maxBackoff: config_1.MAX_STREAM_BACKOFF,
|
|
@@ -312,7 +314,7 @@ class ConsumptionManager {
|
|
|
312
314
|
messages = await this.stream.consumeMessages(stream, group, consumer, {
|
|
313
315
|
blockTimeout: streamDuration,
|
|
314
316
|
batchSize,
|
|
315
|
-
reservationTimeout: this.adaptiveReservationTimeout,
|
|
317
|
+
reservationTimeout: this.adaptiveReservationTimeout + ConsumptionManager.LEASE_BUFFER_S,
|
|
316
318
|
enableBackoff: false,
|
|
317
319
|
maxRetries: 1,
|
|
318
320
|
});
|
|
@@ -491,23 +493,63 @@ class ConsumptionManager {
|
|
|
491
493
|
}
|
|
492
494
|
return;
|
|
493
495
|
}
|
|
496
|
+
// Lease deadline: the full configured reservation timeout (N).
|
|
497
|
+
// The reclaim interval is N+5s, so the deadline always fires
|
|
498
|
+
// before a reclaimant can pick up the message. This preserves
|
|
499
|
+
// the user's contract — if they set 30s, the function gets 30s.
|
|
500
|
+
const deadlineMs = this.adaptiveReservationTimeout * 1000;
|
|
494
501
|
let output;
|
|
495
502
|
const telemetry = new telemetry_1.RouterTelemetry(this.appId);
|
|
496
503
|
try {
|
|
497
504
|
telemetry.startStreamSpan(input, this.role);
|
|
498
|
-
|
|
505
|
+
let deadlineTimer;
|
|
506
|
+
const deadlinePromise = new Promise((_, reject) => {
|
|
507
|
+
deadlineTimer = setTimeout(() => reject(new errors_1.LeaseExpiredError(deadlineMs, this.adaptiveReservationTimeout)), deadlineMs);
|
|
508
|
+
});
|
|
509
|
+
try {
|
|
510
|
+
output = await Promise.race([
|
|
511
|
+
this.execStreamLeg(input, stream, id, callback.bind(this)),
|
|
512
|
+
deadlinePromise,
|
|
513
|
+
]);
|
|
514
|
+
}
|
|
515
|
+
finally {
|
|
516
|
+
clearTimeout(deadlineTimer);
|
|
517
|
+
}
|
|
499
518
|
telemetry.setStreamErrorFromOutput(output);
|
|
500
519
|
this.errorCount = 0;
|
|
501
520
|
}
|
|
502
521
|
catch (err) {
|
|
503
|
-
|
|
522
|
+
const category = (0, errors_1.classifyError)(err);
|
|
523
|
+
if (err instanceof errors_1.LeaseExpiredError) {
|
|
524
|
+
// FATAL: lease expired — do NOT ack. The message remains in the
|
|
525
|
+
// stream for a reclaimant to pick up cleanly. Any partial writes
|
|
526
|
+
// from this consumer are idempotent via collation.
|
|
527
|
+
this.logger.error('stream-lease-expired', {
|
|
528
|
+
category,
|
|
529
|
+
group,
|
|
530
|
+
stream,
|
|
531
|
+
id,
|
|
532
|
+
deadlineMs,
|
|
533
|
+
reservationTimeoutS: this.adaptiveReservationTimeout,
|
|
534
|
+
topic: input.metadata?.topic,
|
|
535
|
+
activityId: input.metadata?.aid,
|
|
536
|
+
jobId: input.metadata?.jid,
|
|
537
|
+
});
|
|
538
|
+
telemetry.setStreamErrorFromException(err);
|
|
539
|
+
telemetry.endStreamSpan();
|
|
540
|
+
return; // NO ack — leave for reclaimant
|
|
541
|
+
}
|
|
542
|
+
this.logger.error(`stream-read-one-error`, {
|
|
543
|
+
category,
|
|
544
|
+
group,
|
|
545
|
+
stream,
|
|
546
|
+
id,
|
|
547
|
+
err,
|
|
548
|
+
});
|
|
504
549
|
telemetry.setStreamErrorFromException(err);
|
|
505
550
|
output = this.errorHandler.structureUnhandledError(input, err instanceof Error ? err : new Error(String(err)));
|
|
506
551
|
}
|
|
507
552
|
try {
|
|
508
|
-
// When the ENGINE itself fails to process a message (e.g., schema not
|
|
509
|
-
// found, missing subscription), do NOT republish the error back to the
|
|
510
|
-
// engine stream — that creates an infinite poison loop. The engine
|
|
511
553
|
// When the ENGINE encounters an infrastructure error (schema not found,
|
|
512
554
|
// subscription missing — code 598), the message is permanently unprocessable.
|
|
513
555
|
// Do NOT republish it — that creates an infinite poison loop. Only suppress
|
|
@@ -515,6 +557,7 @@ class ConsumptionManager {
|
|
|
515
557
|
// duplicates, workflow failures) must still flow through normally.
|
|
516
558
|
if (group === 'ENGINE' && output?.code === 598) {
|
|
517
559
|
this.logger.error(`stream-engine-dispatch-fatal`, {
|
|
560
|
+
category: errors_1.ErrorCategory.FATAL,
|
|
518
561
|
stream, id, group,
|
|
519
562
|
aid: input.metadata?.aid,
|
|
520
563
|
jid: input.metadata?.jid,
|
|
@@ -530,6 +573,7 @@ class ConsumptionManager {
|
|
|
530
573
|
// If publishResponse fails, still ack the message to prevent
|
|
531
574
|
// infinite reprocessing. Log the error for debugging.
|
|
532
575
|
this.logger.error(`stream-publish-response-error`, {
|
|
576
|
+
category: (0, errors_1.classifyError)(publishErr),
|
|
533
577
|
group, stream, id, error: publishErr,
|
|
534
578
|
});
|
|
535
579
|
this.errorCount++;
|
|
@@ -547,6 +591,7 @@ class ConsumptionManager {
|
|
|
547
591
|
}
|
|
548
592
|
catch (error) {
|
|
549
593
|
this.logger.error(`stream-call-function-error`, {
|
|
594
|
+
category: (0, errors_1.classifyError)(error),
|
|
550
595
|
error,
|
|
551
596
|
input: input,
|
|
552
597
|
stack: error.stack,
|
|
@@ -620,4 +665,8 @@ class ConsumptionManager {
|
|
|
620
665
|
ConsumptionManager.DEPTH_CHECK_INTERVAL_MS = 10000;
|
|
621
666
|
ConsumptionManager.DEPTH_SCALE_UP_THRESHOLD = 100;
|
|
622
667
|
ConsumptionManager.DEPTH_SCALE_DOWN_THRESHOLD = 10;
|
|
668
|
+
// Buffer between the activity deadline (N) and the reclaim interval
|
|
669
|
+
// (N+5). The function gets the full configured timeout; the extra 5s
|
|
670
|
+
// ensures the deadline fires before a reclaimant can pick up the message.
|
|
671
|
+
ConsumptionManager.LEASE_BUFFER_S = 5;
|
|
623
672
|
exports.ConsumptionManager = ConsumptionManager;
|
|
package/build/services/stream/index.js
CHANGED
|
@@ -5,7 +5,9 @@ class StreamService {
|
|
|
5
5
|
constructor(streamClient, storeClient, config = {}) {
|
|
6
6
|
// Adaptive reservation timeout — set by the consumption manager
|
|
7
7
|
// based on stream depth. Providers read this when reserving messages.
|
|
8
|
-
|
|
8
|
+
// Includes a +5s buffer over the activity deadline so the deadline
|
|
9
|
+
// always fires before reclaim (see ConsumptionManager.LEASE_BUFFER_S).
|
|
10
|
+
this.reservationTimeout = 35;
|
|
9
11
|
this.streamClient = streamClient;
|
|
10
12
|
this.storeClient = storeClient;
|
|
11
13
|
this.config = config;
|
|
package/build/services/stream/providers/postgres/kvtables.js
CHANGED
|
@@ -161,17 +161,12 @@ async function ensureIndexes(client, schemaName) {
|
|
|
161
161
|
WHERE expired_at IS NULL;
|
|
162
162
|
`);
|
|
163
163
|
// v0.18.0: add jid column to engine_streams for job tracing
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
CREATE INDEX IF NOT EXISTS idx_engine_streams_jid_created
|
|
171
|
-
ON ${engineTable} (jid, created_at)
|
|
172
|
-
WHERE jid != '';
|
|
173
|
-
`);
|
|
174
|
-
}
|
|
164
|
+
await client.query(`ALTER TABLE ${engineTable} ADD COLUMN IF NOT EXISTS jid TEXT NOT NULL DEFAULT ''`);
|
|
165
|
+
await client.query(`
|
|
166
|
+
CREATE INDEX IF NOT EXISTS idx_engine_streams_jid_created
|
|
167
|
+
ON ${engineTable} (jid, created_at)
|
|
168
|
+
WHERE jid != '';
|
|
169
|
+
`);
|
|
175
170
|
}
|
|
176
171
|
async function createTables(client, schemaName) {
|
|
177
172
|
await client.query(`CREATE SCHEMA IF NOT EXISTS ${schemaName};`);
|
|
package/build/services/stream/providers/postgres/messages.js
CHANGED
|
@@ -223,7 +223,7 @@ async function fetchMessages(client, tableName, streamName, isEngine, consumerNa
|
|
|
223
223
|
while (retries < maxRetries) {
|
|
224
224
|
retries++;
|
|
225
225
|
const batchSize = options?.batchSize || 1;
|
|
226
|
-
const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
226
|
+
const reservationTimeout = options?.reservationTimeout || (enums_1.HMSH_RESERVATION_TIMEOUT_S + 5);
|
|
227
227
|
const res = await client.query(`UPDATE ${tableName}
|
|
228
228
|
SET reserved_at = NOW(), reserved_by = $3
|
|
229
229
|
WHERE id IN (
|
|
package/build/services/stream/providers/postgres/secured.js
CHANGED
|
@@ -21,7 +21,7 @@ async function fetchMessagesSecured(client, schema, streamName, consumerName, op
|
|
|
21
21
|
const maxBackoff = options?.maxBackoff ?? 3000;
|
|
22
22
|
const maxRetries = options?.maxRetries ?? 3;
|
|
23
23
|
const batchSize = options?.batchSize || 1;
|
|
24
|
-
const reservationTimeout = options?.reservationTimeout || enums_1.HMSH_RESERVATION_TIMEOUT_S;
|
|
24
|
+
const reservationTimeout = options?.reservationTimeout || (enums_1.HMSH_RESERVATION_TIMEOUT_S + 5);
|
|
25
25
|
let backoff = initialBackoff;
|
|
26
26
|
let retries = 0;
|
|
27
27
|
try {
|