@aichatwar/shared 1.0.170 → 1.0.171
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Consumer, EachMessagePayload } from "kafkajs";
|
|
1
|
+
import { Consumer, Producer, EachMessagePayload } from "kafkajs";
|
|
2
2
|
import { BaseEvent } from '../baseEvent';
|
|
3
3
|
export type { EachMessagePayload };
|
|
4
4
|
export declare abstract class Listener<T extends BaseEvent> {
|
|
@@ -20,9 +20,12 @@ export declare abstract class Listener<T extends BaseEvent> {
|
|
|
20
20
|
healthy: boolean;
|
|
21
21
|
}>;
|
|
22
22
|
protected consumer: Consumer;
|
|
23
|
+
protected dlqProducer?: Producer;
|
|
23
24
|
protected ackDeadline: number;
|
|
24
25
|
protected fromBeginning: boolean;
|
|
25
26
|
protected maxEventAgeMs: number;
|
|
27
|
+
protected maxMessageRetries: number;
|
|
28
|
+
protected messageRetryBaseDelayMs: number;
|
|
26
29
|
private currentPayload?;
|
|
27
30
|
private retryCount;
|
|
28
31
|
private readonly maxInitialRetries;
|
|
@@ -35,8 +38,14 @@ export declare abstract class Listener<T extends BaseEvent> {
|
|
|
35
38
|
private lastSuccessfulMessageAt;
|
|
36
39
|
/** Expose consumer health for readiness probes */
|
|
37
40
|
get healthy(): boolean;
|
|
38
|
-
constructor(consumer: Consumer);
|
|
41
|
+
constructor(consumer: Consumer, dlqProducer?: Producer);
|
|
39
42
|
private setupCrashHandler;
|
|
43
|
+
/**
|
|
44
|
+
* Detects errors that are guaranteed to fail on every retry (poison messages).
|
|
45
|
+
* These should go straight to DLQ without wasting retry attempts.
|
|
46
|
+
*/
|
|
47
|
+
private isPermanentError;
|
|
48
|
+
private publishToDlq;
|
|
40
49
|
ack(payload?: EachMessagePayload): Promise<void>;
|
|
41
50
|
listen(): Promise<void>;
|
|
42
51
|
}
|
|
@@ -34,10 +34,12 @@ class Listener {
|
|
|
34
34
|
get healthy() {
|
|
35
35
|
return this.isListening;
|
|
36
36
|
}
|
|
37
|
-
constructor(consumer) {
|
|
37
|
+
constructor(consumer, dlqProducer) {
|
|
38
38
|
this.ackDeadline = 5 * 1000; // 5 seconds
|
|
39
39
|
this.fromBeginning = false; // Override in subclasses to read from beginning
|
|
40
40
|
this.maxEventAgeMs = 0; // 0 = disabled. When set, messages older than this are auto-acked and skipped.
|
|
41
|
+
this.maxMessageRetries = 3; // Message-level retries before DLQ
|
|
42
|
+
this.messageRetryBaseDelayMs = 1000;
|
|
41
43
|
this.retryCount = 0;
|
|
42
44
|
this.maxInitialRetries = 3; // Show detailed retry logs for first 3 attempts
|
|
43
45
|
this.maxRetryDelay = 60000; // Cap delay at 60 seconds
|
|
@@ -48,6 +50,7 @@ class Listener {
|
|
|
48
50
|
this.maxCrashRestartDelay = 120000; // 2 minutes cap
|
|
49
51
|
this.lastSuccessfulMessageAt = 0; // Timestamp of last successfully processed message
|
|
50
52
|
this.consumer = consumer;
|
|
53
|
+
this.dlqProducer = dlqProducer;
|
|
51
54
|
Listener.registry.push(this);
|
|
52
55
|
this.setupCrashHandler();
|
|
53
56
|
}
|
|
@@ -93,6 +96,61 @@ class Listener {
|
|
|
93
96
|
});
|
|
94
97
|
this.crashHandlerSetup = true;
|
|
95
98
|
}
|
|
99
|
+
/**
|
|
100
|
+
* Detects errors that are guaranteed to fail on every retry (poison messages).
|
|
101
|
+
* These should go straight to DLQ without wasting retry attempts.
|
|
102
|
+
*/
|
|
103
|
+
isPermanentError(error) {
|
|
104
|
+
if (!error)
|
|
105
|
+
return false;
|
|
106
|
+
// Mongoose VersionError — document version has advanced, stale event can never match
|
|
107
|
+
if (error.name === 'VersionError')
|
|
108
|
+
return true;
|
|
109
|
+
// MongoDB E11000 duplicate key — record already exists
|
|
110
|
+
if (error.code === 11000)
|
|
111
|
+
return true;
|
|
112
|
+
// NotFoundError thrown in listener — document doesn't exist (out-of-order event)
|
|
113
|
+
if (error.name === 'NotFoundError' || error.statusCode === 404)
|
|
114
|
+
return true;
|
|
115
|
+
// ValidationError — schema violation, data itself is invalid
|
|
116
|
+
if (error.name === 'ValidationError')
|
|
117
|
+
return true;
|
|
118
|
+
return false;
|
|
119
|
+
}
|
|
120
|
+
/**
 * Publishes a failed message to this listener's dead-letter topic
 * (`<topic>.dlq`), wrapped in an envelope that records the failure.
 *
 * Returns without doing anything when no DLQ producer is set on the instance.
 *
 * @param data - the message content as passed by the caller (may be nullish)
 * @param error - the failure that caused the message to be dead-lettered;
 *                may be any thrown value, including null/undefined
 * @param partition - partition the message was read from
 * @param offset - offset of the message within that partition
 * @throws the original `error` when publishing to the DLQ fails. If there is
 *         no original error value, the DLQ failure itself is thrown instead,
 *         so the caller never receives a bare `undefined`.
 */
publishToDlq(data, error, partition, offset) {
    return __awaiter(this, void 0, void 0, function* () {
        const producer = this.dlqProducer;
        if (!producer) {
            return;
        }
        const dlqTopic = `${this.topic}.dlq`;
        // Null-safe reads: `error` is whatever was thrown and may be nullish.
        const errorName = (error == null ? undefined : error.name) || 'Error';
        const errorMessage = error == null ? undefined : error.message;
        try {
            // Built inside the try so a serialisation failure is handled like a send failure.
            const envelope = JSON.stringify({
                originalData: data,
                error: errorMessage || 'Unknown error',
                errorName: errorName,
                errorCode: error == null ? undefined : error.code,
                topic: this.topic,
                groupId: this.groupId,
                partition,
                offset,
                failedAt: new Date().toISOString(),
            });
            // Key by the event's own id when it has one; otherwise send an unkeyed message.
            const key = (data == null ? undefined : data.id) ||
                (data == null ? undefined : data.messageId) ||
                null;
            yield producer.send({
                topic: dlqTopic,
                messages: [{
                        key,
                        value: envelope,
                        timestamp: Date.now().toString(),
                    }],
            });
            console.warn(`☠️ [${this.topic}] Poison message sent to DLQ ${dlqTopic} ` +
                `(partition: ${partition}, offset: ${offset}, error: ${errorName}: ${errorMessage})`);
        }
        catch (dlqErr) {
            // Guarded read: a nullish rejection must not raise a TypeError from inside this handler.
            const dlqMessage = dlqErr == null ? undefined : dlqErr.message;
            console.error(`[${this.topic}] CRITICAL: Failed to publish to DLQ ${dlqTopic}: ${dlqMessage}. ` +
                `Original error: ${errorMessage}. Message at partition ${partition} offset ${offset} will be retried by crash handler.`);
            // Rethrow so the caller does not go on to ack the message.
            if (error != null) {
                throw error;
            }
            // No original error value to surface: report the DLQ failure rather than `undefined`.
            throw dlqErr != null ? dlqErr : new Error(`Failed to publish to DLQ ${dlqTopic}`);
        }
    });
}
|
|
96
154
|
// Manual acknowledgment method
|
|
97
155
|
// Accepts optional payload for custom consumer patterns (e.g., realtime-gateway)
|
|
98
156
|
ack(payload) {
|
|
@@ -140,14 +198,7 @@ class Listener {
|
|
|
140
198
|
}
|
|
141
199
|
// Reset retry count on successful connection
|
|
142
200
|
this.retryCount = 0;
|
|
143
|
-
console.log(`🚀 [${this.topic}]
|
|
144
|
-
console.log(`🚀 [${this.topic}] Starting consumer.run() with groupId: ${this.groupId}`);
|
|
145
|
-
console.log(`🚀 [${this.topic}] Consumer configuration:`, {
|
|
146
|
-
groupId: this.groupId,
|
|
147
|
-
topic: this.topic,
|
|
148
|
-
fromBeginning: this.fromBeginning,
|
|
149
|
-
autoCommit: false,
|
|
150
|
-
});
|
|
201
|
+
console.log(`🚀 [${this.topic}] Starting consumer.run() with groupId: ${this.groupId}, dlq: ${this.dlqProducer ? 'enabled' : 'disabled'}`);
|
|
151
202
|
yield this.consumer.run({
|
|
152
203
|
// CRITICAL: Disable auto-commit to prevent message loss during rebalancing
|
|
153
204
|
// Offsets are only committed when we explicitly call ack()
|
|
@@ -164,8 +215,6 @@ class Listener {
|
|
|
164
215
|
}]);
|
|
165
216
|
return;
|
|
166
217
|
}
|
|
167
|
-
// Commented out key logging as requested
|
|
168
|
-
// console.log(`📨 [${this.topic}] Message received -> groupId: ${this.groupId}, partition: ${payload.partition}, offset: ${payload.message.offset}, key: ${payload.message.key?.toString() || 'none'}`);
|
|
169
218
|
console.log(`📨 [${this.topic}] Message received -> groupId: ${this.groupId}, partition: ${payload.partition}, offset: ${payload.message.offset}, value length: ${payload.message.value.toString().length}`);
|
|
170
219
|
// Skip stale messages when maxEventAgeMs is configured
|
|
171
220
|
if (this.maxEventAgeMs > 0 && payload.message.timestamp) {
|
|
@@ -181,17 +230,74 @@ class Listener {
|
|
|
181
230
|
}
|
|
182
231
|
// Store current payload for manual ack
|
|
183
232
|
this.currentPayload = payload;
|
|
233
|
+
let data;
|
|
234
|
+
try {
|
|
235
|
+
data = JSON.parse(payload.message.value.toString());
|
|
236
|
+
}
|
|
237
|
+
catch (parseErr) {
|
|
238
|
+
console.error(`❌ [${this.topic}] JSON parse error at offset ${payload.message.offset} — skipping malformed message`);
|
|
239
|
+
if (this.dlqProducer) {
|
|
240
|
+
yield this.publishToDlq(payload.message.value.toString(), parseErr, payload.partition, payload.message.offset);
|
|
241
|
+
}
|
|
242
|
+
yield this.consumer.commitOffsets([{
|
|
243
|
+
topic: payload.topic,
|
|
244
|
+
partition: payload.partition,
|
|
245
|
+
offset: (BigInt(payload.message.offset) + BigInt(1)).toString()
|
|
246
|
+
}]);
|
|
247
|
+
this.currentPayload = undefined;
|
|
248
|
+
return;
|
|
249
|
+
}
|
|
184
250
|
try {
|
|
185
|
-
const data = JSON.parse(payload.message.value.toString());
|
|
186
251
|
yield this.onMessage(data, payload);
|
|
187
252
|
// Note: Child listeners MUST call this.ack() manually after successful processing
|
|
188
253
|
// If they don't call ack(), the message will be redelivered after session timeout
|
|
189
254
|
// This ensures at-least-once delivery semantics
|
|
190
255
|
}
|
|
191
256
|
catch (error) {
|
|
257
|
+
// --- DLQ-based poison message handling ---
|
|
258
|
+
// If a DLQ producer is available, we handle the error here instead of
|
|
259
|
+
// crashing the consumer. The message is retried N times, then published
|
|
260
|
+
// to a DLQ topic and acked so the consumer can move on.
|
|
261
|
+
if (this.dlqProducer) {
|
|
262
|
+
if (this.isPermanentError(error)) {
|
|
263
|
+
console.error(`❌ [${this.topic}] Permanent error (${error.name || 'Error'}) at partition ${payload.partition} ` +
|
|
264
|
+
`offset ${payload.message.offset} — skipping retries, sending to DLQ`);
|
|
265
|
+
yield this.publishToDlq(data, error, payload.partition, payload.message.offset);
|
|
266
|
+
yield this.ack(payload);
|
|
267
|
+
this.currentPayload = undefined;
|
|
268
|
+
return;
|
|
269
|
+
}
|
|
270
|
+
// Retry transient errors with backoff
|
|
271
|
+
let lastRetryError = error;
|
|
272
|
+
for (let attempt = 2; attempt <= this.maxMessageRetries; attempt++) {
|
|
273
|
+
const delay = this.messageRetryBaseDelayMs * Math.pow(2, attempt - 2);
|
|
274
|
+
console.warn(`[${this.topic}] Retry ${attempt}/${this.maxMessageRetries} for offset ${payload.message.offset} ` +
|
|
275
|
+
`in ${delay}ms (error: ${lastRetryError.message})`);
|
|
276
|
+
yield new Promise(r => setTimeout(r, delay));
|
|
277
|
+
try {
|
|
278
|
+
this.currentPayload = payload;
|
|
279
|
+
yield this.onMessage(data, payload);
|
|
280
|
+
this.currentPayload = undefined;
|
|
281
|
+
return; // Retry succeeded — onMessage called ack()
|
|
282
|
+
}
|
|
283
|
+
catch (retryErr) {
|
|
284
|
+
lastRetryError = retryErr;
|
|
285
|
+
if (this.isPermanentError(retryErr)) {
|
|
286
|
+
console.error(`❌ [${this.topic}] Permanent error on retry ${attempt} — sending to DLQ`);
|
|
287
|
+
break;
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
// All retries exhausted — DLQ + ack
|
|
292
|
+
console.error(`❌ [${this.topic}] All ${this.maxMessageRetries} retries exhausted for offset ${payload.message.offset} — sending to DLQ`);
|
|
293
|
+
yield this.publishToDlq(data, lastRetryError, payload.partition, payload.message.offset);
|
|
294
|
+
yield this.ack(payload);
|
|
295
|
+
this.currentPayload = undefined;
|
|
296
|
+
return;
|
|
297
|
+
}
|
|
298
|
+
// --- Legacy behavior (no DLQ producer) ---
|
|
299
|
+
// Re-throw to trigger the crash handler, which will eventually process.exit(1)
|
|
192
300
|
console.error(`❌ [${this.topic}] Error processing message for topic: ${this.topic}, offset: ${payload.message.offset}`, error);
|
|
193
|
-
// In case of error, we don't commit the offset, so the message will be redelivered
|
|
194
|
-
// This ensures failed messages are retried
|
|
195
301
|
throw error;
|
|
196
302
|
}
|
|
197
303
|
finally {
|