@aichatwar/shared 1.0.169 → 1.0.171
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,14 +1,31 @@
|
|
|
1
|
-
import { Consumer, EachMessagePayload } from "kafkajs";
|
|
1
|
+
import { Consumer, Producer, EachMessagePayload } from "kafkajs";
|
|
2
2
|
import { BaseEvent } from '../baseEvent';
|
|
3
3
|
export type { EachMessagePayload };
|
|
4
4
|
export declare abstract class Listener<T extends BaseEvent> {
|
|
5
5
|
abstract topic: T['subject'];
|
|
6
6
|
abstract groupId: string;
|
|
7
7
|
abstract onMessage(data: T['data'], payload: EachMessagePayload): Promise<void>;
|
|
8
|
+
private static registry;
|
|
9
|
+
/**
|
|
10
|
+
* Returns true only if every registered listener's consumer is connected.
|
|
11
|
+
* Wire this into your /ready endpoint alongside the MongoDB check.
|
|
12
|
+
*/
|
|
13
|
+
static allHealthy(): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* Returns per-listener health details for diagnostics.
|
|
16
|
+
*/
|
|
17
|
+
static healthDetails(): Array<{
|
|
18
|
+
topic: string;
|
|
19
|
+
groupId: string;
|
|
20
|
+
healthy: boolean;
|
|
21
|
+
}>;
|
|
8
22
|
protected consumer: Consumer;
|
|
23
|
+
protected dlqProducer?: Producer;
|
|
9
24
|
protected ackDeadline: number;
|
|
10
25
|
protected fromBeginning: boolean;
|
|
11
26
|
protected maxEventAgeMs: number;
|
|
27
|
+
protected maxMessageRetries: number;
|
|
28
|
+
protected messageRetryBaseDelayMs: number;
|
|
12
29
|
private currentPayload?;
|
|
13
30
|
private retryCount;
|
|
14
31
|
private readonly maxInitialRetries;
|
|
@@ -16,9 +33,19 @@ export declare abstract class Listener<T extends BaseEvent> {
|
|
|
16
33
|
private isListening;
|
|
17
34
|
private crashHandlerSetup;
|
|
18
35
|
private crashRestartCount;
|
|
36
|
+
private readonly maxCrashRestarts;
|
|
19
37
|
private readonly maxCrashRestartDelay;
|
|
20
|
-
|
|
38
|
+
private lastSuccessfulMessageAt;
|
|
39
|
+
/** Expose consumer health for readiness probes */
|
|
40
|
+
get healthy(): boolean;
|
|
41
|
+
constructor(consumer: Consumer, dlqProducer?: Producer);
|
|
21
42
|
private setupCrashHandler;
|
|
43
|
+
/**
|
|
44
|
+
* Detects errors that are guaranteed to fail on every retry (poison messages).
|
|
45
|
+
* These should go straight to DLQ without wasting retry attempts.
|
|
46
|
+
*/
|
|
47
|
+
private isPermanentError;
|
|
48
|
+
private publishToDlq;
|
|
22
49
|
ack(payload?: EachMessagePayload): Promise<void>;
|
|
23
50
|
listen(): Promise<void>;
|
|
24
51
|
}
|
|
@@ -11,18 +11,47 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.Listener = void 0;
|
|
13
13
|
class Listener {
|
|
14
|
-
|
|
14
|
+
/**
|
|
15
|
+
* Returns true only if every registered listener's consumer is connected.
|
|
16
|
+
* Wire this into your /ready endpoint alongside the MongoDB check.
|
|
17
|
+
*/
|
|
18
|
+
static allHealthy() {
|
|
19
|
+
if (Listener.registry.length === 0)
|
|
20
|
+
return true;
|
|
21
|
+
return Listener.registry.every(l => l.healthy);
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Returns per-listener health details for diagnostics.
|
|
25
|
+
*/
|
|
26
|
+
static healthDetails() {
|
|
27
|
+
return Listener.registry.map(l => ({
|
|
28
|
+
topic: l.topic,
|
|
29
|
+
groupId: l.groupId,
|
|
30
|
+
healthy: l.healthy,
|
|
31
|
+
}));
|
|
32
|
+
}
|
|
33
|
+
/** Expose consumer health for readiness probes */
|
|
34
|
+
get healthy() {
|
|
35
|
+
return this.isListening;
|
|
36
|
+
}
|
|
37
|
+
constructor(consumer, dlqProducer) {
|
|
15
38
|
this.ackDeadline = 5 * 1000; // 5 seconds
|
|
16
39
|
this.fromBeginning = false; // Override in subclasses to read from beginning
|
|
17
40
|
this.maxEventAgeMs = 0; // 0 = disabled. When set, messages older than this are auto-acked and skipped.
|
|
41
|
+
this.maxMessageRetries = 3; // Message-level retries before DLQ
|
|
42
|
+
this.messageRetryBaseDelayMs = 1000;
|
|
18
43
|
this.retryCount = 0;
|
|
19
44
|
this.maxInitialRetries = 3; // Show detailed retry logs for first 3 attempts
|
|
20
45
|
this.maxRetryDelay = 60000; // Cap delay at 60 seconds
|
|
21
46
|
this.isListening = false; // Track if listener is active
|
|
22
47
|
this.crashHandlerSetup = false; // Track if crash handler has been set up
|
|
23
48
|
this.crashRestartCount = 0;
|
|
49
|
+
this.maxCrashRestarts = 5; // Exit process after this many consecutive crashes
|
|
24
50
|
this.maxCrashRestartDelay = 120000; // 2 minutes cap
|
|
51
|
+
this.lastSuccessfulMessageAt = 0; // Timestamp of last successfully processed message
|
|
25
52
|
this.consumer = consumer;
|
|
53
|
+
this.dlqProducer = dlqProducer;
|
|
54
|
+
Listener.registry.push(this);
|
|
26
55
|
this.setupCrashHandler();
|
|
27
56
|
}
|
|
28
57
|
setupCrashHandler() {
|
|
@@ -37,23 +66,91 @@ class Listener {
|
|
|
37
66
|
return;
|
|
38
67
|
}
|
|
39
68
|
this.crashRestartCount++;
|
|
69
|
+
this.isListening = false;
|
|
70
|
+
if (this.crashRestartCount >= this.maxCrashRestarts) {
|
|
71
|
+
console.error(`💀 [${this.topic}] Consumer crashed ${this.crashRestartCount} times without recovery. ` +
|
|
72
|
+
`Exiting process to let Kubernetes restart the pod with a fresh state.`);
|
|
73
|
+
process.exit(1);
|
|
74
|
+
}
|
|
40
75
|
const delay = Math.min(5000 * Math.pow(2, Math.min(this.crashRestartCount - 1, 5)), this.maxCrashRestartDelay);
|
|
41
|
-
console.error(`[${this.topic}] Consumer crashed (restart #${this.crashRestartCount}, ` +
|
|
76
|
+
console.error(`[${this.topic}] Consumer crashed (restart #${this.crashRestartCount}/${this.maxCrashRestarts}, ` +
|
|
42
77
|
`retrying in ${delay}ms):`, error);
|
|
43
|
-
this.isListening = false;
|
|
44
78
|
try {
|
|
45
79
|
yield this.consumer.disconnect();
|
|
46
80
|
}
|
|
47
81
|
catch (_) { /* best effort */ }
|
|
48
82
|
setTimeout(() => {
|
|
49
|
-
console.log(`[${this.topic}] Auto-restarting consumer after crash (attempt #${this.crashRestartCount})...`);
|
|
83
|
+
console.log(`[${this.topic}] Auto-restarting consumer after crash (attempt #${this.crashRestartCount}/${this.maxCrashRestarts})...`);
|
|
50
84
|
this.listen().catch((err) => {
|
|
51
85
|
console.error(`[${this.topic}] Auto-restart failed:`, err);
|
|
86
|
+
console.error(`💀 [${this.topic}] Auto-restart listen() threw. Exiting process to let Kubernetes restart the pod.`);
|
|
87
|
+
process.exit(1);
|
|
52
88
|
});
|
|
53
89
|
}, delay);
|
|
54
90
|
}));
|
|
91
|
+
this.consumer.on('consumer.disconnect', () => {
|
|
92
|
+
if (this.isListening) {
|
|
93
|
+
console.warn(`⚠️ [${this.topic}] Consumer disconnected unexpectedly`);
|
|
94
|
+
this.isListening = false;
|
|
95
|
+
}
|
|
96
|
+
});
|
|
55
97
|
this.crashHandlerSetup = true;
|
|
56
98
|
}
|
|
99
|
+
/**
|
|
100
|
+
* Detects errors that are guaranteed to fail on every retry (poison messages).
|
|
101
|
+
* These should go straight to DLQ without wasting retry attempts.
|
|
102
|
+
*/
|
|
103
|
+
isPermanentError(error) {
|
|
104
|
+
if (!error)
|
|
105
|
+
return false;
|
|
106
|
+
// Mongoose VersionError — document version has advanced, stale event can never match
|
|
107
|
+
if (error.name === 'VersionError')
|
|
108
|
+
return true;
|
|
109
|
+
// MongoDB E11000 duplicate key — record already exists
|
|
110
|
+
if (error.code === 11000)
|
|
111
|
+
return true;
|
|
112
|
+
// NotFoundError thrown in listener — document doesn't exist (out-of-order event)
|
|
113
|
+
if (error.name === 'NotFoundError' || error.statusCode === 404)
|
|
114
|
+
return true;
|
|
115
|
+
// ValidationError — schema violation, data itself is invalid
|
|
116
|
+
if (error.name === 'ValidationError')
|
|
117
|
+
return true;
|
|
118
|
+
return false;
|
|
119
|
+
}
|
|
120
|
+
/**
 * Publishes a failed message to the dead-letter topic `<topic>.dlq`.
 *
 * Does nothing when no DLQ producer was supplied to the constructor.
 * The record key is `data.id`, else `data.messageId`, else `null`; the
 * value is a JSON envelope holding the original data, error details, the
 * source topic/group/partition/offset and an ISO failure timestamp.
 *
 * @param data      Parsed message data (or the raw string when parsing failed).
 * @param error     The error that caused the message to be dead-lettered.
 * @param partition Partition the message was read from.
 * @param offset    Offset of the message within that partition.
 * @returns {Promise<void>}
 * @throws Re-throws the ORIGINAL `error` (not the producer error) when the
 *   DLQ send fails. Per the log message, this is meant to leave the
 *   message for the crash handler to retry.
 */
publishToDlq(data, error, partition, offset) {
    return __awaiter(this, void 0, void 0, function* () {
        const producer = this.dlqProducer;
        if (!producer) {
            return;
        }
        const dlqTopic = `${this.topic}.dlq`;
        // Null-safe property read: yields undefined for a null/undefined source.
        const pick = (source, field) => (source === null || source === undefined ? undefined : source[field]);
        try {
            const key = pick(data, 'id') || pick(data, 'messageId') || null;
            const envelope = {
                originalData: data,
                error: pick(error, 'message') || 'Unknown error',
                errorName: pick(error, 'name') || 'Error',
                errorCode: pick(error, 'code'),
                topic: this.topic,
                groupId: this.groupId,
                partition,
                offset,
                failedAt: new Date().toISOString(),
            };
            const record = {
                key,
                value: JSON.stringify(envelope),
                timestamp: Date.now().toString(),
            };
            yield producer.send({ topic: dlqTopic, messages: [record] });
            console.warn(`☠️ [${this.topic}] Poison message sent to DLQ ${dlqTopic} ` +
                `(partition: ${partition}, offset: ${offset}, error: ${pick(error, 'name') || 'Error'}: ${pick(error, 'message')})`);
        }
        catch (dlqErr) {
            console.error(`[${this.topic}] CRITICAL: Failed to publish to DLQ ${dlqTopic}: ${dlqErr.message}. ` +
                `Original error: ${pick(error, 'message')}. Message at partition ${partition} offset ${offset} will be retried by crash handler.`);
            // Surface the processing error, not the DLQ failure, to the caller.
            throw error;
        }
    });
}
|
|
57
154
|
// Manual acknowledgment method
|
|
58
155
|
// Accepts optional payload for custom consumer patterns (e.g., realtime-gateway)
|
|
59
156
|
ack(payload) {
|
|
@@ -67,6 +164,12 @@ class Listener {
|
|
|
67
164
|
partition: targetPayload.partition,
|
|
68
165
|
offset: (BigInt(targetPayload.message.offset) + BigInt(1)).toString()
|
|
69
166
|
}]);
|
|
167
|
+
// Reset crash counter on successful processing — proves the consumer is healthy
|
|
168
|
+
if (this.crashRestartCount > 0) {
|
|
169
|
+
console.log(`[${this.topic}] Crash counter reset (was ${this.crashRestartCount}) after successful message processing`);
|
|
170
|
+
this.crashRestartCount = 0;
|
|
171
|
+
}
|
|
172
|
+
this.lastSuccessfulMessageAt = Date.now();
|
|
70
173
|
console.log(`Message manually acknowledged for topic: ${this.topic}`);
|
|
71
174
|
});
|
|
72
175
|
}
|
|
@@ -95,14 +198,7 @@ class Listener {
|
|
|
95
198
|
}
|
|
96
199
|
// Reset retry count on successful connection
|
|
97
200
|
this.retryCount = 0;
|
|
98
|
-
console.log(`🚀 [${this.topic}]
|
|
99
|
-
console.log(`🚀 [${this.topic}] Starting consumer.run() with groupId: ${this.groupId}`);
|
|
100
|
-
console.log(`🚀 [${this.topic}] Consumer configuration:`, {
|
|
101
|
-
groupId: this.groupId,
|
|
102
|
-
topic: this.topic,
|
|
103
|
-
fromBeginning: this.fromBeginning,
|
|
104
|
-
autoCommit: false,
|
|
105
|
-
});
|
|
201
|
+
console.log(`🚀 [${this.topic}] Starting consumer.run() with groupId: ${this.groupId}, dlq: ${this.dlqProducer ? 'enabled' : 'disabled'}`);
|
|
106
202
|
yield this.consumer.run({
|
|
107
203
|
// CRITICAL: Disable auto-commit to prevent message loss during rebalancing
|
|
108
204
|
// Offsets are only committed when we explicitly call ack()
|
|
@@ -119,8 +215,6 @@ class Listener {
|
|
|
119
215
|
}]);
|
|
120
216
|
return;
|
|
121
217
|
}
|
|
122
|
-
// Commented out key logging as requested
|
|
123
|
-
// console.log(`📨 [${this.topic}] Message received -> groupId: ${this.groupId}, partition: ${payload.partition}, offset: ${payload.message.offset}, key: ${payload.message.key?.toString() || 'none'}`);
|
|
124
218
|
console.log(`📨 [${this.topic}] Message received -> groupId: ${this.groupId}, partition: ${payload.partition}, offset: ${payload.message.offset}, value length: ${payload.message.value.toString().length}`);
|
|
125
219
|
// Skip stale messages when maxEventAgeMs is configured
|
|
126
220
|
if (this.maxEventAgeMs > 0 && payload.message.timestamp) {
|
|
@@ -136,17 +230,74 @@ class Listener {
|
|
|
136
230
|
}
|
|
137
231
|
// Store current payload for manual ack
|
|
138
232
|
this.currentPayload = payload;
|
|
233
|
+
let data;
|
|
234
|
+
try {
|
|
235
|
+
data = JSON.parse(payload.message.value.toString());
|
|
236
|
+
}
|
|
237
|
+
catch (parseErr) {
|
|
238
|
+
console.error(`❌ [${this.topic}] JSON parse error at offset ${payload.message.offset} — skipping malformed message`);
|
|
239
|
+
if (this.dlqProducer) {
|
|
240
|
+
yield this.publishToDlq(payload.message.value.toString(), parseErr, payload.partition, payload.message.offset);
|
|
241
|
+
}
|
|
242
|
+
yield this.consumer.commitOffsets([{
|
|
243
|
+
topic: payload.topic,
|
|
244
|
+
partition: payload.partition,
|
|
245
|
+
offset: (BigInt(payload.message.offset) + BigInt(1)).toString()
|
|
246
|
+
}]);
|
|
247
|
+
this.currentPayload = undefined;
|
|
248
|
+
return;
|
|
249
|
+
}
|
|
139
250
|
try {
|
|
140
|
-
const data = JSON.parse(payload.message.value.toString());
|
|
141
251
|
yield this.onMessage(data, payload);
|
|
142
252
|
// Note: Child listeners MUST call this.ack() manually after successful processing
|
|
143
253
|
// If they don't call ack(), the message will be redelivered after session timeout
|
|
144
254
|
// This ensures at-least-once delivery semantics
|
|
145
255
|
}
|
|
146
256
|
catch (error) {
|
|
257
|
+
// --- DLQ-based poison message handling ---
|
|
258
|
+
// If a DLQ producer is available, we handle the error here instead of
|
|
259
|
+
// crashing the consumer. The message is retried N times, then published
|
|
260
|
+
// to a DLQ topic and acked so the consumer can move on.
|
|
261
|
+
if (this.dlqProducer) {
|
|
262
|
+
if (this.isPermanentError(error)) {
|
|
263
|
+
console.error(`❌ [${this.topic}] Permanent error (${error.name || 'Error'}) at partition ${payload.partition} ` +
|
|
264
|
+
`offset ${payload.message.offset} — skipping retries, sending to DLQ`);
|
|
265
|
+
yield this.publishToDlq(data, error, payload.partition, payload.message.offset);
|
|
266
|
+
yield this.ack(payload);
|
|
267
|
+
this.currentPayload = undefined;
|
|
268
|
+
return;
|
|
269
|
+
}
|
|
270
|
+
// Retry transient errors with backoff
|
|
271
|
+
let lastRetryError = error;
|
|
272
|
+
for (let attempt = 2; attempt <= this.maxMessageRetries; attempt++) {
|
|
273
|
+
const delay = this.messageRetryBaseDelayMs * Math.pow(2, attempt - 2);
|
|
274
|
+
console.warn(`[${this.topic}] Retry ${attempt}/${this.maxMessageRetries} for offset ${payload.message.offset} ` +
|
|
275
|
+
`in ${delay}ms (error: ${lastRetryError.message})`);
|
|
276
|
+
yield new Promise(r => setTimeout(r, delay));
|
|
277
|
+
try {
|
|
278
|
+
this.currentPayload = payload;
|
|
279
|
+
yield this.onMessage(data, payload);
|
|
280
|
+
this.currentPayload = undefined;
|
|
281
|
+
return; // Retry succeeded — onMessage called ack()
|
|
282
|
+
}
|
|
283
|
+
catch (retryErr) {
|
|
284
|
+
lastRetryError = retryErr;
|
|
285
|
+
if (this.isPermanentError(retryErr)) {
|
|
286
|
+
console.error(`❌ [${this.topic}] Permanent error on retry ${attempt} — sending to DLQ`);
|
|
287
|
+
break;
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
// All retries exhausted — DLQ + ack
|
|
292
|
+
console.error(`❌ [${this.topic}] All ${this.maxMessageRetries} retries exhausted for offset ${payload.message.offset} — sending to DLQ`);
|
|
293
|
+
yield this.publishToDlq(data, lastRetryError, payload.partition, payload.message.offset);
|
|
294
|
+
yield this.ack(payload);
|
|
295
|
+
this.currentPayload = undefined;
|
|
296
|
+
return;
|
|
297
|
+
}
|
|
298
|
+
// --- Legacy behavior (no DLQ producer) ---
|
|
299
|
+
// Re-throw to trigger the crash handler, which will eventually process.exit(1)
|
|
147
300
|
console.error(`❌ [${this.topic}] Error processing message for topic: ${this.topic}, offset: ${payload.message.offset}`, error);
|
|
148
|
-
// In case of error, we don't commit the offset, so the message will be redelivered
|
|
149
|
-
// This ensures failed messages are retried
|
|
150
301
|
throw error;
|
|
151
302
|
}
|
|
152
303
|
finally {
|
|
@@ -198,3 +349,4 @@ class Listener {
|
|
|
198
349
|
}
|
|
199
350
|
}
|
|
200
351
|
exports.Listener = Listener;
|
|
352
|
+
Listener.registry = [];
|