@aichatwar/shared 1.0.169 → 1.0.171

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,31 @@
1
- import { Consumer, EachMessagePayload } from "kafkajs";
1
+ import { Consumer, Producer, EachMessagePayload } from "kafkajs";
2
2
  import { BaseEvent } from '../baseEvent';
3
3
  export type { EachMessagePayload };
4
4
  export declare abstract class Listener<T extends BaseEvent> {
5
5
  abstract topic: T['subject'];
6
6
  abstract groupId: string;
7
7
  abstract onMessage(data: T['data'], payload: EachMessagePayload): Promise<void>;
8
+ private static registry;
9
+ /**
10
+ * Returns true when no listeners are registered, or when every registered listener reports `healthy` (i.e. is currently listening).
11
+ * Wire this into your /ready endpoint alongside the MongoDB check.
12
+ */
13
+ static allHealthy(): boolean;
14
+ /**
15
+ * Returns per-listener health details for diagnostics.
16
+ */
17
+ static healthDetails(): Array<{
18
+ topic: string;
19
+ groupId: string;
20
+ healthy: boolean;
21
+ }>;
8
22
  protected consumer: Consumer;
23
+ protected dlqProducer?: Producer;
9
24
  protected ackDeadline: number;
10
25
  protected fromBeginning: boolean;
11
26
  protected maxEventAgeMs: number;
27
+ protected maxMessageRetries: number;
28
+ protected messageRetryBaseDelayMs: number;
12
29
  private currentPayload?;
13
30
  private retryCount;
14
31
  private readonly maxInitialRetries;
@@ -16,9 +33,19 @@ export declare abstract class Listener<T extends BaseEvent> {
16
33
  private isListening;
17
34
  private crashHandlerSetup;
18
35
  private crashRestartCount;
36
+ private readonly maxCrashRestarts;
19
37
  private readonly maxCrashRestartDelay;
20
- constructor(consumer: Consumer);
38
+ private lastSuccessfulMessageAt;
39
+ /** Expose consumer health for readiness probes */
40
+ get healthy(): boolean;
41
+ constructor(consumer: Consumer, dlqProducer?: Producer);
21
42
  private setupCrashHandler;
43
+ /**
44
+ * Detects errors that are guaranteed to fail on every retry (poison messages).
45
+ * These should go straight to DLQ without wasting retry attempts.
46
+ */
47
+ private isPermanentError;
48
+ private publishToDlq;
22
49
  ack(payload?: EachMessagePayload): Promise<void>;
23
50
  listen(): Promise<void>;
24
51
  }
@@ -11,18 +11,47 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
12
  exports.Listener = void 0;
13
13
  class Listener {
14
- constructor(consumer) {
14
+ /**
15
+ * Aggregate readiness check: returns true when the registry is empty, otherwise true only if every registered listener's `healthy` getter is true (i.e. its `isListening` flag is set).
16
+ * Wire this into your /ready endpoint alongside the MongoDB check.
17
+ */
18
+ static allHealthy() {
19
+ if (Listener.registry.length === 0) // no listeners registered — report ready
20
+ return true;
21
+ return Listener.registry.every(l => l.healthy);
22
+ }
23
+ /**
24
+ * Returns one `{ topic, groupId, healthy }` entry per registered listener, in registration order, for diagnostics.
25
+ */
26
+ static healthDetails() {
27
+ return Listener.registry.map(l => ({
28
+ topic: l.topic,
29
+ groupId: l.groupId,
30
+ healthy: l.healthy, // same flag that allHealthy() aggregates
31
+ }));
32
+ }
33
+ /** Expose consumer health for readiness probes: mirrors the `isListening` flag, which the crash and `consumer.disconnect` handlers reset to false. */
34
+ get healthy() {
35
+ return this.isListening;
36
+ }
37
+ constructor(consumer, dlqProducer) { // dlqProducer is optional: without it publishToDlq() is a no-op and message errors are re-thrown (legacy path)
15
38
  this.ackDeadline = 5 * 1000; // 5 seconds
16
39
  this.fromBeginning = false; // Override in subclasses to read from beginning
17
40
  this.maxEventAgeMs = 0; // 0 = disabled. When set, messages older than this are auto-acked and skipped.
41
+ this.maxMessageRetries = 3; // Message-level retries before DLQ
42
+ this.messageRetryBaseDelayMs = 1000; // Delay (ms) before the first message-level retry; doubled for each later attempt
18
43
  this.retryCount = 0;
19
44
  this.maxInitialRetries = 3; // Show detailed retry logs for first 3 attempts
20
45
  this.maxRetryDelay = 60000; // Cap delay at 60 seconds
21
46
  this.isListening = false; // Track if listener is active
22
47
  this.crashHandlerSetup = false; // Track if crash handler has been set up
23
48
  this.crashRestartCount = 0; // Incremented by the crash handler; reset to 0 after a message is acknowledged
49
+ this.maxCrashRestarts = 5; // Exit process after this many consecutive crashes
24
50
  this.maxCrashRestartDelay = 120000; // 2 minutes cap
51
+ this.lastSuccessfulMessageAt = 0; // Timestamp of last successfully processed message
25
52
  this.consumer = consumer;
53
+ this.dlqProducer = dlqProducer;
54
+ Listener.registry.push(this); // Register so the static allHealthy()/healthDetails() include this listener. NOTE(review): no removal from the registry is visible in this diff — confirm listeners live for the whole process.
26
55
  this.setupCrashHandler();
27
56
  }
28
57
  setupCrashHandler() {
@@ -37,23 +66,91 @@ class Listener {
37
66
  return;
38
67
  }
39
68
  this.crashRestartCount++;
69
+ this.isListening = false;
70
+ if (this.crashRestartCount >= this.maxCrashRestarts) {
71
+ console.error(`💀 [${this.topic}] Consumer crashed ${this.crashRestartCount} times without recovery. ` +
72
+ `Exiting process to let Kubernetes restart the pod with a fresh state.`);
73
+ process.exit(1);
74
+ }
40
75
  const delay = Math.min(5000 * Math.pow(2, Math.min(this.crashRestartCount - 1, 5)), this.maxCrashRestartDelay);
41
- console.error(`[${this.topic}] Consumer crashed (restart #${this.crashRestartCount}, ` +
76
+ console.error(`[${this.topic}] Consumer crashed (restart #${this.crashRestartCount}/${this.maxCrashRestarts}, ` +
42
77
  `retrying in ${delay}ms):`, error);
43
- this.isListening = false;
44
78
  try {
45
79
  yield this.consumer.disconnect();
46
80
  }
47
81
  catch (_) { /* best effort */ }
48
82
  setTimeout(() => {
49
- console.log(`[${this.topic}] Auto-restarting consumer after crash (attempt #${this.crashRestartCount})...`);
83
+ console.log(`[${this.topic}] Auto-restarting consumer after crash (attempt #${this.crashRestartCount}/${this.maxCrashRestarts})...`);
50
84
  this.listen().catch((err) => {
51
85
  console.error(`[${this.topic}] Auto-restart failed:`, err);
86
+ console.error(`💀 [${this.topic}] Auto-restart listen() threw. Exiting process to let Kubernetes restart the pod.`);
87
+ process.exit(1);
52
88
  });
53
89
  }, delay);
54
90
  }));
91
+ this.consumer.on('consumer.disconnect', () => {
92
+ if (this.isListening) {
93
+ console.warn(`⚠️ [${this.topic}] Consumer disconnected unexpectedly`);
94
+ this.isListening = false;
95
+ }
96
+ });
55
97
  this.crashHandlerSetup = true;
56
98
  }
99
+ /**
100
+ * Classifies an error as permanent (poison message) by inspecting its `name`, `code` and `statusCode`; returns false for a falsy error and for anything it does not recognise.
101
+ * Permanent errors go straight to DLQ without wasting retry attempts. NOTE(review): a NotFoundError/404 caused by an out-of-order event might succeed on a later retry — confirm that skipping retries is intended for that case.
102
+ */
103
+ isPermanentError(error) {
104
+ if (!error)
105
+ return false;
106
+ // Mongoose VersionError — document version has advanced, stale event can never match
107
+ if (error.name === 'VersionError')
108
+ return true;
109
+ // MongoDB E11000 duplicate key — record already exists
110
+ if (error.code === 11000)
111
+ return true;
112
+ // NotFoundError thrown in listener, or any error carrying statusCode 404 — document doesn't exist (out-of-order event)
113
+ if (error.name === 'NotFoundError' || error.statusCode === 404)
114
+ return true;
115
+ // ValidationError — schema violation, data itself is invalid
116
+ if (error.name === 'ValidationError')
117
+ return true;
118
+ return false; // anything else is treated as transient and eligible for retry
119
+ }
120
+ publishToDlq(data, error, partition, offset) { // Sends the failed message plus error metadata to the "<topic>.dlq" topic; rethrows the original error if the send fails
121
+ return __awaiter(this, void 0, void 0, function* () {
122
+ if (!this.dlqProducer) // DLQ disabled — resolve without publishing anything
123
+ return;
124
+ const dlqTopic = `${this.topic}.dlq`;
125
+ try {
126
+ yield this.dlqProducer.send({
127
+ topic: dlqTopic,
128
+ messages: [{
129
+ key: (data === null || data === void 0 ? void 0 : data.id) || (data === null || data === void 0 ? void 0 : data.messageId) || null, // data.id, else data.messageId, else null (e.g. when data is the raw unparsed string)
130
+ value: JSON.stringify({
131
+ originalData: data,
132
+ error: (error === null || error === void 0 ? void 0 : error.message) || 'Unknown error',
133
+ errorName: (error === null || error === void 0 ? void 0 : error.name) || 'Error',
134
+ errorCode: error === null || error === void 0 ? void 0 : error.code,
135
+ topic: this.topic,
136
+ groupId: this.groupId,
137
+ partition,
138
+ offset,
139
+ failedAt: new Date().toISOString(),
140
+ }),
141
+ timestamp: Date.now().toString(),
142
+ }],
143
+ });
144
+ console.warn(`☠️ [${this.topic}] Poison message sent to DLQ ${dlqTopic} ` +
145
+ `(partition: ${partition}, offset: ${offset}, error: ${(error === null || error === void 0 ? void 0 : error.name) || 'Error'}: ${error === null || error === void 0 ? void 0 : error.message})`);
146
+ }
147
+ catch (dlqErr) {
148
+ console.error(`[${this.topic}] CRITICAL: Failed to publish to DLQ ${dlqTopic}: ${dlqErr.message}. ` +
149
+ `Original error: ${error === null || error === void 0 ? void 0 : error.message}. Message at partition ${partition} offset ${offset} will be retried by crash handler.`);
150
+ throw error; // deliberately the original processing error, not dlqErr (dlqErr is only logged above)
151
+ }
152
+ });
153
+ }
57
154
  // Manual acknowledgment method
58
155
  // Accepts optional payload for custom consumer patterns (e.g., realtime-gateway)
59
156
  ack(payload) {
@@ -67,6 +164,12 @@ class Listener {
67
164
  partition: targetPayload.partition,
68
165
  offset: (BigInt(targetPayload.message.offset) + BigInt(1)).toString()
69
166
  }]);
167
+ // Reset crash counter on successful processing — proves the consumer is healthy
168
+ if (this.crashRestartCount > 0) {
169
+ console.log(`[${this.topic}] Crash counter reset (was ${this.crashRestartCount}) after successful message processing`);
170
+ this.crashRestartCount = 0;
171
+ }
172
+ this.lastSuccessfulMessageAt = Date.now();
70
173
  console.log(`Message manually acknowledged for topic: ${this.topic}`);
71
174
  });
72
175
  }
@@ -95,14 +198,7 @@ class Listener {
95
198
  }
96
199
  // Reset retry count on successful connection
97
200
  this.retryCount = 0;
98
- console.log(`🚀 [${this.topic}] About to call consumer.run() with groupId: ${this.groupId}`);
99
- console.log(`🚀 [${this.topic}] Starting consumer.run() with groupId: ${this.groupId}`);
100
- console.log(`🚀 [${this.topic}] Consumer configuration:`, {
101
- groupId: this.groupId,
102
- topic: this.topic,
103
- fromBeginning: this.fromBeginning,
104
- autoCommit: false,
105
- });
201
+ console.log(`🚀 [${this.topic}] Starting consumer.run() with groupId: ${this.groupId}, dlq: ${this.dlqProducer ? 'enabled' : 'disabled'}`);
106
202
  yield this.consumer.run({
107
203
  // CRITICAL: Disable auto-commit to prevent message loss during rebalancing
108
204
  // Offsets are only committed when we explicitly call ack()
@@ -119,8 +215,6 @@ class Listener {
119
215
  }]);
120
216
  return;
121
217
  }
122
- // Commented out key logging as requested
123
- // console.log(`📨 [${this.topic}] Message received -> groupId: ${this.groupId}, partition: ${payload.partition}, offset: ${payload.message.offset}, key: ${payload.message.key?.toString() || 'none'}`);
124
218
  console.log(`📨 [${this.topic}] Message received -> groupId: ${this.groupId}, partition: ${payload.partition}, offset: ${payload.message.offset}, value length: ${payload.message.value.toString().length}`);
125
219
  // Skip stale messages when maxEventAgeMs is configured
126
220
  if (this.maxEventAgeMs > 0 && payload.message.timestamp) {
@@ -136,17 +230,74 @@ class Listener {
136
230
  }
137
231
  // Store current payload for manual ack
138
232
  this.currentPayload = payload;
233
+ let data;
234
+ try {
235
+ data = JSON.parse(payload.message.value.toString());
236
+ }
237
+ catch (parseErr) {
238
+ console.error(`❌ [${this.topic}] JSON parse error at offset ${payload.message.offset} — skipping malformed message`);
239
+ if (this.dlqProducer) {
240
+ yield this.publishToDlq(payload.message.value.toString(), parseErr, payload.partition, payload.message.offset);
241
+ }
242
+ yield this.consumer.commitOffsets([{
243
+ topic: payload.topic,
244
+ partition: payload.partition,
245
+ offset: (BigInt(payload.message.offset) + BigInt(1)).toString()
246
+ }]);
247
+ this.currentPayload = undefined;
248
+ return;
249
+ }
139
250
  try {
140
- const data = JSON.parse(payload.message.value.toString());
141
251
  yield this.onMessage(data, payload);
142
252
  // Note: Child listeners MUST call this.ack() manually after successful processing
143
253
  // If they don't call ack(), the message will be redelivered after session timeout
144
254
  // This ensures at-least-once delivery semantics
145
255
  }
146
256
  catch (error) {
257
+ // --- DLQ-based poison message handling ---
258
+ // If a DLQ producer is available, we handle the error here instead of
259
+ // crashing the consumer. The message is retried N times, then published
260
+ // to a DLQ topic and acked so the consumer can move on.
261
+ if (this.dlqProducer) {
262
+ if (this.isPermanentError(error)) {
263
+ console.error(`❌ [${this.topic}] Permanent error (${error.name || 'Error'}) at partition ${payload.partition} ` +
264
+ `offset ${payload.message.offset} — skipping retries, sending to DLQ`);
265
+ yield this.publishToDlq(data, error, payload.partition, payload.message.offset);
266
+ yield this.ack(payload);
267
+ this.currentPayload = undefined;
268
+ return;
269
+ }
270
+ // Retry transient errors with backoff
271
+ let lastRetryError = error;
272
+ for (let attempt = 2; attempt <= this.maxMessageRetries; attempt++) {
273
+ const delay = this.messageRetryBaseDelayMs * Math.pow(2, attempt - 2);
274
+ console.warn(`[${this.topic}] Retry ${attempt}/${this.maxMessageRetries} for offset ${payload.message.offset} ` +
275
+ `in ${delay}ms (error: ${lastRetryError.message})`);
276
+ yield new Promise(r => setTimeout(r, delay));
277
+ try {
278
+ this.currentPayload = payload;
279
+ yield this.onMessage(data, payload);
280
+ this.currentPayload = undefined;
281
+ return; // Retry succeeded — onMessage called ack()
282
+ }
283
+ catch (retryErr) {
284
+ lastRetryError = retryErr;
285
+ if (this.isPermanentError(retryErr)) {
286
+ console.error(`❌ [${this.topic}] Permanent error on retry ${attempt} — sending to DLQ`);
287
+ break;
288
+ }
289
+ }
290
+ }
291
+ // All retries exhausted — DLQ + ack
292
+ console.error(`❌ [${this.topic}] All ${this.maxMessageRetries} retries exhausted for offset ${payload.message.offset} — sending to DLQ`);
293
+ yield this.publishToDlq(data, lastRetryError, payload.partition, payload.message.offset);
294
+ yield this.ack(payload);
295
+ this.currentPayload = undefined;
296
+ return;
297
+ }
298
+ // --- Legacy behavior (no DLQ producer) ---
299
+ // Re-throw to trigger the crash handler, which will eventually process.exit(1)
147
300
  console.error(`❌ [${this.topic}] Error processing message for topic: ${this.topic}, offset: ${payload.message.offset}`, error);
148
- // In case of error, we don't commit the offset, so the message will be redelivered
149
- // This ensures failed messages are retried
150
301
  throw error;
151
302
  }
152
303
  finally {
@@ -198,3 +349,4 @@ class Listener {
198
349
  }
199
350
  }
200
351
  exports.Listener = Listener;
352
+ Listener.registry = [];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aichatwar/shared",
3
- "version": "1.0.169",
3
+ "version": "1.0.171",
4
4
  "main": "./build/index.js",
5
5
  "types": "./build/index.d.ts",
6
6
  "files": [