@aichatwar/shared 1.0.170 → 1.0.171

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { Consumer, EachMessagePayload } from "kafkajs";
1
+ import { Consumer, Producer, EachMessagePayload } from "kafkajs";
2
2
  import { BaseEvent } from '../baseEvent';
3
3
  export type { EachMessagePayload };
4
4
  export declare abstract class Listener<T extends BaseEvent> {
@@ -20,9 +20,12 @@ export declare abstract class Listener<T extends BaseEvent> {
20
20
  healthy: boolean;
21
21
  }>;
22
22
  protected consumer: Consumer;
23
+ protected dlqProducer?: Producer;
23
24
  protected ackDeadline: number;
24
25
  protected fromBeginning: boolean;
25
26
  protected maxEventAgeMs: number;
27
+ protected maxMessageRetries: number;
28
+ protected messageRetryBaseDelayMs: number;
26
29
  private currentPayload?;
27
30
  private retryCount;
28
31
  private readonly maxInitialRetries;
@@ -35,8 +38,14 @@ export declare abstract class Listener<T extends BaseEvent> {
35
38
  private lastSuccessfulMessageAt;
36
39
  /** Expose consumer health for readiness probes */
37
40
  get healthy(): boolean;
38
- constructor(consumer: Consumer);
41
+ constructor(consumer: Consumer, dlqProducer?: Producer);
39
42
  private setupCrashHandler;
43
+ /**
44
+ * Detects errors that are guaranteed to fail on every retry (poison messages).
45
+ * These should go straight to DLQ without wasting retry attempts.
46
+ */
47
+ private isPermanentError;
48
+ private publishToDlq;
40
49
  ack(payload?: EachMessagePayload): Promise<void>;
41
50
  listen(): Promise<void>;
42
51
  }
@@ -34,10 +34,12 @@ class Listener {
34
34
  get healthy() {
35
35
  return this.isListening;
36
36
  }
37
- constructor(consumer) {
37
+ constructor(consumer, dlqProducer) {
38
38
  this.ackDeadline = 5 * 1000; // 5 seconds
39
39
  this.fromBeginning = false; // Override in subclasses to read from beginning
40
40
  this.maxEventAgeMs = 0; // 0 = disabled. When set, messages older than this are auto-acked and skipped.
41
+ this.maxMessageRetries = 3; // Total processing attempts per message (first try included) before DLQ
42
+ this.messageRetryBaseDelayMs = 1000;
41
43
  this.retryCount = 0;
42
44
  this.maxInitialRetries = 3; // Show detailed retry logs for first 3 attempts
43
45
  this.maxRetryDelay = 60000; // Cap delay at 60 seconds
@@ -48,6 +50,7 @@ class Listener {
48
50
  this.maxCrashRestartDelay = 120000; // 2 minutes cap
49
51
  this.lastSuccessfulMessageAt = 0; // Timestamp of last successfully processed message
50
52
  this.consumer = consumer;
53
+ this.dlqProducer = dlqProducer;
51
54
  Listener.registry.push(this);
52
55
  this.setupCrashHandler();
53
56
  }
@@ -93,6 +96,61 @@ class Listener {
93
96
  });
94
97
  this.crashHandlerSetup = true;
95
98
  }
99
+ /**
100
+ * Detects errors that are guaranteed to fail on every retry (poison messages).
101
+ * These should go straight to DLQ without wasting retry attempts.
102
+ */
103
+ isPermanentError(error) {
104
+ if (!error)
105
+ return false;
106
+ // Mongoose VersionError — document version has advanced, stale event can never match
107
+ if (error.name === 'VersionError')
108
+ return true;
109
+ // MongoDB E11000 duplicate key — record already exists
110
+ if (error.code === 11000)
111
+ return true;
112
+ // NotFoundError thrown in listener — document doesn't exist (out-of-order event)
113
+ if (error.name === 'NotFoundError' || error.statusCode === 404)
114
+ return true;
115
+ // ValidationError — schema violation, data itself is invalid
116
+ if (error.name === 'ValidationError')
117
+ return true;
118
+ return false;
119
+ }
120
+ publishToDlq(data, error, partition, offset) {
121
+ return __awaiter(this, void 0, void 0, function* () {
122
+ if (!this.dlqProducer)
123
+ return;
124
+ const dlqTopic = `${this.topic}.dlq`;
125
+ try {
126
+ yield this.dlqProducer.send({
127
+ topic: dlqTopic,
128
+ messages: [{
129
+ key: (data === null || data === void 0 ? void 0 : data.id) || (data === null || data === void 0 ? void 0 : data.messageId) || null,
130
+ value: JSON.stringify({
131
+ originalData: data,
132
+ error: (error === null || error === void 0 ? void 0 : error.message) || 'Unknown error',
133
+ errorName: (error === null || error === void 0 ? void 0 : error.name) || 'Error',
134
+ errorCode: error === null || error === void 0 ? void 0 : error.code,
135
+ topic: this.topic,
136
+ groupId: this.groupId,
137
+ partition,
138
+ offset,
139
+ failedAt: new Date().toISOString(),
140
+ }),
141
+ timestamp: Date.now().toString(),
142
+ }],
143
+ });
144
+ console.warn(`☠️ [${this.topic}] Poison message sent to DLQ ${dlqTopic} ` +
145
+ `(partition: ${partition}, offset: ${offset}, error: ${(error === null || error === void 0 ? void 0 : error.name) || 'Error'}: ${error === null || error === void 0 ? void 0 : error.message})`);
146
+ }
147
+ catch (dlqErr) {
148
+ console.error(`[${this.topic}] CRITICAL: Failed to publish to DLQ ${dlqTopic}: ${dlqErr.message}. ` +
149
+ `Original error: ${error === null || error === void 0 ? void 0 : error.message}. Message at partition ${partition} offset ${offset} will be retried by crash handler.`);
150
+ throw error;
151
+ }
152
+ });
153
+ }
96
154
  // Manual acknowledgment method
97
155
  // Accepts optional payload for custom consumer patterns (e.g., realtime-gateway)
98
156
  ack(payload) {
@@ -140,14 +198,7 @@ class Listener {
140
198
  }
141
199
  // Reset retry count on successful connection
142
200
  this.retryCount = 0;
143
- console.log(`🚀 [${this.topic}] About to call consumer.run() with groupId: ${this.groupId}`);
144
- console.log(`🚀 [${this.topic}] Starting consumer.run() with groupId: ${this.groupId}`);
145
- console.log(`🚀 [${this.topic}] Consumer configuration:`, {
146
- groupId: this.groupId,
147
- topic: this.topic,
148
- fromBeginning: this.fromBeginning,
149
- autoCommit: false,
150
- });
201
+ console.log(`🚀 [${this.topic}] Starting consumer.run() with groupId: ${this.groupId}, dlq: ${this.dlqProducer ? 'enabled' : 'disabled'}`);
151
202
  yield this.consumer.run({
152
203
  // CRITICAL: Disable auto-commit to prevent message loss during rebalancing
153
204
  // Offsets are only committed when we explicitly call ack()
@@ -164,8 +215,6 @@ class Listener {
164
215
  }]);
165
216
  return;
166
217
  }
167
- // Commented out key logging as requested
168
- // console.log(`📨 [${this.topic}] Message received -> groupId: ${this.groupId}, partition: ${payload.partition}, offset: ${payload.message.offset}, key: ${payload.message.key?.toString() || 'none'}`);
169
218
  console.log(`📨 [${this.topic}] Message received -> groupId: ${this.groupId}, partition: ${payload.partition}, offset: ${payload.message.offset}, value length: ${payload.message.value.toString().length}`);
170
219
  // Skip stale messages when maxEventAgeMs is configured
171
220
  if (this.maxEventAgeMs > 0 && payload.message.timestamp) {
@@ -181,17 +230,74 @@ class Listener {
181
230
  }
182
231
  // Store current payload for manual ack
183
232
  this.currentPayload = payload;
233
+ let data;
234
+ try {
235
+ data = JSON.parse(payload.message.value.toString());
236
+ }
237
+ catch (parseErr) {
238
+ console.error(`❌ [${this.topic}] JSON parse error at offset ${payload.message.offset} — skipping malformed message`);
239
+ if (this.dlqProducer) {
240
+ yield this.publishToDlq(payload.message.value.toString(), parseErr, payload.partition, payload.message.offset);
241
+ }
242
+ yield this.consumer.commitOffsets([{
243
+ topic: payload.topic,
244
+ partition: payload.partition,
245
+ offset: (BigInt(payload.message.offset) + BigInt(1)).toString()
246
+ }]);
247
+ this.currentPayload = undefined;
248
+ return;
249
+ }
184
250
  try {
185
- const data = JSON.parse(payload.message.value.toString());
186
251
  yield this.onMessage(data, payload);
187
252
  // Note: Child listeners MUST call this.ack() manually after successful processing
188
253
  // If they don't call ack(), the message will be redelivered after session timeout
189
254
  // This ensures at-least-once delivery semantics
190
255
  }
191
256
  catch (error) {
257
+ // --- DLQ-based poison message handling ---
258
+ // If a DLQ producer is available, we handle the error here instead of
259
+ // crashing the consumer. The message gets up to maxMessageRetries attempts in total, then is published
260
+ // to a DLQ topic and acked so the consumer can move on.
261
+ if (this.dlqProducer) {
262
+ if (this.isPermanentError(error)) {
263
+ console.error(`❌ [${this.topic}] Permanent error (${error.name || 'Error'}) at partition ${payload.partition} ` +
264
+ `offset ${payload.message.offset} — skipping retries, sending to DLQ`);
265
+ yield this.publishToDlq(data, error, payload.partition, payload.message.offset);
266
+ yield this.ack(payload);
267
+ this.currentPayload = undefined;
268
+ return;
269
+ }
270
+ // Retry transient errors with backoff
271
+ let lastRetryError = error;
272
+ for (let attempt = 2; attempt <= this.maxMessageRetries; attempt++) {
273
+ const delay = this.messageRetryBaseDelayMs * Math.pow(2, attempt - 2);
274
+ console.warn(`[${this.topic}] Retry ${attempt}/${this.maxMessageRetries} for offset ${payload.message.offset} ` +
275
+ `in ${delay}ms (error: ${lastRetryError.message})`);
276
+ yield new Promise(r => setTimeout(r, delay));
277
+ try {
278
+ this.currentPayload = payload;
279
+ yield this.onMessage(data, payload);
280
+ this.currentPayload = undefined;
281
+ return; // Retry succeeded — onMessage called ack()
282
+ }
283
+ catch (retryErr) {
284
+ lastRetryError = retryErr;
285
+ if (this.isPermanentError(retryErr)) {
286
+ console.error(`❌ [${this.topic}] Permanent error on retry ${attempt} — sending to DLQ`);
287
+ break;
288
+ }
289
+ }
290
+ }
291
+ // All retries exhausted — DLQ + ack
292
+ console.error(`❌ [${this.topic}] All ${this.maxMessageRetries} retries exhausted for offset ${payload.message.offset} — sending to DLQ`);
293
+ yield this.publishToDlq(data, lastRetryError, payload.partition, payload.message.offset);
294
+ yield this.ack(payload);
295
+ this.currentPayload = undefined;
296
+ return;
297
+ }
298
+ // --- Legacy behavior (no DLQ producer) ---
299
+ // Re-throw to trigger the crash handler, which will eventually process.exit(1)
192
300
  console.error(`❌ [${this.topic}] Error processing message for topic: ${this.topic}, offset: ${payload.message.offset}`, error);
193
- // In case of error, we don't commit the offset, so the message will be redelivered
194
- // This ensures failed messages are retried
195
301
  throw error;
196
302
  }
197
303
  finally {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aichatwar/shared",
3
- "version": "1.0.170",
3
+ "version": "1.0.171",
4
4
  "main": "./build/index.js",
5
5
  "types": "./build/index.d.ts",
6
6
  "files": [