@drarzter/kafka-client 0.5.7 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -96,7 +96,8 @@ function decodeHeaders(raw) {
   for (const [key, value] of Object.entries(raw)) {
     if (value === void 0) continue;
     if (Array.isArray(value)) {
-      result[key] = value.map((v) => Buffer.isBuffer(v) ? v.toString() : v).join(",");
+      const items = value.map((v) => Buffer.isBuffer(v) ? v.toString() : v);
+      result[key] = items[items.length - 1] ?? "";
    } else {
      result[key] = Buffer.isBuffer(value) ? value.toString() : value;
    }
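
Note: the internal decodeHeaders helper previously collapsed multi-value headers into a comma-joined string; 0.6.x keeps only the last value. A minimal sketch of the new behavior (the header name is illustrative, not from the package):

    // 0.5.7: { "x-attempt": [Buffer.from("1"), Buffer.from("2")] } decoded to "1,2"
    // 0.6.4: the same input decodes to the last entry only
    decodeHeaders({ "x-attempt": [Buffer.from("1"), Buffer.from("2")] });
    // => { "x-attempt": "2" }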
@@ -158,17 +159,23 @@ function resolveTopicName(topicOrDescriptor) {
   }
   return String(topicOrDescriptor);
 }
-function registerSchema(topicOrDesc, schemaRegistry) {
+function registerSchema(topicOrDesc, schemaRegistry, logger) {
   if (topicOrDesc?.__schema) {
     const topic2 = resolveTopicName(topicOrDesc);
+    const existing = schemaRegistry.get(topic2);
+    if (existing && existing !== topicOrDesc.__schema) {
+      logger?.warn(
+        `Schema conflict for topic "${topic2}": a different schema is already registered. Using the new schema \u2014 ensure consistent schemas to avoid silent validation mismatches.`
+      );
+    }
     schemaRegistry.set(topic2, topicOrDesc.__schema);
   }
 }
-async function validateMessage(topicOrDesc, message, deps) {
+async function validateMessage(topicOrDesc, message, deps, ctx) {
   const topicName = resolveTopicName(topicOrDesc);
   if (topicOrDesc?.__schema) {
     try {
-      return await topicOrDesc.__schema.parse(message);
+      return await topicOrDesc.__schema.parse(message, ctx);
     } catch (error) {
       throw new KafkaValidationError(topicName, message, {
         cause: error instanceof Error ? error : new Error(String(error))
@@ -179,7 +186,7 @@ async function validateMessage(topicOrDesc, message, deps) {
   const schema = deps.schemaRegistry.get(topicOrDesc);
   if (schema) {
     try {
-      return await schema.parse(message);
+      return await schema.parse(message, ctx);
     } catch (error) {
       throw new KafkaValidationError(topicName, message, {
         cause: error instanceof Error ? error : new Error(String(error))
@@ -202,9 +209,14 @@ async function buildSendPayload(topicOrDesc, messages, deps) {
     for (const inst of deps.instrumentation) {
       inst.beforeSend?.(topic2, envelopeHeaders);
     }
+    const sendCtx = {
+      topic: topic2,
+      headers: envelopeHeaders,
+      version: m.schemaVersion ?? 1
+    };
     return {
       value: JSON.stringify(
-        await validateMessage(topicOrDesc, m.value, deps)
+        await validateMessage(topicOrDesc, m.value, deps, sendCtx)
       ),
       key: m.key ?? null,
       headers: envelopeHeaders
@@ -248,19 +260,26 @@ function getOrCreateConsumer(groupId, fromBeginning, autoCommit, deps) {
   consumers.set(groupId, consumer);
   return consumer;
 }
-function buildSchemaMap(topics, schemaRegistry, optionSchemas) {
+function buildSchemaMap(topics, schemaRegistry, optionSchemas, logger) {
   const schemaMap = /* @__PURE__ */ new Map();
+  const registerChecked = (name, schema) => {
+    const existing = schemaRegistry.get(name);
+    if (existing && existing !== schema) {
+      logger?.warn(
+        `Schema conflict for topic "${name}": a different schema is already registered. Using the new schema \u2014 ensure consistent schemas to avoid silent validation mismatches.`
+      );
+    }
+    schemaMap.set(name, schema);
+    schemaRegistry.set(name, schema);
+  };
   for (const t of topics) {
     if (t?.__schema) {
-      const name = resolveTopicName(t);
-      schemaMap.set(name, t.__schema);
-      schemaRegistry.set(name, t.__schema);
+      registerChecked(resolveTopicName(t), t.__schema);
     }
   }
   if (optionSchemas) {
     for (const [k, v] of optionSchemas) {
-      schemaMap.set(k, v);
-      schemaRegistry.set(k, v);
+      registerChecked(k, v);
     }
   }
   return schemaMap;
@@ -287,8 +306,13 @@ function parseJsonMessage(raw, topic2, logger) {
 async function validateWithSchema(message, raw, topic2, schemaMap, interceptors, dlq, deps) {
   const schema = schemaMap.get(topic2);
   if (!schema) return message;
+  const ctx = {
+    topic: topic2,
+    headers: deps.originalHeaders ?? {},
+    version: Number(deps.originalHeaders?.["x-schema-version"] ?? 1)
+  };
   try {
-    return await schema.parse(message);
+    return await schema.parse(message, ctx);
   } catch (error) {
     const err = toError(error);
     const validationError = new KafkaValidationError(topic2, message, {
@@ -325,7 +349,7 @@ async function validateWithSchema(message, raw, topic2, schemaMap, interceptors,
     return null;
   }
 }
-async function sendToDlq(topic2, rawMessage, deps, meta) {
+function buildDlqPayload(topic2, rawMessage, meta) {
   const dlqTopic = `${topic2}.dlq`;
   const headers = {
     ...meta?.originalHeaders ?? {},
@@ -335,54 +359,82 @@ async function sendToDlq(topic2, rawMessage, deps, meta) {
     "x-dlq-error-stack": meta?.error.stack?.slice(0, 2e3) ?? "",
     "x-dlq-attempt-count": String(meta?.attempt ?? 0)
   };
+  return { topic: dlqTopic, messages: [{ value: rawMessage, headers }] };
+}
+async function sendToDlq(topic2, rawMessage, deps, meta) {
+  const payload = buildDlqPayload(topic2, rawMessage, meta);
   try {
-    await deps.producer.send({
-      topic: dlqTopic,
-      messages: [{ value: rawMessage, headers }]
-    });
-    deps.logger.warn(`Message sent to DLQ: ${dlqTopic}`);
+    await deps.producer.send(payload);
+    deps.logger.warn(`Message sent to DLQ: ${payload.topic}`);
   } catch (error) {
-    deps.logger.error(
-      `Failed to send message to DLQ ${dlqTopic}:`,
-      toError(error).stack
-    );
+    const err = toError(error);
+    deps.logger.error(`Failed to send message to DLQ ${payload.topic}:`, err.stack);
+    await deps.onMessageLost?.({
+      topic: topic2,
+      error: err,
+      attempt: meta?.attempt ?? 0,
+      headers: meta?.originalHeaders ?? {}
+    });
   }
 }
 var RETRY_HEADER_ATTEMPT = "x-retry-attempt";
 var RETRY_HEADER_AFTER = "x-retry-after";
 var RETRY_HEADER_MAX_RETRIES = "x-retry-max-retries";
 var RETRY_HEADER_ORIGINAL_TOPIC = "x-retry-original-topic";
-async function sendToRetryTopic(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders, deps) {
+function buildRetryTopicPayload(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders) {
   const retryTopic = `${originalTopic}.retry.${attempt}`;
-  const {
-    [RETRY_HEADER_ATTEMPT]: _a,
-    [RETRY_HEADER_AFTER]: _b,
-    [RETRY_HEADER_MAX_RETRIES]: _c,
-    [RETRY_HEADER_ORIGINAL_TOPIC]: _d,
-    ...userHeaders
-  } = originalHeaders;
-  const headers = {
-    ...userHeaders,
-    [RETRY_HEADER_ATTEMPT]: String(attempt),
-    [RETRY_HEADER_AFTER]: String(Date.now() + delayMs),
-    [RETRY_HEADER_MAX_RETRIES]: String(maxRetries),
-    [RETRY_HEADER_ORIGINAL_TOPIC]: originalTopic
+  function buildHeaders(hdr) {
+    const {
+      [RETRY_HEADER_ATTEMPT]: _a,
+      [RETRY_HEADER_AFTER]: _b,
+      [RETRY_HEADER_MAX_RETRIES]: _c,
+      [RETRY_HEADER_ORIGINAL_TOPIC]: _d,
+      ...userHeaders
+    } = hdr;
+    return {
+      ...userHeaders,
+      [RETRY_HEADER_ATTEMPT]: String(attempt),
+      [RETRY_HEADER_AFTER]: String(Date.now() + delayMs),
+      [RETRY_HEADER_MAX_RETRIES]: String(maxRetries),
+      [RETRY_HEADER_ORIGINAL_TOPIC]: originalTopic
+    };
+  }
+  return {
+    topic: retryTopic,
+    messages: rawMessages.map((value, i) => ({
+      value,
+      headers: buildHeaders(
+        Array.isArray(originalHeaders) ? originalHeaders[i] ?? {} : originalHeaders
+      )
+    }))
   };
+}
+async function sendToRetryTopic(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders, deps) {
+  const payload = buildRetryTopicPayload(
+    originalTopic,
+    rawMessages,
+    attempt,
+    maxRetries,
+    delayMs,
+    originalHeaders
+  );
   try {
-    for (const raw of rawMessages) {
-      await deps.producer.send({
-        topic: retryTopic,
-        messages: [{ value: raw, headers }]
-      });
-    }
+    await deps.producer.send(payload);
     deps.logger.warn(
-      `Message queued in retry topic ${retryTopic} (attempt ${attempt}/${maxRetries})`
+      `Message queued in retry topic ${payload.topic} (attempt ${attempt}/${maxRetries})`
     );
   } catch (error) {
+    const err = toError(error);
     deps.logger.error(
-      `Failed to send message to retry topic ${retryTopic}:`,
-      toError(error).stack
+      `Failed to send message to retry topic ${payload.topic}:`,
+      err.stack
    );
+    await deps.onMessageLost?.({
+      topic: originalTopic,
+      error: err,
+      attempt,
+      headers: Array.isArray(originalHeaders) ? originalHeaders[0] ?? {} : originalHeaders
+    });
   }
 }
 async function broadcastToInterceptors(envelopes, interceptors, cb) {
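
Note: failed publishes to a DLQ or retry topic now invoke the optional onMessageLost hook instead of only logging. A hedged sketch of wiring the hook (the hook's argument shape is taken from this diff; passing it as a constructor option and the metrics sink are assumptions):

    const client = new KafkaClient("billing", "billing-group", ["localhost:9092"], {
      // Called when a message could not be routed to its DLQ or retry topic.
      onMessageLost: async ({ topic, error, attempt, headers }) => {
        metrics.increment("kafka.message_lost", { topic }); // placeholder sink
        console.error(`Lost message on ${topic} after attempt ${attempt}:`, error.message, headers);
      }
    });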
@@ -394,11 +446,17 @@ async function broadcastToInterceptors(envelopes, interceptors, cb) {
 }
 async function runHandlerWithPipeline(fn, envelopes, interceptors, instrumentation) {
   const cleanups = [];
+  const wraps = [];
   try {
     for (const env of envelopes) {
       for (const inst of instrumentation) {
-        const cleanup = inst.beforeConsume?.(env);
-        if (typeof cleanup === "function") cleanups.push(cleanup);
+        const result = inst.beforeConsume?.(env);
+        if (typeof result === "function") {
+          cleanups.push(result);
+        } else if (result) {
+          if (result.cleanup) cleanups.push(result.cleanup);
+          if (result.wrap) wraps.push(result.wrap);
+        }
       }
     }
     for (const env of envelopes) {
@@ -406,7 +464,13 @@ async function runHandlerWithPipeline(fn, envelopes, interceptors, instrumentati
       await interceptor.before?.(env);
     }
   }
-  await fn();
+  let runFn = fn;
+  for (let i = wraps.length - 1; i >= 0; i--) {
+    const wrap = wraps[i];
+    const inner = runFn;
+    runFn = () => wrap(inner);
+  }
+  await runFn();
   for (const env of envelopes) {
     for (const interceptor of interceptors) {
       await interceptor.after?.(env);
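
Note: instrumentation beforeConsume may now return { cleanup, wrap } instead of a bare cleanup function; collected wraps are composed around the handler, with the last registered wrap running innermost. A sketch of a plugin using the new shape (names are illustrative):

    const timingInstrumentation = {
      beforeConsume(envelope) {
        const start = Date.now();
        return {
          // wrap: runs around the handler itself (new in 0.6.x)
          wrap: async (next) => {
            try {
              return await next();
            } finally {
              console.log(`handled ${envelope.topic} in ${Date.now() - start}ms`);
            }
          }
        };
      }
    };

Returning a plain function still works and is treated as a cleanup, so existing plugins are unaffected.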
@@ -476,7 +540,7 @@ async function executeWithRetry(fn, ctx, deps) {
         1,
         retry.maxRetries,
         delay,
-        envelopes[0]?.headers ?? {},
+        isBatch ? envelopes.map((e) => e.headers) : envelopes[0]?.headers ?? {},
         deps
       );
     } else if (isLastAttempt) {
@@ -499,7 +563,7 @@ async function executeWithRetry(fn, ctx, deps) {
       }
     } else {
       const cap = Math.min(backoffMs * 2 ** (attempt - 1), maxBackoffMs);
-      await sleep(Math.random() * cap);
+      await sleep(Math.floor(Math.random() * cap));
     }
   }
 }
@@ -578,6 +642,7 @@ async function handleEachBatch(payload, opts, deps) {
     interceptors,
     dlq,
     retry,
+    retryTopics,
     timeoutMs,
     wrapWithTimeout
   } = opts;
@@ -612,11 +677,12 @@ async function handleEachBatch(payload, opts, deps) {
     },
     {
       envelope: envelopes,
-      rawMessages: batch.messages.filter((m) => m.value).map((m) => m.value.toString()),
+      rawMessages,
       interceptors,
       dlq,
       retry,
-      isBatch: true
+      isBatch: true,
+      retryTopics
     },
     deps
   );
@@ -633,10 +699,11 @@ async function subscribeWithRetry(consumer, topics, logger, retryOpts) {
     } catch (error) {
       if (attempt === maxAttempts) throw error;
       const msg = toError(error).message;
+      const delay = Math.floor(Math.random() * backoffMs);
       logger.warn(
-        `Failed to subscribe to [${topics.join(", ")}] (attempt ${attempt}/${maxAttempts}): ${msg}. Retrying in ${backoffMs}ms...`
+        `Failed to subscribe to [${topics.join(", ")}] (attempt ${attempt}/${maxAttempts}): ${msg}. Retrying in ${delay}ms...`
       );
-      await sleep(backoffMs);
+      await sleep(delay);
     }
   }
 }
@@ -665,7 +732,8 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
     onMessageLost,
     ensureTopic,
     getOrCreateConsumer: getOrCreateConsumer2,
-    runningConsumers
+    runningConsumers,
+    createRetryTxProducer
   } = deps;
   const backoffMs = retry.backoffMs ?? 1e3;
   const maxBackoffMs = retry.maxBackoffMs ?? 3e4;
@@ -673,6 +741,7 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
   for (const lt of levelTopics) {
     await ensureTopic(lt);
   }
+  const levelTxProducer = await createRetryTxProducer(`${levelGroupId}-tx`);
   const consumer = getOrCreateConsumer2(levelGroupId, false, false);
   await consumer.connect();
   await subscribeWithRetry(consumer, levelTopics, logger);
@@ -761,22 +830,67 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
         const nextLevel = level + 1;
         const cap = Math.min(backoffMs * 2 ** level, maxBackoffMs);
         const delay = Math.floor(Math.random() * cap);
-        await sendToRetryTopic(
+        const { topic: rtTopic, messages: rtMsgs } = buildRetryTopicPayload(
           originalTopic,
           [raw],
           nextLevel,
           currentMaxRetries,
           delay,
-          headers,
-          pipelineDeps
+          headers
         );
+        const tx = await levelTxProducer.transaction();
+        try {
+          await tx.send({ topic: rtTopic, messages: rtMsgs });
+          await tx.sendOffsets({
+            consumer,
+            topics: [{ topic: nextOffset.topic, partitions: [{ partition: nextOffset.partition, offset: nextOffset.offset }] }]
+          });
+          await tx.commit();
+          logger.warn(
+            `Message routed to ${rtTopic} (EOS, level ${nextLevel}/${currentMaxRetries})`
+          );
+        } catch (txErr) {
+          try {
+            await tx.abort();
+          } catch {
+          }
+          logger.error(
+            `EOS routing to ${rtTopic} failed \u2014 message will be redelivered:`,
+            toError(txErr).stack
+          );
+          return;
+        }
       } else if (dlq) {
-        await sendToDlq(originalTopic, raw, pipelineDeps, {
-          error,
-          // +1 to account for the main consumer's initial attempt before routing.
-          attempt: level + 1,
-          originalHeaders: headers
-        });
+        const { topic: dTopic, messages: dMsgs } = buildDlqPayload(
+          originalTopic,
+          raw,
+          {
+            error,
+            // +1 to account for the main consumer's initial attempt before routing.
+            attempt: level + 1,
+            originalHeaders: headers
+          }
+        );
+        const tx = await levelTxProducer.transaction();
+        try {
+          await tx.send({ topic: dTopic, messages: dMsgs });
+          await tx.sendOffsets({
+            consumer,
+            topics: [{ topic: nextOffset.topic, partitions: [{ partition: nextOffset.partition, offset: nextOffset.offset }] }]
+          });
+          await tx.commit();
+          logger.warn(`Message sent to DLQ: ${dTopic} (EOS)`);
+        } catch (txErr) {
+          try {
+            await tx.abort();
+          } catch {
+          }
+          logger.error(
+            `EOS DLQ routing to ${dTopic} failed \u2014 message will be redelivered:`,
+            toError(txErr).stack
+          );
+          return;
+        }
       } else {
         await onMessageLost?.({
           topic: originalTopic,
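
Note: retry-level routing is now transactional (EOS): the produce to the next retry topic or DLQ and the consumer offset commit belong to one transaction, so a crash between the two can no longer drop or double-process a message. The core of the pattern, reduced from the calls above (variable names illustrative):

    const tx = await levelTxProducer.transaction();
    try {
      await tx.send({ topic, messages });                    // produce to retry topic / DLQ
      await tx.sendOffsets({ consumer, topics: [offsets] }); // commit offset in the same txn
      await tx.commit();
    } catch (err) {
      await tx.abort(); // offsets stay uncommitted; the message is redelivered
    }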
@@ -784,8 +898,8 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
           attempt: level,
           headers
         });
+        await consumer.commitOffsets([nextOffset]);
       }
-      await consumer.commitOffsets([nextOffset]);
     }
   });
   runningConsumers.set(levelGroupId, "eachMessage");
@@ -823,6 +937,8 @@ var KafkaClient = class {
   kafka;
   producer;
   txProducer;
+  /** Maps transactionalId → Producer for each active retry level consumer. */
+  retryTxProducers = /* @__PURE__ */ new Map();
   consumers = /* @__PURE__ */ new Map();
   admin;
   logger;
@@ -840,6 +956,8 @@ var KafkaClient = class {
   onMessageLost;
   onRebalance;
   isAdminConnected = false;
+  inFlightTotal = 0;
+  drainResolvers = [];
   clientId;
   constructor(clientId, groupId, brokers, options) {
     this.clientId = clientId;
@@ -847,7 +965,8 @@ var KafkaClient = class {
     this.logger = options?.logger ?? {
       log: (msg) => console.log(`[KafkaClient:${clientId}] ${msg}`),
       warn: (msg, ...args) => console.warn(`[KafkaClient:${clientId}] ${msg}`, ...args),
-      error: (msg, ...args) => console.error(`[KafkaClient:${clientId}] ${msg}`, ...args)
+      error: (msg, ...args) => console.error(`[KafkaClient:${clientId}] ${msg}`, ...args),
+      debug: (msg, ...args) => console.debug(`[KafkaClient:${clientId}] ${msg}`, ...args)
     };
     this.autoCreateTopicsEnabled = options?.autoCreateTopics ?? false;
     this.strictSchemasEnabled = options?.strictSchemas ?? true;
@@ -891,7 +1010,7 @@ var KafkaClient = class {
   /** Execute multiple sends atomically. Commits on success, aborts on error. */
   async transaction(fn) {
     if (!this.txProducer) {
-      this.txProducer = this.kafka.producer({
+      const p = this.kafka.producer({
         kafkaJS: {
           acks: -1,
           idempotent: true,
@@ -899,7 +1018,8 @@ var KafkaClient = class {
           maxInFlightRequests: 1
         }
       });
-      await this.txProducer.connect();
+      await p.connect();
+      this.txProducer = p;
     }
     const tx = await this.txProducer.transaction();
     try {
@@ -916,9 +1036,12 @@ var KafkaClient = class {
         }
       ]);
       await tx.send(payload);
+      this.notifyAfterSend(payload.topic, payload.messages.length);
      },
      sendBatch: async (topicOrDesc, messages) => {
-        await tx.send(await this.preparePayload(topicOrDesc, messages));
+        const payload = await this.preparePayload(topicOrDesc, messages);
+        await tx.send(payload);
+        this.notifyAfterSend(payload.topic, payload.messages.length);
      }
    };
    await fn(ctx);
@@ -955,23 +1078,28 @@ var KafkaClient = class {
     const deps = this.messageDeps;
     const timeoutMs = options.handlerTimeoutMs;
     await consumer.run({
-      eachMessage: (payload) => handleEachMessage(
-        payload,
-        {
-          schemaMap,
-          handleMessage,
-          interceptors,
-          dlq,
-          retry,
-          retryTopics: options.retryTopics,
-          timeoutMs,
-          wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
-        },
-        deps
+      eachMessage: (payload) => this.trackInFlight(
+        () => handleEachMessage(
+          payload,
+          {
+            schemaMap,
+            handleMessage,
+            interceptors,
+            dlq,
+            retry,
+            retryTopics: options.retryTopics,
+            timeoutMs,
+            wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
+          },
+          deps
+        )
       )
     });
     this.runningConsumers.set(gid, "eachMessage");
     if (options.retryTopics && retry) {
+      if (!this.autoCreateTopicsEnabled) {
+        await this.validateRetryTopicsExist(topicNames, retry.maxRetries);
+      }
       const companions = await startRetryTopicConsumers(
         topicNames,
         gid,
@@ -988,25 +1116,65 @@ var KafkaClient = class {
     return { groupId: gid, stop: () => this.stopConsumer(gid) };
   }
   async startBatchConsumer(topics, handleBatch, options = {}) {
-    const { consumer, schemaMap, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachBatch", options);
+    if (options.retryTopics && !options.retry) {
+      throw new Error(
+        "retryTopics requires retry to be configured \u2014 set retry.maxRetries to enable the retry topic chain"
+      );
+    }
+    if (options.autoCommit !== false) {
+      this.logger.debug?.(
+        `startBatchConsumer: autoCommit is enabled (default true). If your handler calls resolveOffset() or commitOffsetsIfNecessary(), set autoCommit: false to avoid offset conflicts.`
+      );
+    }
+    const { consumer, schemaMap, topicNames, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachBatch", options);
     const deps = this.messageDeps;
     const timeoutMs = options.handlerTimeoutMs;
     await consumer.run({
-      eachBatch: (payload) => handleEachBatch(
-        payload,
-        {
-          schemaMap,
-          handleBatch,
-          interceptors,
-          dlq,
-          retry,
-          timeoutMs,
-          wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
-        },
-        deps
+      eachBatch: (payload) => this.trackInFlight(
+        () => handleEachBatch(
+          payload,
+          {
+            schemaMap,
+            handleBatch,
+            interceptors,
+            dlq,
+            retry,
+            retryTopics: options.retryTopics,
+            timeoutMs,
+            wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
+          },
+          deps
+        )
       )
     });
     this.runningConsumers.set(gid, "eachBatch");
+    if (options.retryTopics && retry) {
+      if (!this.autoCreateTopicsEnabled) {
+        await this.validateRetryTopicsExist(topicNames, retry.maxRetries);
+      }
+      const handleMessageForRetry = (env) => handleBatch([env], {
+        partition: env.partition,
+        highWatermark: env.offset,
+        heartbeat: async () => {
+        },
+        resolveOffset: () => {
+        },
+        commitOffsetsIfNecessary: async () => {
+        }
+      });
+      const companions = await startRetryTopicConsumers(
+        topicNames,
+        gid,
+        handleMessageForRetry,
+        retry,
+        dlq,
+        interceptors,
+        schemaMap,
+        this.retryTopicDeps,
+        options.retryTopicAssignmentTimeoutMs
+      );
+      this.companionGroupIds.set(gid, companions);
+    }
     return { groupId: gid, stop: () => this.stopConsumer(gid) };
   }
   // ── Consumer lifecycle ───────────────────────────────────────────
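
Note: startBatchConsumer now supports retryTopics (previously eachMessage-only) and fails fast when retryTopics is set without retry. A hedged usage sketch (topic name and handler body are illustrative; the handler signature is inferred from the handleBatch calls above):

    await client.startBatchConsumer(["orders"], async (envelopes, batchCtx) => {
      for (const env of envelopes) {
        await handleOrder(env.value); // placeholder business logic
        batchCtx.resolveOffset(env.offset);
      }
      await batchCtx.commitOffsetsIfNecessary();
    }, {
      retry: { maxRetries: 3 },
      retryTopics: true,
      autoCommit: false // silences the new debug warning about manual offset calls
    });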
@@ -1036,18 +1204,32 @@ var KafkaClient = class {
         this.consumerCreationOptions.delete(cGroupId);
         this.logger.log(`Retry consumer disconnected: group "${cGroupId}"`);
        }
+        const txId = `${cGroupId}-tx`;
+        const txProducer = this.retryTxProducers.get(txId);
+        if (txProducer) {
+          await txProducer.disconnect().catch(() => {
+          });
+          this.retryTxProducers.delete(txId);
+        }
      }
      this.companionGroupIds.delete(groupId);
    } else {
-      const tasks = Array.from(this.consumers.values()).map(
-        (c) => c.disconnect().catch(() => {
-        })
-      );
+      const tasks = [
+        ...Array.from(this.consumers.values()).map(
+          (c) => c.disconnect().catch(() => {
+          })
+        ),
+        ...Array.from(this.retryTxProducers.values()).map(
+          (p) => p.disconnect().catch(() => {
+          })
+        )
+      ];
      await Promise.allSettled(tasks);
      this.consumers.clear();
      this.runningConsumers.clear();
      this.consumerCreationOptions.clear();
      this.companionGroupIds.clear();
+      this.retryTxProducers.clear();
      this.logger.log("All consumers disconnected");
    }
  }
@@ -1055,17 +1237,24 @@ var KafkaClient = class {
   * Query consumer group lag per partition.
   * Lag = broker high-watermark − last committed offset.
   * A committed offset of -1 (nothing committed yet) counts as full lag.
+  *
+  * Returns an empty array when the consumer group has never committed any
+  * offsets (freshly created group, `autoCommit: false` with no manual commits,
+  * or group not yet assigned). This is a Kafka protocol limitation:
+  * `fetchOffsets` only returns data for topic-partitions that have at least one
+  * committed offset. Use `checkStatus()` to verify broker connectivity in that case.
   */
  async getConsumerLag(groupId) {
    const gid = groupId ?? this.defaultGroupId;
-    if (!this.isAdminConnected) {
-      await this.admin.connect();
-      this.isAdminConnected = true;
-    }
+    await this.ensureAdminConnected();
    const committedByTopic = await this.admin.fetchOffsets({ groupId: gid });
+    const brokerOffsetsAll = await Promise.all(
+      committedByTopic.map(({ topic: topic2 }) => this.admin.fetchTopicOffsets(topic2))
+    );
    const result = [];
-    for (const { topic: topic2, partitions } of committedByTopic) {
-      const brokerOffsets = await this.admin.fetchTopicOffsets(topic2);
+    for (let i = 0; i < committedByTopic.length; i++) {
+      const { topic: topic2, partitions } = committedByTopic[i];
+      const brokerOffsets = brokerOffsetsAll[i];
      for (const { partition, offset } of partitions) {
        const broker = brokerOffsets.find((o) => o.partition === partition);
        if (!broker) continue;
@@ -1080,10 +1269,7 @@ var KafkaClient = class {
   /** Check broker connectivity. Never throws — returns a discriminated union. */
   async checkStatus() {
     try {
-      if (!this.isAdminConnected) {
-        await this.admin.connect();
-        this.isAdminConnected = true;
-      }
+      await this.ensureAdminConnected();
       const topics = await this.admin.listTopics();
       return { status: "up", clientId: this.clientId, topics };
     } catch (error) {
@@ -1098,12 +1284,17 @@ var KafkaClient = class {
     return this.clientId;
   }
   /** Gracefully disconnect producer, all consumers, and admin. */
-  async disconnect() {
+  async disconnect(drainTimeoutMs = 3e4) {
+    await this.waitForDrain(drainTimeoutMs);
     const tasks = [this.producer.disconnect()];
     if (this.txProducer) {
       tasks.push(this.txProducer.disconnect());
       this.txProducer = void 0;
     }
+    for (const p of this.retryTxProducers.values()) {
+      tasks.push(p.disconnect());
+    }
+    this.retryTxProducers.clear();
     for (const consumer of this.consumers.values()) {
       tasks.push(consumer.disconnect());
     }
@@ -1118,9 +1309,67 @@ var KafkaClient = class {
     this.companionGroupIds.clear();
     this.logger.log("All connections closed");
   }
+  // ── Graceful shutdown ────────────────────────────────────────────
+  /**
+   * NestJS lifecycle hook — called automatically when the host module is torn down.
+   * Drains in-flight handlers and disconnects all producers, consumers, and admin.
+   * `KafkaModule` relies on this method; no separate destroy provider is needed.
+   */
+  async onModuleDestroy() {
+    await this.disconnect();
+  }
+  /**
+   * Register SIGTERM / SIGINT handlers that drain in-flight messages before
+   * disconnecting. Call this once after constructing the client in non-NestJS apps.
+   * NestJS apps get drain for free via `onModuleDestroy` → `disconnect()`.
+   */
+  enableGracefulShutdown(signals = ["SIGTERM", "SIGINT"], drainTimeoutMs = 3e4) {
+    const handler = () => {
+      this.logger.log(
+        "Shutdown signal received \u2014 draining in-flight handlers..."
+      );
+      this.disconnect(drainTimeoutMs).catch(
+        (err) => this.logger.error(
+          "Error during graceful shutdown:",
+          toError(err).message
+        )
+      );
+    };
+    for (const signal of signals) {
+      process.once(signal, handler);
+    }
+  }
+  trackInFlight(fn) {
+    this.inFlightTotal++;
+    return fn().finally(() => {
+      this.inFlightTotal--;
+      if (this.inFlightTotal === 0) {
+        this.drainResolvers.splice(0).forEach((r) => r());
+      }
+    });
+  }
+  waitForDrain(timeoutMs) {
+    if (this.inFlightTotal === 0) return Promise.resolve();
+    return new Promise((resolve) => {
+      let handle;
+      const onDrain = () => {
+        clearTimeout(handle);
+        resolve();
+      };
+      this.drainResolvers.push(onDrain);
+      handle = setTimeout(() => {
+        const idx = this.drainResolvers.indexOf(onDrain);
+        if (idx !== -1) this.drainResolvers.splice(idx, 1);
+        this.logger.warn(
+          `Drain timed out after ${timeoutMs}ms \u2014 ${this.inFlightTotal} handler(s) still in flight`
+        );
+        resolve();
+      }, timeoutMs);
+    });
+  }
   // ── Private helpers ──────────────────────────────────────────────
   async preparePayload(topicOrDesc, messages) {
-    registerSchema(topicOrDesc, this.schemaRegistry);
+    registerSchema(topicOrDesc, this.schemaRegistry, this.logger);
     const payload = await buildSendPayload(
       topicOrDesc,
       messages,
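
Note: disconnect() now drains in-flight handlers before closing connections (default 30s timeout), and non-NestJS apps can opt into signal-driven shutdown. A short sketch using the signatures above (broker address illustrative):

    const client = new KafkaClient("api", "api-group", ["localhost:9092"]);
    // Plain Node apps: register SIGTERM/SIGINT handlers once at startup.
    client.enableGracefulShutdown(["SIGTERM", "SIGINT"], 10000);
    // Or drain explicitly with a custom timeout before exiting:
    await client.disconnect(10000);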
@@ -1153,12 +1402,78 @@ var KafkaClient = class {
     }, timeoutMs);
     return promise;
   }
-  async ensureTopic(topic2) {
-    if (!this.autoCreateTopicsEnabled || this.ensuredTopics.has(topic2)) return;
-    if (!this.isAdminConnected) {
+  /**
+   * When `retryTopics: true` and `autoCreateTopics: false`, verify that every
+   * `<topic>.retry.<level>` topic already exists. Throws a clear error at startup
+   * rather than silently discovering missing topics on the first handler failure.
+   */
+  async validateRetryTopicsExist(topicNames, maxRetries) {
+    await this.ensureAdminConnected();
+    const existing = new Set(await this.admin.listTopics());
+    const missing = [];
+    for (const t of topicNames) {
+      for (let level = 1; level <= maxRetries; level++) {
+        const retryTopic = `${t}.retry.${level}`;
+        if (!existing.has(retryTopic)) missing.push(retryTopic);
+      }
+    }
+    if (missing.length > 0) {
+      throw new Error(
+        `retryTopics: true but the following retry topics do not exist: ${missing.join(", ")}. Create them manually or set autoCreateTopics: true.`
+      );
+    }
+  }
+  /**
+   * When `autoCreateTopics` is disabled, verify that `<topic>.dlq` exists for every
+   * consumed topic. Throws a clear error at startup rather than silently discovering
+   * missing DLQ topics on the first handler failure.
+   */
+  async validateDlqTopicsExist(topicNames) {
+    await this.ensureAdminConnected();
+    const existing = new Set(await this.admin.listTopics());
+    const missing = topicNames.filter((t) => !existing.has(`${t}.dlq`)).map((t) => `${t}.dlq`);
+    if (missing.length > 0) {
+      throw new Error(
+        `dlq: true but the following DLQ topics do not exist: ${missing.join(", ")}. Create them manually or set autoCreateTopics: true.`
+      );
+    }
+  }
+  /**
+   * Connect the admin client if not already connected.
+   * The flag is only set to `true` after a successful connect — if `admin.connect()`
+   * throws the flag remains `false` so the next call will retry the connection.
+   */
+  async ensureAdminConnected() {
+    if (this.isAdminConnected) return;
+    try {
       await this.admin.connect();
       this.isAdminConnected = true;
+    } catch (err) {
+      this.isAdminConnected = false;
+      throw err;
     }
+  }
+  /**
+   * Create and connect a transactional producer for EOS retry routing.
+   * Each retry level consumer gets its own producer with a unique `transactionalId`
+   * so Kafka can fence stale producers on restart without affecting other levels.
+   */
+  async createRetryTxProducer(transactionalId) {
+    const p = this.kafka.producer({
+      kafkaJS: {
+        acks: -1,
+        idempotent: true,
+        transactionalId,
+        maxInFlightRequests: 1
+      }
+    });
+    await p.connect();
+    this.retryTxProducers.set(transactionalId, p);
+    return p;
+  }
+  async ensureTopic(topic2) {
+    if (!this.autoCreateTopicsEnabled || this.ensuredTopics.has(topic2)) return;
+    await this.ensureAdminConnected();
     await this.admin.createTopics({
       topics: [{ topic: topic2, numPartitions: this.numPartitions }]
     });
@@ -1182,6 +1497,12 @@ var KafkaClient = class {
         `Cannot use ${mode} on consumer group "${gid}" \u2014 it is already running with ${oppositeMode}. Use a different groupId for this consumer.`
       );
     }
+    if (existingMode === mode) {
+      const callerName = mode === "eachMessage" ? "startConsumer" : "startBatchConsumer";
+      throw new Error(
+        `${callerName}("${gid}") called twice \u2014 this group is already consuming. Call stopConsumer("${gid}") first or pass a different groupId.`
+      );
+    }
     const consumer = getOrCreateConsumer(
       gid,
       fromBeginning,
@@ -1191,7 +1512,8 @@ var KafkaClient = class {
     const schemaMap = buildSchemaMap(
       topics,
       this.schemaRegistry,
-      optionSchemas
+      optionSchemas,
+      this.logger
     );
     const topicNames = topics.map((t) => resolveTopicName(t));
     for (const t of topicNames) {
@@ -1201,6 +1523,9 @@ var KafkaClient = class {
       for (const t of topicNames) {
         await this.ensureTopic(`${t}.dlq`);
       }
+      if (!this.autoCreateTopicsEnabled) {
+        await this.validateDlqTopicsExist(topicNames);
+      }
     }
     await consumer.connect();
     await subscribeWithRetry(
@@ -1219,7 +1544,8 @@ var KafkaClient = class {
     return {
       schemaRegistry: this.schemaRegistry,
       strictSchemasEnabled: this.strictSchemasEnabled,
-      instrumentation: this.instrumentation
+      instrumentation: this.instrumentation,
+      logger: this.logger
     };
   }
   get consumerOpsDeps() {
@@ -1247,7 +1573,8 @@ var KafkaClient = class {
       onMessageLost: this.onMessageLost,
       ensureTopic: (t) => this.ensureTopic(t),
       getOrCreateConsumer: (gid, fb, ac) => getOrCreateConsumer(gid, fb, ac, this.consumerOpsDeps),
-      runningConsumers: this.runningConsumers
+      runningConsumers: this.runningConsumers,
+      createRetryTxProducer: (txId) => this.createRetryTxProducer(txId)
     };
   }
 };
@@ -1390,11 +1717,7 @@ var KafkaModule = class {
       global: options.isGlobal ?? false,
       module: KafkaModule,
       imports: [import_core2.DiscoveryModule],
-      providers: [
-        kafkaClientProvider,
-        KafkaModule.buildDestroyProvider(token),
-        KafkaExplorer
-      ],
+      providers: [kafkaClientProvider, KafkaExplorer],
       exports: [kafkaClientProvider]
     };
   }
@@ -1410,11 +1733,7 @@ var KafkaModule = class {
       global: asyncOptions.isGlobal ?? false,
       module: KafkaModule,
       imports: [...asyncOptions.imports || [], import_core2.DiscoveryModule],
-      providers: [
-        kafkaClientProvider,
-        KafkaModule.buildDestroyProvider(token),
-        KafkaExplorer
-      ],
+      providers: [kafkaClientProvider, KafkaExplorer],
       exports: [kafkaClientProvider]
     };
   }
@@ -1436,15 +1755,6 @@ var KafkaModule = class {
     await client.connectProducer();
     return client;
   }
-  static buildDestroyProvider(token) {
-    return {
-      provide: `${token}_DESTROY`,
-      useFactory: (client) => ({
-        onModuleDestroy: () => client.disconnect()
-      }),
-      inject: [token]
-    };
-  }
 };
 KafkaModule = __decorateClass([
   (0, import_common3.Module)({})