@drarzter/kafka-client 0.5.7 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,7 +39,8 @@ function decodeHeaders(raw) {
  for (const [key, value] of Object.entries(raw)) {
  if (value === void 0) continue;
  if (Array.isArray(value)) {
- result[key] = value.map((v) => Buffer.isBuffer(v) ? v.toString() : v).join(",");
+ const items = value.map((v) => Buffer.isBuffer(v) ? v.toString() : v);
+ result[key] = items[items.length - 1] ?? "";
  } else {
  result[key] = Buffer.isBuffer(value) ? value.toString() : value;
  }
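
Note: for array-valued headers, the decoded value is now the last element rather than a comma-joined string. A minimal sketch of the difference, assuming Buffer values as delivered by the underlying Kafka client:

    // 0.5.7: decodeHeaders({ trace: [Buffer.from("a"), Buffer.from("b")] })  ->  { trace: "a,b" }
    // 0.6.3: decodeHeaders({ trace: [Buffer.from("a"), Buffer.from("b")] })  ->  { trace: "b" }  (last value wins)
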
@@ -101,17 +102,23 @@ function resolveTopicName(topicOrDescriptor) {
  }
  return String(topicOrDescriptor);
  }
- function registerSchema(topicOrDesc, schemaRegistry) {
+ function registerSchema(topicOrDesc, schemaRegistry, logger) {
  if (topicOrDesc?.__schema) {
  const topic2 = resolveTopicName(topicOrDesc);
+ const existing = schemaRegistry.get(topic2);
+ if (existing && existing !== topicOrDesc.__schema) {
+ logger?.warn(
+ `Schema conflict for topic "${topic2}": a different schema is already registered. Using the new schema \u2014 ensure consistent schemas to avoid silent validation mismatches.`
+ );
+ }
  schemaRegistry.set(topic2, topicOrDesc.__schema);
  }
  }
- async function validateMessage(topicOrDesc, message, deps) {
+ async function validateMessage(topicOrDesc, message, deps, ctx) {
  const topicName = resolveTopicName(topicOrDesc);
  if (topicOrDesc?.__schema) {
  try {
- return await topicOrDesc.__schema.parse(message);
+ return await topicOrDesc.__schema.parse(message, ctx);
  } catch (error) {
  throw new KafkaValidationError(topicName, message, {
  cause: error instanceof Error ? error : new Error(String(error))
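
Note: registerSchema (and buildSchemaMap further down) now warn when a topic is re-registered with a different schema object. A hypothetical sketch of when the warning fires; the topic() helper is exported by this package, but its exact signature and the public send() wrapper are assumptions, not shown in this diff:

    // Hypothetical sketch: topic() signature and client.send() are assumed, not shown in this diff.
    const ordersV1 = topic("orders", orderSchemaV1);
    const ordersV2 = topic("orders", orderSchemaV2);
    await client.send(ordersV1, [{ value: order }]); // registers orderSchemaV1 for "orders"
    await client.send(ordersV2, [{ value: order }]); // 0.6.3 now logs the "Schema conflict for topic" warning
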
@@ -122,7 +129,7 @@ async function validateMessage(topicOrDesc, message, deps) {
  const schema = deps.schemaRegistry.get(topicOrDesc);
  if (schema) {
  try {
- return await schema.parse(message);
+ return await schema.parse(message, ctx);
  } catch (error) {
  throw new KafkaValidationError(topicName, message, {
  cause: error instanceof Error ? error : new Error(String(error))
@@ -145,9 +152,14 @@ async function buildSendPayload(topicOrDesc, messages, deps) {
  for (const inst of deps.instrumentation) {
  inst.beforeSend?.(topic2, envelopeHeaders);
  }
+ const sendCtx = {
+ topic: topic2,
+ headers: envelopeHeaders,
+ version: m.schemaVersion ?? 1
+ };
  return {
  value: JSON.stringify(
- await validateMessage(topicOrDesc, m.value, deps)
+ await validateMessage(topicOrDesc, m.value, deps, sendCtx)
  ),
  key: m.key ?? null,
  headers: envelopeHeaders
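
Note: schemas now receive a second ctx argument ({ topic, headers, version }); on the send path version comes from m.schemaVersion, defaulting to 1. As far as this diff shows, a schema only needs a parse(message, ctx) method. The version-switching wrapper below is a hypothetical sketch:

    const orderSchema = {
      async parse(message, ctx) {
        // ctx is { topic, headers, version } on both the send and consume paths;
        // orderV1 / orderV2 are stand-ins for real validators (e.g. zod schemas).
        if (ctx?.version >= 2) return orderV2.parse(message);
        return orderV1.parse(message);
      }
    };
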
@@ -191,19 +203,26 @@ function getOrCreateConsumer(groupId, fromBeginning, autoCommit, deps) {
  consumers.set(groupId, consumer);
  return consumer;
  }
- function buildSchemaMap(topics, schemaRegistry, optionSchemas) {
+ function buildSchemaMap(topics, schemaRegistry, optionSchemas, logger) {
  const schemaMap = /* @__PURE__ */ new Map();
+ const registerChecked = (name, schema) => {
+ const existing = schemaRegistry.get(name);
+ if (existing && existing !== schema) {
+ logger?.warn(
+ `Schema conflict for topic "${name}": a different schema is already registered. Using the new schema \u2014 ensure consistent schemas to avoid silent validation mismatches.`
+ );
+ }
+ schemaMap.set(name, schema);
+ schemaRegistry.set(name, schema);
+ };
  for (const t of topics) {
  if (t?.__schema) {
- const name = resolveTopicName(t);
- schemaMap.set(name, t.__schema);
- schemaRegistry.set(name, t.__schema);
+ registerChecked(resolveTopicName(t), t.__schema);
  }
  }
  if (optionSchemas) {
  for (const [k, v] of optionSchemas) {
- schemaMap.set(k, v);
- schemaRegistry.set(k, v);
+ registerChecked(k, v);
  }
  }
  return schemaMap;
@@ -230,8 +249,13 @@ function parseJsonMessage(raw, topic2, logger) {
  async function validateWithSchema(message, raw, topic2, schemaMap, interceptors, dlq, deps) {
  const schema = schemaMap.get(topic2);
  if (!schema) return message;
+ const ctx = {
+ topic: topic2,
+ headers: deps.originalHeaders ?? {},
+ version: Number(deps.originalHeaders?.["x-schema-version"] ?? 1)
+ };
  try {
- return await schema.parse(message);
+ return await schema.parse(message, ctx);
  } catch (error) {
  const err = toError(error);
  const validationError = new KafkaValidationError(topic2, message, {
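
Note: on the consume path ctx.version is read from the "x-schema-version" header, falling back to 1. Only the header name comes from this diff; how the header gets onto the record is up to the producer:

    // A record produced (by this client or any other) with headers { "x-schema-version": "2" }
    // reaches schema.parse(message, ctx) with:
    //   ctx = { topic: "orders", headers: { "x-schema-version": "2", /* ... */ }, version: 2 }
    // Records without the header validate as version 1.
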
@@ -268,7 +292,7 @@ async function validateWithSchema(message, raw, topic2, schemaMap, interceptors,
  return null;
  }
  }
- async function sendToDlq(topic2, rawMessage, deps, meta) {
+ function buildDlqPayload(topic2, rawMessage, meta) {
  const dlqTopic = `${topic2}.dlq`;
  const headers = {
  ...meta?.originalHeaders ?? {},
@@ -278,54 +302,82 @@ async function sendToDlq(topic2, rawMessage, deps, meta) {
  "x-dlq-error-stack": meta?.error.stack?.slice(0, 2e3) ?? "",
  "x-dlq-attempt-count": String(meta?.attempt ?? 0)
  };
+ return { topic: dlqTopic, messages: [{ value: rawMessage, headers }] };
+ }
+ async function sendToDlq(topic2, rawMessage, deps, meta) {
+ const payload = buildDlqPayload(topic2, rawMessage, meta);
  try {
- await deps.producer.send({
- topic: dlqTopic,
- messages: [{ value: rawMessage, headers }]
- });
- deps.logger.warn(`Message sent to DLQ: ${dlqTopic}`);
+ await deps.producer.send(payload);
+ deps.logger.warn(`Message sent to DLQ: ${payload.topic}`);
  } catch (error) {
- deps.logger.error(
- `Failed to send message to DLQ ${dlqTopic}:`,
- toError(error).stack
- );
+ const err = toError(error);
+ deps.logger.error(`Failed to send message to DLQ ${payload.topic}:`, err.stack);
+ await deps.onMessageLost?.({
+ topic: topic2,
+ error: err,
+ attempt: meta?.attempt ?? 0,
+ headers: meta?.originalHeaders ?? {}
+ });
  }
  }
  var RETRY_HEADER_ATTEMPT = "x-retry-attempt";
  var RETRY_HEADER_AFTER = "x-retry-after";
  var RETRY_HEADER_MAX_RETRIES = "x-retry-max-retries";
  var RETRY_HEADER_ORIGINAL_TOPIC = "x-retry-original-topic";
- async function sendToRetryTopic(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders, deps) {
+ function buildRetryTopicPayload(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders) {
  const retryTopic = `${originalTopic}.retry.${attempt}`;
- const {
- [RETRY_HEADER_ATTEMPT]: _a,
- [RETRY_HEADER_AFTER]: _b,
- [RETRY_HEADER_MAX_RETRIES]: _c,
- [RETRY_HEADER_ORIGINAL_TOPIC]: _d,
- ...userHeaders
- } = originalHeaders;
- const headers = {
- ...userHeaders,
- [RETRY_HEADER_ATTEMPT]: String(attempt),
- [RETRY_HEADER_AFTER]: String(Date.now() + delayMs),
- [RETRY_HEADER_MAX_RETRIES]: String(maxRetries),
- [RETRY_HEADER_ORIGINAL_TOPIC]: originalTopic
+ function buildHeaders(hdr) {
+ const {
+ [RETRY_HEADER_ATTEMPT]: _a,
+ [RETRY_HEADER_AFTER]: _b,
+ [RETRY_HEADER_MAX_RETRIES]: _c,
+ [RETRY_HEADER_ORIGINAL_TOPIC]: _d,
+ ...userHeaders
+ } = hdr;
+ return {
+ ...userHeaders,
+ [RETRY_HEADER_ATTEMPT]: String(attempt),
+ [RETRY_HEADER_AFTER]: String(Date.now() + delayMs),
+ [RETRY_HEADER_MAX_RETRIES]: String(maxRetries),
+ [RETRY_HEADER_ORIGINAL_TOPIC]: originalTopic
+ };
+ }
+ return {
+ topic: retryTopic,
+ messages: rawMessages.map((value, i) => ({
+ value,
+ headers: buildHeaders(
+ Array.isArray(originalHeaders) ? originalHeaders[i] ?? {} : originalHeaders
+ )
+ }))
  };
+ }
+ async function sendToRetryTopic(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders, deps) {
+ const payload = buildRetryTopicPayload(
+ originalTopic,
+ rawMessages,
+ attempt,
+ maxRetries,
+ delayMs,
+ originalHeaders
+ );
  try {
- for (const raw of rawMessages) {
- await deps.producer.send({
- topic: retryTopic,
- messages: [{ value: raw, headers }]
- });
- }
+ await deps.producer.send(payload);
  deps.logger.warn(
- `Message queued in retry topic ${retryTopic} (attempt ${attempt}/${maxRetries})`
+ `Message queued in retry topic ${payload.topic} (attempt ${attempt}/${maxRetries})`
  );
  } catch (error) {
+ const err = toError(error);
  deps.logger.error(
- `Failed to send message to retry topic ${retryTopic}:`,
- toError(error).stack
+ `Failed to send message to retry topic ${payload.topic}:`,
+ err.stack
  );
+ await deps.onMessageLost?.({
+ topic: originalTopic,
+ error: err,
+ attempt,
+ headers: Array.isArray(originalHeaders) ? originalHeaders[0] ?? {} : originalHeaders
+ });
  }
  }
  async function broadcastToInterceptors(envelopes, interceptors, cb) {
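
Note: failures while publishing to a DLQ or retry topic now surface through the onMessageLost hook instead of only being logged. The callback shape below matches the call sites above; wiring it through the constructor options is an assumption, since the diff only shows the class field and the deps plumbing:

    const client = new KafkaClient("billing", "billing-group", ["localhost:9092"], {
      // option key assumed from the onMessageLost class field of the same name
      onMessageLost: async ({ topic, error, attempt, headers }) => {
        // last-resort hook: the message could not be parked in the DLQ / retry topic
        console.error(`message lost on ${topic} (attempt ${attempt}):`, error.message, headers);
      }
    });
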
@@ -337,11 +389,17 @@ async function broadcastToInterceptors(envelopes, interceptors, cb) {
  }
  async function runHandlerWithPipeline(fn, envelopes, interceptors, instrumentation) {
  const cleanups = [];
+ const wraps = [];
  try {
  for (const env of envelopes) {
  for (const inst of instrumentation) {
- const cleanup = inst.beforeConsume?.(env);
- if (typeof cleanup === "function") cleanups.push(cleanup);
+ const result = inst.beforeConsume?.(env);
+ if (typeof result === "function") {
+ cleanups.push(result);
+ } else if (result) {
+ if (result.cleanup) cleanups.push(result.cleanup);
+ if (result.wrap) wraps.push(result.wrap);
+ }
  }
  }
  for (const env of envelopes) {
@@ -349,7 +407,13 @@ async function runHandlerWithPipeline(fn, envelopes, interceptors, instrumentati
  await interceptor.before?.(env);
  }
  }
- await fn();
+ let runFn = fn;
+ for (let i = wraps.length - 1; i >= 0; i--) {
+ const wrap = wraps[i];
+ const inner = runFn;
+ runFn = () => wrap(inner);
+ }
+ await runFn();
  for (const env of envelopes) {
  for (const interceptor of interceptors) {
  await interceptor.after?.(env);
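
Note: beforeConsume may still return a plain cleanup function, but it can now also return { cleanup, wrap }, where wrap composes around the handler (the first-registered wrap ends up outermost). A sketch of a tracing-style instrumentation; the tracer object is illustrative, only the return shape comes from the diff:

    const tracingInstrumentation = {
      beforeConsume(envelope) {
        const span = tracer.startSpan("kafka.consume"); // tracer is a stand-in, not part of this package
        return {
          // wrap receives the next stage as a function and must invoke it
          wrap: (next) => next().catch((err) => { span.recordException?.(err); throw err; }),
          // cleanup still runs after the pipeline, as before
          cleanup: () => span.end()
        };
      }
    };
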
@@ -419,7 +483,7 @@ async function executeWithRetry(fn, ctx, deps) {
  1,
  retry.maxRetries,
  delay,
- envelopes[0]?.headers ?? {},
+ isBatch ? envelopes.map((e) => e.headers) : envelopes[0]?.headers ?? {},
  deps
  );
  } else if (isLastAttempt) {
@@ -521,6 +585,7 @@ async function handleEachBatch(payload, opts, deps) {
  interceptors,
  dlq,
  retry,
+ retryTopics,
  timeoutMs,
  wrapWithTimeout
  } = opts;
@@ -555,11 +620,12 @@ async function handleEachBatch(payload, opts, deps) {
  },
  {
  envelope: envelopes,
- rawMessages: batch.messages.filter((m) => m.value).map((m) => m.value.toString()),
+ rawMessages,
  interceptors,
  dlq,
  retry,
- isBatch: true
+ isBatch: true,
+ retryTopics
  },
  deps
  );
@@ -576,10 +642,11 @@ async function subscribeWithRetry(consumer, topics, logger, retryOpts) {
  } catch (error) {
  if (attempt === maxAttempts) throw error;
  const msg = toError(error).message;
+ const delay = Math.floor(Math.random() * backoffMs);
  logger.warn(
- `Failed to subscribe to [${topics.join(", ")}] (attempt ${attempt}/${maxAttempts}): ${msg}. Retrying in ${backoffMs}ms...`
+ `Failed to subscribe to [${topics.join(", ")}] (attempt ${attempt}/${maxAttempts}): ${msg}. Retrying in ${delay}ms...`
  );
- await sleep(backoffMs);
+ await sleep(delay);
  }
  }
  }
@@ -608,7 +675,8 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
  onMessageLost,
  ensureTopic,
  getOrCreateConsumer: getOrCreateConsumer2,
- runningConsumers
+ runningConsumers,
+ createRetryTxProducer
  } = deps;
  const backoffMs = retry.backoffMs ?? 1e3;
  const maxBackoffMs = retry.maxBackoffMs ?? 3e4;
@@ -616,6 +684,7 @@
  for (const lt of levelTopics) {
  await ensureTopic(lt);
  }
+ const levelTxProducer = await createRetryTxProducer(`${levelGroupId}-tx`);
  const consumer = getOrCreateConsumer2(levelGroupId, false, false);
  await consumer.connect();
  await subscribeWithRetry(consumer, levelTopics, logger);
@@ -704,22 +773,67 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
  const nextLevel = level + 1;
  const cap = Math.min(backoffMs * 2 ** level, maxBackoffMs);
  const delay = Math.floor(Math.random() * cap);
- await sendToRetryTopic(
+ const { topic: rtTopic, messages: rtMsgs } = buildRetryTopicPayload(
  originalTopic,
  [raw],
  nextLevel,
  currentMaxRetries,
  delay,
- headers,
- pipelineDeps
+ headers
  );
+ const tx = await levelTxProducer.transaction();
+ try {
+ await tx.send({ topic: rtTopic, messages: rtMsgs });
+ await tx.sendOffsets({
+ consumer,
+ topics: [{ topic: nextOffset.topic, partitions: [{ partition: nextOffset.partition, offset: nextOffset.offset }] }]
+ });
+ await tx.commit();
+ logger.warn(
+ `Message routed to ${rtTopic} (EOS, level ${nextLevel}/${currentMaxRetries})`
+ );
+ } catch (txErr) {
+ try {
+ await tx.abort();
+ } catch {
+ }
+ logger.error(
+ `EOS routing to ${rtTopic} failed \u2014 message will be redelivered:`,
+ toError(txErr).stack
+ );
+ return;
+ }
  } else if (dlq) {
- await sendToDlq(originalTopic, raw, pipelineDeps, {
- error,
- // +1 to account for the main consumer's initial attempt before routing.
- attempt: level + 1,
- originalHeaders: headers
- });
+ const { topic: dTopic, messages: dMsgs } = buildDlqPayload(
+ originalTopic,
+ raw,
+ {
+ error,
+ // +1 to account for the main consumer's initial attempt before routing.
+ attempt: level + 1,
+ originalHeaders: headers
+ }
+ );
+ const tx = await levelTxProducer.transaction();
+ try {
+ await tx.send({ topic: dTopic, messages: dMsgs });
+ await tx.sendOffsets({
+ consumer,
+ topics: [{ topic: nextOffset.topic, partitions: [{ partition: nextOffset.partition, offset: nextOffset.offset }] }]
+ });
+ await tx.commit();
+ logger.warn(`Message sent to DLQ: ${dTopic} (EOS)`);
+ } catch (txErr) {
+ try {
+ await tx.abort();
+ } catch {
+ }
+ logger.error(
+ `EOS DLQ routing to ${dTopic} failed \u2014 message will be redelivered:`,
+ toError(txErr).stack
+ );
+ return;
+ }
  } else {
  await onMessageLost?.({
  topic: originalTopic,
@@ -727,8 +841,8 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
  attempt: level,
  headers
  });
+ await consumer.commitOffsets([nextOffset]);
  }
- await consumer.commitOffsets([nextOffset]);
  }
  });
  runningConsumers.set(levelGroupId, "eachMessage");
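
Note: offset handling in the retry-level consumer now differs per branch, which is why the unconditional commitOffsets call moved. A short summary of the flow above:

    // retry-topics branch: tx.send(retry message) + tx.sendOffsets(consumed offset) + tx.commit()  (atomic)
    // dlq branch:          tx.send(dlq message)   + tx.sendOffsets(consumed offset) + tx.commit()  (atomic)
    // neither configured:  onMessageLost?.(...) then consumer.commitOffsets([nextOffset])
    // on transaction failure: tx.abort() and return without committing, so the message is redelivered
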
@@ -766,6 +880,7 @@ var KafkaClient = class {
  kafka;
  producer;
  txProducer;
+ retryTxProducers = /* @__PURE__ */ new Set();
  consumers = /* @__PURE__ */ new Map();
  admin;
  logger;
@@ -783,6 +898,8 @@ var KafkaClient = class {
  onMessageLost;
  onRebalance;
  isAdminConnected = false;
+ inFlightTotal = 0;
+ drainResolvers = [];
  clientId;
  constructor(clientId, groupId, brokers, options) {
  this.clientId = clientId;
@@ -790,7 +907,8 @@ var KafkaClient = class {
  this.logger = options?.logger ?? {
  log: (msg) => console.log(`[KafkaClient:${clientId}] ${msg}`),
  warn: (msg, ...args) => console.warn(`[KafkaClient:${clientId}] ${msg}`, ...args),
- error: (msg, ...args) => console.error(`[KafkaClient:${clientId}] ${msg}`, ...args)
+ error: (msg, ...args) => console.error(`[KafkaClient:${clientId}] ${msg}`, ...args),
+ debug: (msg, ...args) => console.debug(`[KafkaClient:${clientId}] ${msg}`, ...args)
  };
  this.autoCreateTopicsEnabled = options?.autoCreateTopics ?? false;
  this.strictSchemasEnabled = options?.strictSchemas ?? true;
@@ -834,7 +952,7 @@ var KafkaClient = class {
  /** Execute multiple sends atomically. Commits on success, aborts on error. */
  async transaction(fn) {
  if (!this.txProducer) {
- this.txProducer = this.kafka.producer({
+ const p = this.kafka.producer({
  kafkaJS: {
  acks: -1,
  idempotent: true,
@@ -842,7 +960,8 @@ var KafkaClient = class {
  maxInFlightRequests: 1
  }
  });
- await this.txProducer.connect();
+ await p.connect();
+ this.txProducer = p;
  }
  const tx = await this.txProducer.transaction();
  try {
@@ -859,9 +978,12 @@ var KafkaClient = class {
  }
  ]);
  await tx.send(payload);
+ this.notifyAfterSend(payload.topic, payload.messages.length);
  },
  sendBatch: async (topicOrDesc, messages) => {
- await tx.send(await this.preparePayload(topicOrDesc, messages));
+ const payload = await this.preparePayload(topicOrDesc, messages);
+ await tx.send(payload);
+ this.notifyAfterSend(payload.topic, payload.messages.length);
  }
  };
  await fn(ctx);
@@ -898,23 +1020,28 @@ var KafkaClient = class {
  const deps = this.messageDeps;
  const timeoutMs = options.handlerTimeoutMs;
  await consumer.run({
- eachMessage: (payload) => handleEachMessage(
- payload,
- {
- schemaMap,
- handleMessage,
- interceptors,
- dlq,
- retry,
- retryTopics: options.retryTopics,
- timeoutMs,
- wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
- },
- deps
+ eachMessage: (payload) => this.trackInFlight(
+ () => handleEachMessage(
+ payload,
+ {
+ schemaMap,
+ handleMessage,
+ interceptors,
+ dlq,
+ retry,
+ retryTopics: options.retryTopics,
+ timeoutMs,
+ wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
+ },
+ deps
+ )
  )
  });
  this.runningConsumers.set(gid, "eachMessage");
  if (options.retryTopics && retry) {
+ if (!this.autoCreateTopicsEnabled) {
+ await this.validateRetryTopicsExist(topicNames, retry.maxRetries);
+ }
  const companions = await startRetryTopicConsumers(
  topicNames,
  gid,
@@ -931,25 +1058,65 @@ var KafkaClient = class {
  return { groupId: gid, stop: () => this.stopConsumer(gid) };
  }
  async startBatchConsumer(topics, handleBatch, options = {}) {
- const { consumer, schemaMap, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachBatch", options);
+ if (options.retryTopics && !options.retry) {
+ throw new Error(
+ "retryTopics requires retry to be configured \u2014 set retry.maxRetries to enable the retry topic chain"
+ );
+ }
+ if (options.autoCommit !== false) {
+ this.logger.debug?.(
+ `startBatchConsumer: autoCommit is enabled (default true). If your handler calls resolveOffset() or commitOffsetsIfNecessary(), set autoCommit: false to avoid offset conflicts.`
+ );
+ }
+ const { consumer, schemaMap, topicNames, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachBatch", options);
  const deps = this.messageDeps;
  const timeoutMs = options.handlerTimeoutMs;
  await consumer.run({
- eachBatch: (payload) => handleEachBatch(
- payload,
- {
- schemaMap,
- handleBatch,
- interceptors,
- dlq,
- retry,
- timeoutMs,
- wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
- },
- deps
+ eachBatch: (payload) => this.trackInFlight(
+ () => handleEachBatch(
+ payload,
+ {
+ schemaMap,
+ handleBatch,
+ interceptors,
+ dlq,
+ retry,
+ retryTopics: options.retryTopics,
+ timeoutMs,
+ wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
+ },
+ deps
+ )
  )
  });
  this.runningConsumers.set(gid, "eachBatch");
+ if (options.retryTopics && retry) {
+ if (!this.autoCreateTopicsEnabled) {
+ await this.validateRetryTopicsExist(topicNames, retry.maxRetries);
+ }
+ const handleMessageForRetry = (env) => handleBatch([env], {
+ partition: env.partition,
+ highWatermark: env.offset,
+ heartbeat: async () => {
+ },
+ resolveOffset: () => {
+ },
+ commitOffsetsIfNecessary: async () => {
+ }
+ });
+ const companions = await startRetryTopicConsumers(
+ topicNames,
+ gid,
+ handleMessageForRetry,
+ retry,
+ dlq,
+ interceptors,
+ schemaMap,
+ this.retryTopicDeps,
+ options.retryTopicAssignmentTimeoutMs
+ );
+ this.companionGroupIds.set(gid, companions);
+ }
  return { groupId: gid, stop: () => this.stopConsumer(gid) };
  }
  // ── Consumer lifecycle ───────────────────────────────────────────
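
Note: batch consumers can now opt into the retry-topic chain, and retryTopics without retry now throws at startup. A minimal usage sketch; the handler body and topic names are illustrative, while the option keys (retry, retryTopics, dlq, autoCommit) appear in the code above:

    await client.startBatchConsumer(["orders"], async (envelopes) => {
      for (const env of envelopes) await processOrder(env); // processOrder is a stand-in
    }, {
      retry: { maxRetries: 3 },  // required when retryTopics is set
      retryTopics: true,         // failed messages are re-driven via <topic>.retry.<level>
      dlq: true,                 // exhausted messages are parked in <topic>.dlq
      autoCommit: false          // recommended if the handler manages offsets itself
    });
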
@@ -1001,14 +1168,15 @@ var KafkaClient = class {
  */
  async getConsumerLag(groupId) {
  const gid = groupId ?? this.defaultGroupId;
- if (!this.isAdminConnected) {
- await this.admin.connect();
- this.isAdminConnected = true;
- }
+ await this.ensureAdminConnected();
  const committedByTopic = await this.admin.fetchOffsets({ groupId: gid });
+ const brokerOffsetsAll = await Promise.all(
+ committedByTopic.map(({ topic: topic2 }) => this.admin.fetchTopicOffsets(topic2))
+ );
  const result = [];
- for (const { topic: topic2, partitions } of committedByTopic) {
- const brokerOffsets = await this.admin.fetchTopicOffsets(topic2);
+ for (let i = 0; i < committedByTopic.length; i++) {
+ const { topic: topic2, partitions } = committedByTopic[i];
+ const brokerOffsets = brokerOffsetsAll[i];
  for (const { partition, offset } of partitions) {
  const broker = brokerOffsets.find((o) => o.partition === partition);
  if (!broker) continue;
@@ -1023,10 +1191,7 @@ var KafkaClient = class {
  /** Check broker connectivity. Never throws — returns a discriminated union. */
  async checkStatus() {
  try {
- if (!this.isAdminConnected) {
- await this.admin.connect();
- this.isAdminConnected = true;
- }
+ await this.ensureAdminConnected();
  const topics = await this.admin.listTopics();
  return { status: "up", clientId: this.clientId, topics };
  } catch (error) {
@@ -1041,12 +1206,17 @@ var KafkaClient = class {
  return this.clientId;
  }
  /** Gracefully disconnect producer, all consumers, and admin. */
- async disconnect() {
+ async disconnect(drainTimeoutMs = 3e4) {
+ await this.waitForDrain(drainTimeoutMs);
  const tasks = [this.producer.disconnect()];
  if (this.txProducer) {
  tasks.push(this.txProducer.disconnect());
  this.txProducer = void 0;
  }
+ for (const p of this.retryTxProducers) {
+ tasks.push(p.disconnect());
+ }
+ this.retryTxProducers.clear();
  for (const consumer of this.consumers.values()) {
  tasks.push(consumer.disconnect());
  }
@@ -1061,9 +1231,59 @@ var KafkaClient = class {
  this.companionGroupIds.clear();
  this.logger.log("All connections closed");
  }
+ // ── Graceful shutdown ────────────────────────────────────────────
+ /**
+ * Register SIGTERM / SIGINT handlers that drain in-flight messages before
+ * disconnecting. Call this once after constructing the client in non-NestJS apps.
+ * NestJS apps get drain for free via `onModuleDestroy` → `disconnect()`.
+ */
+ enableGracefulShutdown(signals = ["SIGTERM", "SIGINT"], drainTimeoutMs = 3e4) {
+ const handler = () => {
+ this.logger.log(
+ "Shutdown signal received \u2014 draining in-flight handlers..."
+ );
+ this.disconnect(drainTimeoutMs).catch(
+ (err) => this.logger.error(
+ "Error during graceful shutdown:",
+ toError(err).message
+ )
+ );
+ };
+ for (const signal of signals) {
+ process.once(signal, handler);
+ }
+ }
+ trackInFlight(fn) {
+ this.inFlightTotal++;
+ return fn().finally(() => {
+ this.inFlightTotal--;
+ if (this.inFlightTotal === 0) {
+ this.drainResolvers.splice(0).forEach((r) => r());
+ }
+ });
+ }
+ waitForDrain(timeoutMs) {
+ if (this.inFlightTotal === 0) return Promise.resolve();
+ return new Promise((resolve) => {
+ let handle;
+ const onDrain = () => {
+ clearTimeout(handle);
+ resolve();
+ };
+ this.drainResolvers.push(onDrain);
+ handle = setTimeout(() => {
+ const idx = this.drainResolvers.indexOf(onDrain);
+ if (idx !== -1) this.drainResolvers.splice(idx, 1);
+ this.logger.warn(
+ `Drain timed out after ${timeoutMs}ms \u2014 ${this.inFlightTotal} handler(s) still in flight`
+ );
+ resolve();
+ }, timeoutMs);
+ });
+ }
  // ── Private helpers ──────────────────────────────────────────────
  async preparePayload(topicOrDesc, messages) {
- registerSchema(topicOrDesc, this.schemaRegistry);
+ registerSchema(topicOrDesc, this.schemaRegistry, this.logger);
  const payload = await buildSendPayload(
  topicOrDesc,
  messages,
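
Note: for plain Node apps the drain behavior is opt-in via enableGracefulShutdown; NestJS apps get it through disconnect(). A minimal sketch (broker list and timeout are illustrative):

    const client = new KafkaClient("orders-service", "orders-group", ["localhost:9092"]);
    // Register SIGTERM/SIGINT handlers once; disconnect() now waits up to the drain
    // timeout for in-flight eachMessage/eachBatch handlers before closing connections.
    client.enableGracefulShutdown(["SIGTERM", "SIGINT"], 30000);
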
@@ -1096,12 +1316,78 @@ var KafkaClient = class {
  }, timeoutMs);
  return promise;
  }
- async ensureTopic(topic2) {
- if (!this.autoCreateTopicsEnabled || this.ensuredTopics.has(topic2)) return;
- if (!this.isAdminConnected) {
+ /**
+ * When `retryTopics: true` and `autoCreateTopics: false`, verify that every
+ * `<topic>.retry.<level>` topic already exists. Throws a clear error at startup
+ * rather than silently discovering missing topics on the first handler failure.
+ */
+ async validateRetryTopicsExist(topicNames, maxRetries) {
+ await this.ensureAdminConnected();
+ const existing = new Set(await this.admin.listTopics());
+ const missing = [];
+ for (const t of topicNames) {
+ for (let level = 1; level <= maxRetries; level++) {
+ const retryTopic = `${t}.retry.${level}`;
+ if (!existing.has(retryTopic)) missing.push(retryTopic);
+ }
+ }
+ if (missing.length > 0) {
+ throw new Error(
+ `retryTopics: true but the following retry topics do not exist: ${missing.join(", ")}. Create them manually or set autoCreateTopics: true.`
+ );
+ }
+ }
+ /**
+ * When `autoCreateTopics` is disabled, verify that `<topic>.dlq` exists for every
+ * consumed topic. Throws a clear error at startup rather than silently discovering
+ * missing DLQ topics on the first handler failure.
+ */
+ async validateDlqTopicsExist(topicNames) {
+ await this.ensureAdminConnected();
+ const existing = new Set(await this.admin.listTopics());
+ const missing = topicNames.filter((t) => !existing.has(`${t}.dlq`)).map((t) => `${t}.dlq`);
+ if (missing.length > 0) {
+ throw new Error(
+ `dlq: true but the following DLQ topics do not exist: ${missing.join(", ")}. Create them manually or set autoCreateTopics: true.`
+ );
+ }
+ }
+ /**
+ * Connect the admin client if not already connected.
+ * The flag is only set to `true` after a successful connect — if `admin.connect()`
+ * throws the flag remains `false` so the next call will retry the connection.
+ */
+ async ensureAdminConnected() {
+ if (this.isAdminConnected) return;
+ try {
  await this.admin.connect();
  this.isAdminConnected = true;
+ } catch (err) {
+ this.isAdminConnected = false;
+ throw err;
  }
+ }
+ /**
+ * Create and connect a transactional producer for EOS retry routing.
+ * Each retry level consumer gets its own producer with a unique `transactionalId`
+ * so Kafka can fence stale producers on restart without affecting other levels.
+ */
+ async createRetryTxProducer(transactionalId) {
+ const p = this.kafka.producer({
+ kafkaJS: {
+ acks: -1,
+ idempotent: true,
+ transactionalId,
+ maxInFlightRequests: 1
+ }
+ });
+ await p.connect();
+ this.retryTxProducers.add(p);
+ return p;
+ }
+ async ensureTopic(topic2) {
+ if (!this.autoCreateTopicsEnabled || this.ensuredTopics.has(topic2)) return;
+ await this.ensureAdminConnected();
  await this.admin.createTopics({
  topics: [{ topic: topic2, numPartitions: this.numPartitions }]
  });
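
Note: with autoCreateTopics: false the companion topics must exist before the consumer starts. The naming scheme follows directly from the validation loops above; a small sketch of the topics required for one consumed topic:

    // `${topic}.retry.${level}` for levels 1..maxRetries, plus `${topic}.dlq`
    const required = (topicName, maxRetries) => [
      ...Array.from({ length: maxRetries }, (_, i) => `${topicName}.retry.${i + 1}`),
      `${topicName}.dlq`
    ];
    required("orders", 3); // ["orders.retry.1", "orders.retry.2", "orders.retry.3", "orders.dlq"]
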
@@ -1134,7 +1420,8 @@ var KafkaClient = class {
  const schemaMap = buildSchemaMap(
  topics,
  this.schemaRegistry,
- optionSchemas
+ optionSchemas,
+ this.logger
  );
  const topicNames = topics.map((t) => resolveTopicName(t));
  for (const t of topicNames) {
@@ -1144,6 +1431,9 @@ var KafkaClient = class {
  for (const t of topicNames) {
  await this.ensureTopic(`${t}.dlq`);
  }
+ if (!this.autoCreateTopicsEnabled) {
+ await this.validateDlqTopicsExist(topicNames);
+ }
  }
  await consumer.connect();
  await subscribeWithRetry(
@@ -1162,7 +1452,8 @@ var KafkaClient = class {
  return {
  schemaRegistry: this.schemaRegistry,
  strictSchemasEnabled: this.strictSchemasEnabled,
- instrumentation: this.instrumentation
+ instrumentation: this.instrumentation,
+ logger: this.logger
  };
  }
  get consumerOpsDeps() {
@@ -1190,7 +1481,8 @@ var KafkaClient = class {
  onMessageLost: this.onMessageLost,
  ensureTopic: (t) => this.ensureTopic(t),
  getOrCreateConsumer: (gid, fb, ac) => getOrCreateConsumer(gid, fb, ac, this.consumerOpsDeps),
- runningConsumers: this.runningConsumers
+ runningConsumers: this.runningConsumers,
+ createRetryTxProducer: (txId) => this.createRetryTxProducer(txId)
  };
  }
  };
@@ -1228,4 +1520,4 @@ export {
  KafkaClient,
  topic
  };
- //# sourceMappingURL=chunk-TD2AE774.mjs.map
+ //# sourceMappingURL=chunk-RGRKN4E5.mjs.map