@drarzter/kafka-client 0.5.2 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/core.js CHANGED
@@ -172,9 +172,20 @@ async function validateWithSchema(message, raw, topic2, schemaMap, interceptors,
172
172
  originalHeaders: deps.originalHeaders
173
173
  });
174
174
  } else {
175
- await deps.onMessageLost?.({ topic: topic2, error: validationError, attempt: 0, headers: deps.originalHeaders ?? {} });
175
+ await deps.onMessageLost?.({
176
+ topic: topic2,
177
+ error: validationError,
178
+ attempt: 0,
179
+ headers: deps.originalHeaders ?? {}
180
+ });
176
181
  }
177
- const errorEnvelope = extractEnvelope(message, deps.originalHeaders ?? {}, topic2, -1, "");
182
+ const errorEnvelope = extractEnvelope(
183
+ message,
184
+ deps.originalHeaders ?? {},
185
+ topic2,
186
+ -1,
187
+ ""
188
+ );
178
189
  for (const interceptor of interceptors) {
179
190
  await interceptor.onError?.(errorEnvelope, validationError);
180
191
  }
@@ -204,9 +215,54 @@ async function sendToDlq(topic2, rawMessage, deps, meta) {
204
215
  );
205
216
  }
206
217
  }
218
+ var RETRY_HEADER_ATTEMPT = "x-retry-attempt";
219
+ var RETRY_HEADER_AFTER = "x-retry-after";
220
+ var RETRY_HEADER_MAX_RETRIES = "x-retry-max-retries";
221
+ var RETRY_HEADER_ORIGINAL_TOPIC = "x-retry-original-topic";
222
+ async function sendToRetryTopic(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders, deps) {
223
+ const retryTopic = `${originalTopic}.retry`;
224
+ const {
225
+ [RETRY_HEADER_ATTEMPT]: _a,
226
+ [RETRY_HEADER_AFTER]: _b,
227
+ [RETRY_HEADER_MAX_RETRIES]: _c,
228
+ [RETRY_HEADER_ORIGINAL_TOPIC]: _d,
229
+ ...userHeaders
230
+ } = originalHeaders;
231
+ const headers = {
232
+ ...userHeaders,
233
+ [RETRY_HEADER_ATTEMPT]: String(attempt),
234
+ [RETRY_HEADER_AFTER]: String(Date.now() + delayMs),
235
+ [RETRY_HEADER_MAX_RETRIES]: String(maxRetries),
236
+ [RETRY_HEADER_ORIGINAL_TOPIC]: originalTopic
237
+ };
238
+ try {
239
+ for (const raw of rawMessages) {
240
+ await deps.producer.send({
241
+ topic: retryTopic,
242
+ messages: [{ value: raw, headers }]
243
+ });
244
+ }
245
+ deps.logger.warn(
246
+ `Message queued in retry topic ${retryTopic} (attempt ${attempt}/${maxRetries})`
247
+ );
248
+ } catch (error) {
249
+ deps.logger.error(
250
+ `Failed to send message to retry topic ${retryTopic}:`,
251
+ toError(error).stack
252
+ );
253
+ }
254
+ }
207
255
  async function executeWithRetry(fn, ctx, deps) {
208
- const { envelope, rawMessages, interceptors, dlq, retry, isBatch } = ctx;
209
- const maxAttempts = retry ? retry.maxRetries + 1 : 1;
256
+ const {
257
+ envelope,
258
+ rawMessages,
259
+ interceptors,
260
+ dlq,
261
+ retry,
262
+ isBatch,
263
+ retryTopics
264
+ } = ctx;
265
+ const maxAttempts = retryTopics ? 1 : retry ? retry.maxRetries + 1 : 1;
210
266
  const backoffMs = retry?.backoffMs ?? 1e3;
211
267
  const maxBackoffMs = retry?.maxBackoffMs ?? 3e4;
212
268
  const envelopes = Array.isArray(envelope) ? envelope : [envelope];
@@ -265,7 +321,19 @@ async function executeWithRetry(fn, ctx, deps) {
265
321
  `Error processing ${isBatch ? "batch" : "message"} from topic ${topic2} (attempt ${attempt}/${maxAttempts}):`,
266
322
  err.stack
267
323
  );
268
- if (isLastAttempt) {
324
+ if (retryTopics && retry) {
325
+ const cap = Math.min(backoffMs, maxBackoffMs);
326
+ const delay = Math.floor(Math.random() * cap);
327
+ await sendToRetryTopic(
328
+ topic2,
329
+ rawMessages,
330
+ 1,
331
+ retry.maxRetries,
332
+ delay,
333
+ envelopes[0]?.headers ?? {},
334
+ deps
335
+ );
336
+ } else if (isLastAttempt) {
269
337
  if (dlq) {
270
338
  const dlqMeta = {
271
339
  error: err,
@@ -328,6 +396,7 @@ var KafkaClient = class {
328
396
  runningConsumers = /* @__PURE__ */ new Map();
329
397
  instrumentation;
330
398
  onMessageLost;
399
+ onRebalance;
331
400
  isAdminConnected = false;
332
401
  clientId;
333
402
  constructor(clientId, groupId, brokers, options) {
@@ -343,6 +412,7 @@ var KafkaClient = class {
343
412
  this.numPartitions = options?.numPartitions ?? 1;
344
413
  this.instrumentation = options?.instrumentation ?? [];
345
414
  this.onMessageLost = options?.onMessageLost;
415
+ this.onRebalance = options?.onRebalance;
346
416
  this.kafka = new KafkaClass({
347
417
  kafkaJS: {
348
418
  clientId: this.clientId,
@@ -443,8 +513,19 @@ var KafkaClient = class {
443
513
  this.logger.log("Producer disconnected");
444
514
  }
445
515
  async startConsumer(topics, handleMessage, options = {}) {
446
- const { consumer, schemaMap, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachMessage", options);
447
- const deps = { logger: this.logger, producer: this.producer, instrumentation: this.instrumentation, onMessageLost: this.onMessageLost };
516
+ if (options.retryTopics && !options.retry) {
517
+ throw new Error(
518
+ "retryTopics requires retry to be configured \u2014 set retry.maxRetries to enable the retry topic chain"
519
+ );
520
+ }
521
+ const { consumer, schemaMap, topicNames, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachMessage", options);
522
+ const deps = {
523
+ logger: this.logger,
524
+ producer: this.producer,
525
+ instrumentation: this.instrumentation,
526
+ onMessageLost: this.onMessageLost
527
+ };
528
+ const timeoutMs = options.handlerTimeoutMs;
448
529
  await consumer.run({
449
530
  eachMessage: async ({ topic: topic2, partition, message }) => {
450
531
  if (!message.value) {
@@ -473,20 +554,51 @@ var KafkaClient = class {
473
554
  message.offset
474
555
  );
475
556
  await executeWithRetry(
476
- () => runWithEnvelopeContext(
477
- { correlationId: envelope.correlationId, traceparent: envelope.traceparent },
478
- () => handleMessage(envelope)
479
- ),
480
- { envelope, rawMessages: [raw], interceptors, dlq, retry },
557
+ () => {
558
+ const fn = () => runWithEnvelopeContext(
559
+ {
560
+ correlationId: envelope.correlationId,
561
+ traceparent: envelope.traceparent
562
+ },
563
+ () => handleMessage(envelope)
564
+ );
565
+ return timeoutMs ? this.wrapWithTimeoutWarning(fn, timeoutMs, topic2) : fn();
566
+ },
567
+ {
568
+ envelope,
569
+ rawMessages: [raw],
570
+ interceptors,
571
+ dlq,
572
+ retry,
573
+ retryTopics: options.retryTopics
574
+ },
481
575
  deps
482
576
  );
483
577
  }
484
578
  });
485
579
  this.runningConsumers.set(gid, "eachMessage");
580
+ if (options.retryTopics && retry) {
581
+ await this.startRetryTopicConsumers(
582
+ topicNames,
583
+ gid,
584
+ handleMessage,
585
+ retry,
586
+ dlq,
587
+ interceptors,
588
+ schemaMap
589
+ );
590
+ }
591
+ return { groupId: gid, stop: () => this.stopConsumer(gid) };
486
592
  }
487
593
  async startBatchConsumer(topics, handleBatch, options = {}) {
488
594
  const { consumer, schemaMap, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachBatch", options);
489
- const deps = { logger: this.logger, producer: this.producer, instrumentation: this.instrumentation, onMessageLost: this.onMessageLost };
595
+ const deps = {
596
+ logger: this.logger,
597
+ producer: this.producer,
598
+ instrumentation: this.instrumentation,
599
+ onMessageLost: this.onMessageLost
600
+ };
601
+ const timeoutMs = options.handlerTimeoutMs;
490
602
  await consumer.run({
491
603
  eachBatch: async ({
492
604
  batch,
@@ -518,7 +630,13 @@ var KafkaClient = class {
518
630
  );
519
631
  if (validated === null) continue;
520
632
  envelopes.push(
521
- extractEnvelope(validated, headers, batch.topic, batch.partition, message.offset)
633
+ extractEnvelope(
634
+ validated,
635
+ headers,
636
+ batch.topic,
637
+ batch.partition,
638
+ message.offset
639
+ )
522
640
  );
523
641
  rawMessages.push(raw);
524
642
  }
@@ -531,7 +649,10 @@ var KafkaClient = class {
531
649
  commitOffsetsIfNecessary
532
650
  };
533
651
  await executeWithRetry(
534
- () => handleBatch(envelopes, meta),
652
+ () => {
653
+ const fn = () => handleBatch(envelopes, meta);
654
+ return timeoutMs ? this.wrapWithTimeoutWarning(fn, timeoutMs, batch.topic) : fn();
655
+ },
535
656
  {
536
657
  envelope: envelopes,
537
658
  rawMessages: batch.messages.filter((m) => m.value).map((m) => m.value.toString()),
@@ -545,17 +666,59 @@ var KafkaClient = class {
545
666
  }
546
667
  });
547
668
  this.runningConsumers.set(gid, "eachBatch");
669
+ return { groupId: gid, stop: () => this.stopConsumer(gid) };
548
670
  }
549
671
  // ── Consumer lifecycle ───────────────────────────────────────────
550
- async stopConsumer() {
551
- const tasks = [];
552
- for (const consumer of this.consumers.values()) {
553
- tasks.push(consumer.disconnect());
672
+ async stopConsumer(groupId) {
673
+ if (groupId !== void 0) {
674
+ const consumer = this.consumers.get(groupId);
675
+ if (!consumer) {
676
+ this.logger.warn(
677
+ `stopConsumer: no active consumer for group "${groupId}"`
678
+ );
679
+ return;
680
+ }
681
+ await consumer.disconnect().catch(() => {
682
+ });
683
+ this.consumers.delete(groupId);
684
+ this.runningConsumers.delete(groupId);
685
+ this.logger.log(`Consumer disconnected: group "${groupId}"`);
686
+ } else {
687
+ const tasks = Array.from(this.consumers.values()).map(
688
+ (c) => c.disconnect().catch(() => {
689
+ })
690
+ );
691
+ await Promise.allSettled(tasks);
692
+ this.consumers.clear();
693
+ this.runningConsumers.clear();
694
+ this.logger.log("All consumers disconnected");
554
695
  }
555
- await Promise.allSettled(tasks);
556
- this.consumers.clear();
557
- this.runningConsumers.clear();
558
- this.logger.log("All consumers disconnected");
696
+ }
697
+ /**
698
+ * Query consumer group lag per partition.
699
+ * Lag = broker high-watermark − last committed offset.
700
+ * A committed offset of -1 (nothing committed yet) counts as full lag.
701
+ */
702
+ async getConsumerLag(groupId) {
703
+ const gid = groupId ?? this.defaultGroupId;
704
+ if (!this.isAdminConnected) {
705
+ await this.admin.connect();
706
+ this.isAdminConnected = true;
707
+ }
708
+ const committedByTopic = await this.admin.fetchOffsets({ groupId: gid });
709
+ const result = [];
710
+ for (const { topic: topic2, partitions } of committedByTopic) {
711
+ const brokerOffsets = await this.admin.fetchTopicOffsets(topic2);
712
+ for (const { partition, offset } of partitions) {
713
+ const broker = brokerOffsets.find((o) => o.partition === partition);
714
+ if (!broker) continue;
715
+ const committed = parseInt(offset, 10);
716
+ const high = parseInt(broker.high, 10);
717
+ const lag = committed === -1 ? high : Math.max(0, high - committed);
718
+ result.push({ topic: topic2, partition, lag });
719
+ }
720
+ }
721
+ return result;
559
722
  }
560
723
  /** Check broker connectivity and return status, clientId, and available topics. */
561
724
  async checkStatus() {
@@ -588,18 +751,219 @@ var KafkaClient = class {
588
751
  this.runningConsumers.clear();
589
752
  this.logger.log("All connections closed");
590
753
  }
754
+ // ── Retry topic chain ────────────────────────────────────────────
755
+ /**
756
+ * Auto-start companion consumers on `<topic>.retry` for each original topic.
757
+ * Called by `startConsumer` when `retryTopics: true`.
758
+ *
759
+ * Flow per message:
760
+ * 1. Sleep until `x-retry-after` (scheduled by the main consumer or previous retry hop)
761
+ * 2. Call the original handler
762
+ * 3. On failure: if retries remain → re-send to `<originalTopic>.retry` with incremented attempt
763
+ * if exhausted → DLQ or onMessageLost
764
+ */
765
+ async startRetryTopicConsumers(originalTopics, originalGroupId, handleMessage, retry, dlq, interceptors, schemaMap) {
766
+ const retryTopicNames = originalTopics.map((t) => `${t}.retry`);
767
+ const retryGroupId = `${originalGroupId}-retry`;
768
+ const backoffMs = retry.backoffMs ?? 1e3;
769
+ const maxBackoffMs = retry.maxBackoffMs ?? 3e4;
770
+ const deps = {
771
+ logger: this.logger,
772
+ producer: this.producer,
773
+ instrumentation: this.instrumentation,
774
+ onMessageLost: this.onMessageLost
775
+ };
776
+ for (const rt of retryTopicNames) {
777
+ await this.ensureTopic(rt);
778
+ }
779
+ const consumer = this.getOrCreateConsumer(retryGroupId, false, true);
780
+ await consumer.connect();
781
+ await subscribeWithRetry(consumer, retryTopicNames, this.logger);
782
+ await consumer.run({
783
+ eachMessage: async ({ topic: retryTopic, partition, message }) => {
784
+ if (!message.value) return;
785
+ const raw = message.value.toString();
786
+ const parsed = parseJsonMessage(raw, retryTopic, this.logger);
787
+ if (parsed === null) return;
788
+ const headers = decodeHeaders(message.headers);
789
+ const originalTopic = headers[RETRY_HEADER_ORIGINAL_TOPIC] ?? retryTopic.replace(/\.retry$/, "");
790
+ const currentAttempt = parseInt(
791
+ headers[RETRY_HEADER_ATTEMPT] ?? "1",
792
+ 10
793
+ );
794
+ const maxRetries = parseInt(
795
+ headers[RETRY_HEADER_MAX_RETRIES] ?? String(retry.maxRetries),
796
+ 10
797
+ );
798
+ const retryAfter = parseInt(
799
+ headers[RETRY_HEADER_AFTER] ?? "0",
800
+ 10
801
+ );
802
+ const remaining = retryAfter - Date.now();
803
+ if (remaining > 0) {
804
+ consumer.pause([{ topic: retryTopic, partitions: [partition] }]);
805
+ await sleep(remaining);
806
+ consumer.resume([{ topic: retryTopic, partitions: [partition] }]);
807
+ }
808
+ const validated = await validateWithSchema(
809
+ parsed,
810
+ raw,
811
+ originalTopic,
812
+ schemaMap,
813
+ interceptors,
814
+ dlq,
815
+ { ...deps, originalHeaders: headers }
816
+ );
817
+ if (validated === null) return;
818
+ const envelope = extractEnvelope(
819
+ validated,
820
+ headers,
821
+ originalTopic,
822
+ partition,
823
+ message.offset
824
+ );
825
+ try {
826
+ const cleanups = [];
827
+ for (const inst of this.instrumentation) {
828
+ const c = inst.beforeConsume?.(envelope);
829
+ if (typeof c === "function") cleanups.push(c);
830
+ }
831
+ for (const interceptor of interceptors)
832
+ await interceptor.before?.(envelope);
833
+ await runWithEnvelopeContext(
834
+ {
835
+ correlationId: envelope.correlationId,
836
+ traceparent: envelope.traceparent
837
+ },
838
+ () => handleMessage(envelope)
839
+ );
840
+ for (const interceptor of interceptors)
841
+ await interceptor.after?.(envelope);
842
+ for (const cleanup of cleanups) cleanup();
843
+ } catch (error) {
844
+ const err = toError(error);
845
+ const nextAttempt = currentAttempt + 1;
846
+ const exhausted = currentAttempt >= maxRetries;
847
+ for (const inst of this.instrumentation)
848
+ inst.onConsumeError?.(envelope, err);
849
+ const reportedError = exhausted && maxRetries > 1 ? new KafkaRetryExhaustedError(
850
+ originalTopic,
851
+ [envelope.payload],
852
+ maxRetries,
853
+ { cause: err }
854
+ ) : err;
855
+ for (const interceptor of interceptors) {
856
+ await interceptor.onError?.(envelope, reportedError);
857
+ }
858
+ this.logger.error(
859
+ `Retry consumer error for ${originalTopic} (attempt ${currentAttempt}/${maxRetries}):`,
860
+ err.stack
861
+ );
862
+ if (!exhausted) {
863
+ const cap = Math.min(backoffMs * 2 ** currentAttempt, maxBackoffMs);
864
+ const delay = Math.floor(Math.random() * cap);
865
+ await sendToRetryTopic(
866
+ originalTopic,
867
+ [raw],
868
+ nextAttempt,
869
+ maxRetries,
870
+ delay,
871
+ headers,
872
+ deps
873
+ );
874
+ } else if (dlq) {
875
+ await sendToDlq(originalTopic, raw, deps, {
876
+ error: err,
877
+ // +1 to account for the main consumer's initial attempt before
878
+ // routing to the retry topic, making this consistent with the
879
+ // in-process retry path where attempt counts all tries.
880
+ attempt: currentAttempt + 1,
881
+ originalHeaders: headers
882
+ });
883
+ } else {
884
+ await deps.onMessageLost?.({
885
+ topic: originalTopic,
886
+ error: err,
887
+ attempt: currentAttempt,
888
+ headers
889
+ });
890
+ }
891
+ }
892
+ }
893
+ });
894
+ this.runningConsumers.set(retryGroupId, "eachMessage");
895
+ await this.waitForPartitionAssignment(consumer, retryTopicNames);
896
+ this.logger.log(
897
+ `Retry topic consumers started for: ${originalTopics.join(", ")} (group: ${retryGroupId})`
898
+ );
899
+ }
591
900
  // ── Private helpers ──────────────────────────────────────────────
901
+ /**
902
+ * Poll `consumer.assignment()` until the consumer has received at least one
903
+ * partition for the given topics, then return. Logs a warning and returns
904
+ * (rather than throwing) on timeout so that a slow broker does not break
905
+ * the caller — in the worst case a message sent immediately after would be
906
+ * missed, which is the same behaviour as before this guard was added.
907
+ */
908
+ async waitForPartitionAssignment(consumer, topics, timeoutMs = 1e4) {
909
+ const topicSet = new Set(topics);
910
+ const deadline = Date.now() + timeoutMs;
911
+ while (Date.now() < deadline) {
912
+ try {
913
+ const assigned = consumer.assignment();
914
+ if (assigned.some((a) => topicSet.has(a.topic))) return;
915
+ } catch {
916
+ }
917
+ await sleep(200);
918
+ }
919
+ this.logger.warn(
920
+ `Retry consumer did not receive partition assignments for [${topics.join(", ")}] within ${timeoutMs}ms`
921
+ );
922
+ }
592
923
  getOrCreateConsumer(groupId, fromBeginning, autoCommit) {
593
924
  if (!this.consumers.has(groupId)) {
594
- this.consumers.set(
595
- groupId,
596
- this.kafka.consumer({
597
- kafkaJS: { groupId, fromBeginning, autoCommit }
598
- })
599
- );
925
+ const config = {
926
+ kafkaJS: { groupId, fromBeginning, autoCommit }
927
+ };
928
+ if (this.onRebalance) {
929
+ const onRebalance = this.onRebalance;
930
+ config["rebalance_cb"] = (err, assignment) => {
931
+ const type = err.code === -175 ? "assign" : "revoke";
932
+ try {
933
+ onRebalance(
934
+ type,
935
+ assignment.map((p) => ({
936
+ topic: p.topic,
937
+ partition: p.partition
938
+ }))
939
+ );
940
+ } catch (e) {
941
+ this.logger.warn(
942
+ `onRebalance callback threw: ${e.message}`
943
+ );
944
+ }
945
+ };
946
+ }
947
+ this.consumers.set(groupId, this.kafka.consumer(config));
600
948
  }
601
949
  return this.consumers.get(groupId);
602
950
  }
951
+ /**
952
+ * Start a timer that logs a warning if `fn` hasn't resolved within `timeoutMs`.
953
+ * The handler itself is not cancelled — the warning is diagnostic only.
954
+ */
955
+ wrapWithTimeoutWarning(fn, timeoutMs, topic2) {
956
+ let timer;
957
+ const promise = fn().finally(() => {
958
+ if (timer !== void 0) clearTimeout(timer);
959
+ });
960
+ timer = setTimeout(() => {
961
+ this.logger.warn(
962
+ `Handler for topic "${topic2}" has not resolved after ${timeoutMs}ms \u2014 possible stuck handler`
963
+ );
964
+ }, timeoutMs);
965
+ return promise;
966
+ }
603
967
  resolveTopicName(topicOrDescriptor) {
604
968
  if (typeof topicOrDescriptor === "string") return topicOrDescriptor;
605
969
  if (topicOrDescriptor && typeof topicOrDescriptor === "object" && "__topic" in topicOrDescriptor) {
@@ -656,7 +1020,9 @@ var KafkaClient = class {
656
1020
  inst.beforeSend?.(topic2, envelopeHeaders);
657
1021
  }
658
1022
  return {
659
- value: JSON.stringify(await this.validateMessage(topicOrDesc, m.value)),
1023
+ value: JSON.stringify(
1024
+ await this.validateMessage(topicOrDesc, m.value)
1025
+ ),
660
1026
  key: m.key ?? null,
661
1027
  headers: envelopeHeaders
662
1028
  };
@@ -682,7 +1048,11 @@ var KafkaClient = class {
682
1048
  `Cannot use ${mode} on consumer group "${gid}" \u2014 it is already running with ${oppositeMode}. Use a different groupId for this consumer.`
683
1049
  );
684
1050
  }
685
- const consumer = this.getOrCreateConsumer(gid, fromBeginning, options.autoCommit ?? true);
1051
+ const consumer = this.getOrCreateConsumer(
1052
+ gid,
1053
+ fromBeginning,
1054
+ options.autoCommit ?? true
1055
+ );
686
1056
  const schemaMap = this.buildSchemaMap(topics, optionSchemas);
687
1057
  const topicNames = topics.map(
688
1058
  (t) => this.resolveTopicName(t)
@@ -696,7 +1066,12 @@ var KafkaClient = class {
696
1066
  }
697
1067
  }
698
1068
  await consumer.connect();
699
- await subscribeWithRetry(consumer, topicNames, this.logger, options.subscribeRetry);
1069
+ await subscribeWithRetry(
1070
+ consumer,
1071
+ topicNames,
1072
+ this.logger,
1073
+ options.subscribeRetry
1074
+ );
700
1075
  this.logger.log(
701
1076
  `${mode === "eachBatch" ? "Batch consumer" : "Consumer"} subscribed to topics: ${topicNames.join(", ")}`
702
1077
  );