@eventferry/kafka 3.3.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -51,6 +51,7 @@ function classifyKafkajsError(err) {
51
51
  if (e.name === "KafkaJSNonRetriableError") return "fatal";
52
52
  const type = typeof e.type === "string" ? e.type : void 0;
53
53
  if (type) {
54
+ if (FENCED_TYPES.has(type)) return "fenced";
54
55
  if (RETRIABLE_TYPES.has(type)) return "retriable";
55
56
  if (POISON_TYPES.has(type)) return "poison";
56
57
  if (FATAL_TYPES.has(type)) return "fatal";
@@ -84,9 +85,11 @@ var POISON_TYPES = /* @__PURE__ */ new Set([
84
85
  "INVALID_REQUIRED_ACKS",
85
86
  "INVALID_PARTITIONS"
86
87
  ]);
87
- var FATAL_TYPES = /* @__PURE__ */ new Set([
88
+ var FENCED_TYPES = /* @__PURE__ */ new Set([
88
89
  "INVALID_PRODUCER_EPOCH",
89
- "PRODUCER_FENCED",
90
+ "PRODUCER_FENCED"
91
+ ]);
92
+ var FATAL_TYPES = /* @__PURE__ */ new Set([
90
93
  "TOPIC_AUTHORIZATION_FAILED",
91
94
  "CLUSTER_AUTHORIZATION_FAILED",
92
95
  "TRANSACTIONAL_ID_AUTHORIZATION_FAILED",
@@ -117,8 +120,8 @@ var CODE_TO_KIND = /* @__PURE__ */ new Map([
117
120
  // TOPIC_AUTHORIZATION_FAILED
118
121
  [31, "fatal"],
119
122
  // CLUSTER_AUTHORIZATION_FAILED
120
- [47, "fatal"],
121
- // INVALID_PRODUCER_EPOCH
123
+ [47, "fenced"],
124
+ // INVALID_PRODUCER_EPOCH — retryable once via publisher reconnect
122
125
  [58, "fatal"],
123
126
  // SASL_AUTHENTICATION_FAILED
124
127
  [74, "retriable"],
@@ -148,7 +151,13 @@ var UNSUPPORTED_BY_KAFKAJS = [
148
151
  "lingerMs",
149
152
  "batchSize",
150
153
  "deliveryTimeoutMs",
151
- "maxRequestSize"
154
+ "maxRequestSize",
155
+ // Confluent-only escape hatches; ignored on kafkajs.
156
+ "compressionLevel",
157
+ "rawProducerConfig",
158
+ // librdkafka stats — kafkajs has no equivalent surface.
159
+ "onStats",
160
+ "statsIntervalMs"
152
161
  ];
153
162
  var KafkaJsDriver = class {
154
163
  transactional;
@@ -187,13 +196,21 @@ var KafkaJsDriver = class {
187
196
  // the provider's returned token (other fields are ignored).
188
197
  sasl: this.opts.sasl
189
198
  });
190
- const createPartitioner = resolveCreatePartitioner(
191
- mod.Partitioners,
199
+ return kafka.producer(await this.buildProducerOptions(mod.Partitioners));
200
+ }
201
+ /**
202
+ * Compute the options object passed to `kafka.producer({...})`. Exposed
203
+ * as a test seam so power-user escape hatches (customPartitioner,
204
+ * rawKafkaJsProducerConfig) can be asserted without a live broker.
205
+ */
206
+ async buildProducerOptions(partitioners) {
207
+ const createPartitioner = this.opts.customPartitioner ?? resolveCreatePartitioner(
208
+ partitioners,
192
209
  this.opts.partitioner,
193
210
  this.transactional
194
211
  );
195
212
  const resolvedTxId = this.transactional ? await resolveTransactionalId(this.opts.transactionalId) : void 0;
196
- return kafka.producer({
213
+ return {
197
214
  idempotent: this.opts.idempotent ?? true,
198
215
  // Idempotent / transactional producers cap maxInFlight at 5. When the
199
216
  // user picks transactional we force 1 to keep strict ordering across
@@ -207,13 +224,32 @@ var KafkaJsDriver = class {
207
224
  transactionTimeout: this.opts.transactionTimeoutMs,
208
225
  // Setting any partitioner choice silences kafkajs's
209
226
  // KafkaJSPartitionerNotSpecified warning.
210
- createPartitioner
211
- });
227
+ createPartitioner,
228
+ // Power-user escape hatch — merged LAST so raw keys win against the
229
+ // translated ones. That's the contract: anything you put here is
230
+ // final, even if it overrides idempotent/transactionalId/etc.
231
+ ...this.opts.rawKafkaJsProducerConfig ?? {}
232
+ };
212
233
  }
213
234
  async disconnect() {
214
235
  await this.producer?.disconnect();
215
236
  this.producer = null;
216
237
  }
238
+ /**
239
+ * Construct a kafkajs admin client wrapped in the eventferry-facing
240
+ * `KafkaDriverAdmin` shape. The publisher calls `.connect()` on the
241
+ * returned object before exposing it via `publisher.admin()`.
242
+ */
243
+ async admin() {
244
+ const mod = await importKafkaJs();
245
+ const kafka = new mod.Kafka({
246
+ clientId: this.opts.clientId ?? "eventferry-admin",
247
+ brokers: this.opts.brokers,
248
+ ssl: this.opts.ssl,
249
+ sasl: this.opts.sasl
250
+ });
251
+ return new KafkaJsAdmin(kafka.admin());
252
+ }
217
253
  async sendBatch(messages) {
218
254
  if (!this.producer) throw new Error("KafkaJsDriver not connected");
219
255
  const topicMessages = groupByTopic(messages, this.opts.compression);
@@ -302,6 +338,69 @@ function warnUnsupportedKafkajsOptions(opts) {
302
338
  function _resetKafkajsWarnDedup() {
303
339
  warnedKafkajsKeys.clear();
304
340
  }
341
/**
 * Adapter that wraps a kafkajs admin client in the eventferry-facing
 * `KafkaDriverAdmin` shape. Every method delegates to the wrapped client.
 */
var KafkaJsAdmin = class {
  /**
   * @param client kafkajs admin client (the value of `kafka.admin()`).
   */
  constructor(client) {
    this.client = client;
  }
  client;
  /** Connect the wrapped admin client. */
  async connect() {
    await this.client.connect();
  }
  /** Disconnect the wrapped admin client. */
  async close() {
    await this.client.disconnect();
  }
  /** @returns whatever the client's `listTopics()` resolves to. */
  async listTopics() {
    return await this.client.listTopics();
  }
  /**
   * Describe the partitions of each requested topic.
   *
   * @param topics topic names to describe.
   * @returns one `{ topic, partitions }` entry per requested name, in the
   *   same order (duplicates included). Topics absent from `listTopics()`
   *   or from the metadata response get an empty `partitions` array.
   */
  async describeTopics(topics) {
    if (topics.length === 0) return [];
    const onCluster = new Set(await this.client.listTopics());
    // Only names present in the listing are sent to fetchTopicMetadata —
    // presumably because asking about an unknown topic would fail; confirm.
    const existing = topics.filter((t) => onCluster.has(t));
    const meta = existing.length > 0 ? await this.client.fetchTopicMetadata({ topics: existing }) : { topics: [] };
    const byName = new Map(meta.topics.map((t) => [t.name, t]));
    return topics.map((topic) => {
      // Set membership replaces the former per-topic scan of a `missing` array.
      const found = onCluster.has(topic) ? byName.get(topic) : void 0;
      if (!found) return { topic, partitions: [] };
      return {
        topic,
        partitions: found.partitions.map(({ partitionId, leader, replicas, isr }) => ({
          partitionId,
          leader,
          replicas,
          isr
        }))
      };
    });
  }
  /**
   * Create the given topics and wait for leaders.
   *
   * @param specs `{ topic, numPartitions?, replicationFactor?, configEntries? }`
   *   records; `configEntries` is a plain object turned into
   *   `{ name, value }` pairs. An empty list is a no-op.
   * @throws any client error that does not look like "topic already exists".
   */
  async createTopics(specs) {
    if (specs.length === 0) return;
    const topics = specs.map((s) => ({
      topic: s.topic,
      numPartitions: s.numPartitions,
      replicationFactor: s.replicationFactor,
      configEntries: s.configEntries ? Object.entries(s.configEntries).map(([name, value]) => ({ name, value })) : void 0
    }));
    try {
      await this.client.createTopics({ topics, waitForLeaders: true });
    } catch (err) {
      const e = err;
      // An "already exists" failure is treated as success.
      if (e?.type === "TOPIC_ALREADY_EXISTS") return;
      if (/already exists/i.test(e?.message ?? "")) return;
      throw err;
    }
  }
  /**
   * Grow topics to a total partition count.
   *
   * @param specs `{ topic, totalCount }` records; an empty list is a no-op.
   */
  async createPartitions(specs) {
    if (specs.length === 0) return;
    await this.client.createPartitions({
      topicPartitions: specs.map((s) => ({
        topic: s.topic,
        count: s.totalCount
      }))
    });
  }
};
305
404
  async function importKafkaJs() {
306
405
  try {
307
406
  return await import("kafkajs");
@@ -344,8 +443,8 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
344
443
  // ERR__TRANSPORT
345
444
  [-198, "poison"],
346
445
  // ERR__BAD_COMPRESSION
347
- [-144, "fatal"],
348
- // ERR__FENCED — producer fenced by another with same txn id
446
+ [-144, "fenced"],
447
+ // ERR__FENCED — producer fenced; publisher reconnect attempts a transparent recovery once
349
448
  [-150, "fatal"],
350
449
  // ERR__FATAL — unrecoverable librdkafka error
351
450
  [-169, "fatal"],
@@ -377,8 +476,8 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
377
476
  // TOPIC_AUTHORIZATION_FAILED
378
477
  [31, "fatal"],
379
478
  // CLUSTER_AUTHORIZATION_FAILED
380
- [47, "fatal"],
381
- // INVALID_PRODUCER_EPOCH
479
+ [47, "fenced"],
480
+ // INVALID_PRODUCER_EPOCH — retryable once via publisher reconnect
382
481
  [58, "fatal"],
383
482
  // SASL_AUTHENTICATION_FAILED
384
483
  [74, "retriable"],
@@ -392,7 +491,7 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
392
491
  ]);
393
492
  var NAME_TO_KIND = /* @__PURE__ */ new Map([
394
493
  ["ERR__QUEUE_FULL", "backpressure"],
395
- ["ERR__FENCED", "fatal"],
494
+ ["ERR__FENCED", "fenced"],
396
495
  ["ERR__FATAL", "fatal"],
397
496
  ["ERR__AUTHENTICATION", "fatal"],
398
497
  ["ERR__SSL", "fatal"],
@@ -401,7 +500,7 @@ var NAME_TO_KIND = /* @__PURE__ */ new Map([
401
500
  ["ERR__BAD_COMPRESSION", "poison"],
402
501
  ["ERR_TOPIC_AUTHORIZATION_FAILED", "fatal"],
403
502
  ["ERR_CLUSTER_AUTHORIZATION_FAILED", "fatal"],
404
- ["ERR_INVALID_PRODUCER_EPOCH", "fatal"],
503
+ ["ERR_INVALID_PRODUCER_EPOCH", "fenced"],
405
504
  ["ERR_SASL_AUTHENTICATION_FAILED", "fatal"],
406
505
  ["ERR_CORRUPT_MESSAGE", "poison"],
407
506
  ["ERR_MSG_SIZE_TOO_LARGE", "poison"],
@@ -434,6 +533,15 @@ function buildConfluentClientConfig(opts) {
434
533
  if (opts.transactionTimeoutMs !== void 0) {
435
534
  librdkafka["transaction.timeout.ms"] = opts.transactionTimeoutMs;
436
535
  }
536
+ if (opts.compressionLevel !== void 0) {
537
+ librdkafka["compression.level"] = opts.compressionLevel;
538
+ }
539
+ if (opts.onStats) {
540
+ librdkafka["stats_cb"] = wrapStatsCallback(opts.onStats);
541
+ librdkafka["statistics.interval.ms"] = opts.statsIntervalMs ?? 3e4;
542
+ } else if (opts.statsIntervalMs !== void 0) {
543
+ librdkafka["statistics.interval.ms"] = opts.statsIntervalMs;
544
+ }
437
545
  const tlsRequested = opts.ssl === true || isTlsConfig(opts.ssl);
438
546
  const saslRequested = !!opts.sasl;
439
547
  if (saslRequested && tlsRequested) {
@@ -463,11 +571,28 @@ function buildConfluentClientConfig(opts) {
463
571
  if (opts.sasl) {
464
572
  kafkaJS["sasl"] = opts.sasl;
465
573
  }
574
+ if (opts.rawProducerConfig) {
575
+ Object.assign(librdkafka, opts.rawProducerConfig);
576
+ }
466
577
  return { kafkaJS, librdkafka };
467
578
  }
468
579
  function isTlsConfig(v) {
469
580
  return typeof v === "object" && v !== null;
470
581
  }
582
/**
 * Adapt a user `onStats` handler into the callback stored under
 * `stats_cb` in the librdkafka config.
 *
 * The returned callback JSON-parses string payloads (anything else is passed
 * through untouched), drops payloads that fail to parse, and ignores any
 * exception thrown by `onStats`. It always returns `undefined`.
 *
 * @param onStats handler invoked with the decoded statistics payload.
 */
function wrapStatsCallback(onStats) {
  const decode = (raw) => (typeof raw === "string" ? JSON.parse(raw) : raw);
  return (raw) => {
    let stats;
    try {
      stats = decode(raw);
    } catch {
      // Unparseable payload: skip this emission.
      return;
    }
    try {
      onStats(stats);
    } catch {
      // A throwing user handler is deliberately ignored.
    }
  };
}
471
596
  function stringifyPem(input) {
472
597
  if (Array.isArray(input)) {
473
598
  return input.map((x) => typeof x === "string" ? x : x.toString("utf8")).join("\n");
@@ -516,6 +641,17 @@ var ConfluentDriver = class {
516
641
  await this.producer?.disconnect();
517
642
  this.producer = null;
518
643
  }
644
+ /**
645
+ * Construct a librdkafka-backed admin client wrapped in the eventferry
646
+ * `KafkaDriverAdmin` shape. The publisher's `connect()` is called before
647
+ * the admin reaches the user.
648
+ */
649
+ async admin() {
650
+ const mod = await importConfluent();
651
+ const { kafkaJS, librdkafka } = buildConfluentClientConfig(this.opts);
652
+ const kafka = new mod.KafkaJS.Kafka({ kafkaJS, ...librdkafka });
653
+ return new ConfluentAdmin(kafka.admin());
654
+ }
519
655
  async sendBatch(messages) {
520
656
  if (!this.producer) throw new Error("ConfluentDriver not connected");
521
657
  const topicMessages = groupByTopic2(messages);
@@ -584,6 +720,69 @@ function groupByTopic2(messages) {
584
720
  messages: msgs
585
721
  }));
586
722
  }
723
/**
 * Adapter that wraps a Confluent (`@confluentinc/kafka-javascript`) admin
 * client in the eventferry-facing `KafkaDriverAdmin` shape. Every method
 * delegates to the wrapped client.
 */
var ConfluentAdmin = class {
  /**
   * @param client admin client obtained from the Confluent `Kafka#admin()`.
   */
  constructor(client) {
    this.client = client;
  }
  client;
  /** Connect the wrapped admin client. */
  async connect() {
    await this.client.connect();
  }
  /** Disconnect the wrapped admin client. */
  async close() {
    await this.client.disconnect();
  }
  /** @returns whatever the client's `listTopics()` resolves to. */
  async listTopics() {
    return await this.client.listTopics();
  }
  /**
   * Describe the partitions of each requested topic.
   *
   * @param topics topic names to describe.
   * @returns one `{ topic, partitions }` entry per requested name, in the
   *   same order (duplicates included). Topics absent from `listTopics()`
   *   or from the metadata response get an empty `partitions` array.
   */
  async describeTopics(topics) {
    if (topics.length === 0) return [];
    const onCluster = new Set(await this.client.listTopics());
    // Only names present in the listing are sent to fetchTopicMetadata —
    // presumably because asking about an unknown topic would fail; confirm.
    const existing = topics.filter((t) => onCluster.has(t));
    const meta = existing.length > 0 ? await this.client.fetchTopicMetadata({ topics: existing }) : { topics: [] };
    const byName = new Map(meta.topics.map((t) => [t.name, t]));
    return topics.map((topic) => {
      // Set membership replaces the former per-topic scan of a `missing` array.
      const found = onCluster.has(topic) ? byName.get(topic) : void 0;
      if (!found) return { topic, partitions: [] };
      return {
        topic,
        partitions: found.partitions.map(({ partitionId, leader, replicas, isr }) => ({
          partitionId,
          leader,
          replicas,
          isr
        }))
      };
    });
  }
  /**
   * Create the given topics and wait for leaders.
   *
   * @param specs `{ topic, numPartitions?, replicationFactor?, configEntries? }`
   *   records; `configEntries` is a plain object turned into
   *   `{ name, value }` pairs. An empty list is a no-op.
   * @throws any client error that does not look like "topic already exists".
   */
  async createTopics(specs) {
    if (specs.length === 0) return;
    const topics = specs.map((s) => ({
      topic: s.topic,
      numPartitions: s.numPartitions,
      replicationFactor: s.replicationFactor,
      configEntries: s.configEntries ? Object.entries(s.configEntries).map(([name, value]) => ({ name, value })) : void 0
    }));
    try {
      await this.client.createTopics({ topics, waitForLeaders: true });
    } catch (err) {
      const e = err;
      // An "already exists" failure (error code 36, matching name, or
      // matching message) is treated as success.
      if (e?.code === 36 || e?.name === "TOPIC_ALREADY_EXISTS") return;
      if (/already exists/i.test(e?.message ?? "")) return;
      throw err;
    }
  }
  /**
   * Grow topics to a total partition count.
   *
   * @param specs `{ topic, totalCount }` records; an empty list is a no-op.
   */
  async createPartitions(specs) {
    if (specs.length === 0) return;
    await this.client.createPartitions({
      topicPartitions: specs.map((s) => ({
        topic: s.topic,
        count: s.totalCount
      }))
    });
  }
};
587
786
  async function importConfluent() {
588
787
  try {
589
788
  return await import("@confluentinc/kafka-javascript");
@@ -634,10 +833,18 @@ var KafkaPublisher = class {
634
833
  logger;
635
834
  hooks;
636
835
  tracer;
836
+ validateTopicsOnConnect;
837
+ autoRecoverFromFence;
838
+ // Serialize reconnects so concurrent publish() calls hitting a fence
839
+ // all observe the same single reconnect attempt — the second publish
840
+ // doesn't try to disconnect a producer the first is still re-initing.
841
+ fenceRecovery = null;
637
842
  constructor(opts) {
638
843
  this.logger = opts.logger;
639
844
  this.hooks = opts.hooks ?? {};
640
845
  this.tracer = opts.tracer ?? new NoopKafkaTracer();
846
+ this.validateTopicsOnConnect = opts.validateTopicsOnConnect ? Object.freeze([...opts.validateTopicsOnConnect]) : void 0;
847
+ this.autoRecoverFromFence = opts.autoRecoverFromFence ?? false;
641
848
  const onTransactionAbort = this.hooks.onTransactionAbort ? (error) => {
642
849
  void safeHook(
643
850
  this.logger,
@@ -649,8 +856,90 @@ var KafkaPublisher = class {
649
856
  }
650
857
  async connect() {
651
858
  await this.driver.connect();
859
+ if (this.validateTopicsOnConnect && this.validateTopicsOnConnect.length) {
860
+ await this.assertTopicsExist(this.validateTopicsOnConnect);
861
+ }
652
862
  await safeHook(this.logger, "onConnect", () => this.hooks.onConnect?.());
653
863
  }
864
+ /**
865
+ * Borrow a new admin client from the driver. The returned admin is
866
+ * connected and ready to use; the CALLER must `close()` it. Throws if the
867
+ * driver does not implement admin (custom driver lacking the capability).
868
+ */
869
+ async admin() {
870
+ const driverAdmin = await this.openDriverAdmin();
871
+ return driverAdmin;
872
+ }
873
+ /**
874
+ * Idempotently provision topics. Each spec creates the topic if absent;
875
+ * existing topics are skipped without error. If `growPartitions: true`
876
+ * (default false), topics whose current partition count is below the
877
+ * requested `numPartitions` are grown via `createPartitions`.
878
+ *
879
+ * Replication factor and config entries on EXISTING topics are NOT
880
+ * reconciled — Kafka does not provide a safe in-place alter for those
881
+ * (changing replication requires reassignment; configs use alterConfigs).
882
+ * Reach for the raw admin if you need that.
883
+ */
884
+ async ensureTopics(specs, opts = {}) {
885
+ if (specs.length === 0) return;
886
+ const admin = await this.openDriverAdmin();
887
+ try {
888
+ const topicNames = specs.map((s) => s.topic);
889
+ const existing = await admin.describeTopics(topicNames);
890
+ const existingByName = new Map(existing.map((t) => [t.topic, t]));
891
+ const toCreate = specs.filter(
892
+ (s) => (existingByName.get(s.topic)?.partitions.length ?? 0) === 0
893
+ );
894
+ if (toCreate.length) await admin.createTopics(toCreate);
895
+ if (opts.growPartitions) {
896
+ const grow = [];
897
+ for (const s of specs) {
898
+ if (s.numPartitions === void 0) continue;
899
+ const current = existingByName.get(s.topic);
900
+ const currentCount = current?.partitions.length ?? 0;
901
+ if (currentCount > 0 && currentCount < s.numPartitions) {
902
+ grow.push({ topic: s.topic, totalCount: s.numPartitions });
903
+ }
904
+ }
905
+ if (grow.length) await admin.createPartitions(grow);
906
+ }
907
+ } finally {
908
+ await admin.close();
909
+ }
910
+ }
911
+ /**
912
+ * Borrow a fresh admin from the driver and connect it. Throws when the
913
+ * driver does not implement admin (custom drivers without that capability).
914
+ */
915
+ async openDriverAdmin() {
916
+ if (!this.driver.admin) {
917
+ throw new Error(
918
+ "KafkaPublisher: configured driver does not implement admin(). Use the built-in kafkajs or confluent driver, or extend your custom driver."
919
+ );
920
+ }
921
+ const admin = await this.driver.admin();
922
+ await admin.connect();
923
+ return admin;
924
+ }
925
+ /**
926
+ * Open an admin, list topics, throw if any required topic is missing.
927
+ * Always closes the admin (success or failure).
928
+ */
929
+ async assertTopicsExist(required) {
930
+ const admin = await this.openDriverAdmin();
931
+ try {
932
+ const all = new Set(await admin.listTopics());
933
+ const missing = required.filter((t) => !all.has(t));
934
+ if (missing.length) {
935
+ throw new Error(
936
+ `KafkaPublisher: validateTopicsOnConnect failed \u2014 topics missing on cluster: ${missing.join(", ")}`
937
+ );
938
+ }
939
+ } finally {
940
+ await admin.close();
941
+ }
942
+ }
654
943
  async disconnect() {
655
944
  await this.driver.disconnect();
656
945
  await safeHook(
@@ -662,9 +951,14 @@ var KafkaPublisher = class {
662
951
  async publish(messages) {
663
952
  if (messages.length === 0) return [];
664
953
  const span = this.startBatchSpan(messages);
954
+ const outgoing = this.tracer.inject ? messages.map((m) => {
955
+ const headers = { ...m.headers };
956
+ this.tracer.inject(span, headers);
957
+ return { ...m, headers };
958
+ }) : messages;
665
959
  let results;
666
960
  try {
667
- results = await this.driver.sendBatch(messages);
961
+ results = await this.driver.sendBatch(outgoing);
668
962
  } catch (err) {
669
963
  const error = err instanceof Error ? err : new Error(String(err));
670
964
  span.setStatus({ code: "error", message: error.message });
@@ -673,6 +967,20 @@ var KafkaPublisher = class {
673
967
  await safeHook(this.logger, "onError", () => this.hooks.onError?.(error));
674
968
  throw err;
675
969
  }
970
+ const firstFenced = results.find(
971
+ (r) => !r.ok && r.errorKind === "fenced"
972
+ );
973
+ if (firstFenced) {
974
+ const fenceErr = firstFenced.error ?? new Error("producer fenced");
975
+ await safeHook(
976
+ this.logger,
977
+ "onProducerFenced",
978
+ () => this.hooks.onProducerFenced?.(fenceErr)
979
+ );
980
+ if (this.autoRecoverFromFence) {
981
+ results = await this.recoverAndRetry(outgoing, results);
982
+ }
983
+ }
676
984
  const byId = new Map(messages.map((m) => [m.recordId, m]));
677
985
  let allOk = true;
678
986
  for (const r of results) {
@@ -723,6 +1031,110 @@ var KafkaPublisher = class {
723
1031
  // Read-only view of the underlying driver's `transactional` value.
  get transactional() {
724
1032
  return this.driver.transactional;
725
1033
  }
1034
+ /**
1035
+ * Cheap reachability probe. Borrows a fresh admin client, calls
1036
+ * `listTopics`, and returns timing + outcome. Useful as the body of a
1037
+ * `/healthz` or `/readyz` endpoint — proves the broker is reachable
1038
+ * AND that the configured credentials still authenticate against it,
1039
+ * without writing a record.
1040
+ *
1041
+ * Does NOT exercise the producer's send path — a healthy admin
1042
+ * connection doesn't guarantee `publish()` will succeed (a fenced
1043
+ * transactional producer would still answer healthy here). Treat this
1044
+ * as "broker reachable + auth still good", not "publisher is fully
1045
+ * operational".
1046
+ *
1047
+ * Default timeout 5_000 ms — long enough to ride out a single broker
1048
+ * leader election, short enough to fail a liveness probe meaningfully.
1049
+ * Set `timeoutMs: 0` to disable the timer entirely.
1050
+ *
1051
+ * The driver must implement `admin()` (the built-ins do); custom
1052
+ * drivers without admin get `{ ok: false, error: ... }` instead of
1053
+ * the throw `publisher.admin()` would surface — health checks are
1054
+ * not the place to crash.
1055
+ */
1056
+ async healthCheck(opts = {}) {
1057
+ const timeoutMs = opts.timeoutMs ?? 5e3;
1058
+ const startedAt = Date.now();
1059
+ if (!this.driver.admin) {
1060
+ return {
1061
+ ok: false,
1062
+ latencyMs: 0,
1063
+ timestamp: startedAt,
1064
+ error: new Error(
1065
+ "KafkaPublisher.healthCheck: configured driver does not implement admin()"
1066
+ )
1067
+ };
1068
+ }
1069
+ let admin = null;
1070
+ try {
1071
+ admin = await this.driver.admin();
1072
+ await admin.connect();
1073
+ const probe = admin.listTopics();
1074
+ if (timeoutMs > 0) {
1075
+ await raceWithTimeout(probe, timeoutMs, "healthCheck");
1076
+ } else {
1077
+ await probe;
1078
+ }
1079
+ return {
1080
+ ok: true,
1081
+ latencyMs: Date.now() - startedAt,
1082
+ timestamp: startedAt
1083
+ };
1084
+ } catch (err) {
1085
+ const error = err instanceof Error ? err : new Error(String(err));
1086
+ return {
1087
+ ok: false,
1088
+ latencyMs: Date.now() - startedAt,
1089
+ timestamp: startedAt,
1090
+ error
1091
+ };
1092
+ } finally {
1093
+ try {
1094
+ await admin?.close();
1095
+ } catch {
1096
+ }
1097
+ }
1098
+ }
1099
+ /**
1100
+ * Disconnect + re-connect the driver and re-send the batch ONCE. Used
1101
+ * by the fence-recovery path. Concurrent fence recoveries dedupe on a
1102
+ * shared in-flight promise (`fenceRecovery`) so we don't tear the
1103
+ * producer down while another batch is mid-restart.
1104
+ *
1105
+ * If the second send STILL reports any fenced records, those failures
1106
+ * are returned unchanged — another instance has almost certainly taken
1107
+ * the same `transactionalId` and silently retrying again would mask
1108
+ * the misconfiguration.
1109
+ */
1110
+ async recoverAndRetry(outgoing, firstResults) {
1111
+ if (!this.fenceRecovery) {
1112
+ this.fenceRecovery = (async () => {
1113
+ try {
1114
+ await this.driver.disconnect();
1115
+ await this.driver.connect();
1116
+ } finally {
1117
+ this.fenceRecovery = null;
1118
+ }
1119
+ })();
1120
+ }
1121
+ try {
1122
+ await this.fenceRecovery;
1123
+ } catch (err) {
1124
+ const reconnectErr = err instanceof Error ? err : new Error(String(err));
1125
+ await safeHook(
1126
+ this.logger,
1127
+ "onError",
1128
+ () => this.hooks.onError?.(reconnectErr)
1129
+ );
1130
+ return firstResults;
1131
+ }
1132
+ try {
1133
+ return await this.driver.sendBatch(outgoing);
1134
+ } catch {
1135
+ return firstResults;
1136
+ }
1137
+ }
726
1138
  /**
727
1139
  * Start a span for the batch following the OTel messaging conventions.
728
1140
  *
@@ -741,6 +1153,26 @@ var KafkaPublisher = class {
741
1153
  });
742
1154
  }
743
1155
  };
1156
/**
 * Settle with the outcome of `p`, or reject with
 * `"<label> timed out after <ms>ms"` if `p` has not settled within `ms`
 * milliseconds.
 *
 * The timer is unref'd when the runtime supports it and is cleared as soon
 * as `p` settles. `p` itself is not cancelled when the deadline wins.
 *
 * @param p promise to wait for.
 * @param ms deadline in milliseconds.
 * @param label prefix for the timeout error message.
 */
function raceWithTimeout(p, ms, label) {
  return new Promise((resolve, reject) => {
    const onExpire = () => {
      reject(new Error(`${label} timed out after ${ms}ms`));
    };
    const handle = setTimeout(onExpire, ms);
    if (typeof handle.unref === "function") {
      handle.unref();
    }
    // Wrap a settle function so the pending timer is cancelled first.
    const settleWith = (finish) => (payload) => {
      clearTimeout(handle);
      finish(payload);
    };
    p.then(settleWith(resolve), settleWith(reject));
  });
}
744
1176
  function selectDriver(opts) {
745
1177
  const kind = opts.driver ?? "kafkajs";
746
1178
  switch (kind) {