@eventferry/kafka 3.3.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,6 +7,7 @@ function classifyKafkajsError(err) {
7
7
  if (e.name === "KafkaJSNonRetriableError") return "fatal";
8
8
  const type = typeof e.type === "string" ? e.type : void 0;
9
9
  if (type) {
10
+ if (FENCED_TYPES.has(type)) return "fenced";
10
11
  if (RETRIABLE_TYPES.has(type)) return "retriable";
11
12
  if (POISON_TYPES.has(type)) return "poison";
12
13
  if (FATAL_TYPES.has(type)) return "fatal";
@@ -40,9 +41,11 @@ var POISON_TYPES = /* @__PURE__ */ new Set([
40
41
  "INVALID_REQUIRED_ACKS",
41
42
  "INVALID_PARTITIONS"
42
43
  ]);
43
- var FATAL_TYPES = /* @__PURE__ */ new Set([
44
+ var FENCED_TYPES = /* @__PURE__ */ new Set([
44
45
  "INVALID_PRODUCER_EPOCH",
45
- "PRODUCER_FENCED",
46
+ "PRODUCER_FENCED"
47
+ ]);
48
+ var FATAL_TYPES = /* @__PURE__ */ new Set([
46
49
  "TOPIC_AUTHORIZATION_FAILED",
47
50
  "CLUSTER_AUTHORIZATION_FAILED",
48
51
  "TRANSACTIONAL_ID_AUTHORIZATION_FAILED",
@@ -73,8 +76,8 @@ var CODE_TO_KIND = /* @__PURE__ */ new Map([
73
76
  // TOPIC_AUTHORIZATION_FAILED
74
77
  [31, "fatal"],
75
78
  // CLUSTER_AUTHORIZATION_FAILED
76
- [47, "fatal"],
77
- // INVALID_PRODUCER_EPOCH
79
+ [47, "fenced"],
80
+ // INVALID_PRODUCER_EPOCH — retryable once via publisher reconnect
78
81
  [58, "fatal"],
79
82
  // SASL_AUTHENTICATION_FAILED
80
83
  [74, "retriable"],
@@ -104,7 +107,13 @@ var UNSUPPORTED_BY_KAFKAJS = [
104
107
  "lingerMs",
105
108
  "batchSize",
106
109
  "deliveryTimeoutMs",
107
- "maxRequestSize"
110
+ "maxRequestSize",
111
+ // Confluent-only escape hatches; ignored on kafkajs.
112
+ "compressionLevel",
113
+ "rawProducerConfig",
114
+ // librdkafka stats — kafkajs has no equivalent surface.
115
+ "onStats",
116
+ "statsIntervalMs"
108
117
  ];
109
118
  var KafkaJsDriver = class {
110
119
  transactional;
@@ -143,13 +152,21 @@ var KafkaJsDriver = class {
143
152
  // the provider's returned token (other fields are ignored).
144
153
  sasl: this.opts.sasl
145
154
  });
146
- const createPartitioner = resolveCreatePartitioner(
147
- mod.Partitioners,
155
+ return kafka.producer(await this.buildProducerOptions(mod.Partitioners));
156
+ }
157
+ /**
158
+ * Compute the options object passed to `kafka.producer({...})`. Exposed
159
+ * as a test seam so power-user escape hatches (customPartitioner,
160
+ * rawKafkaJsProducerConfig) can be asserted without a live broker.
161
+ */
162
+ async buildProducerOptions(partitioners) {
163
+ const createPartitioner = this.opts.customPartitioner ?? resolveCreatePartitioner(
164
+ partitioners,
148
165
  this.opts.partitioner,
149
166
  this.transactional
150
167
  );
151
168
  const resolvedTxId = this.transactional ? await resolveTransactionalId(this.opts.transactionalId) : void 0;
152
- return kafka.producer({
169
+ return {
153
170
  idempotent: this.opts.idempotent ?? true,
154
171
  // Idempotent / transactional producers cap maxInFlight at 5. When the
155
172
  // user picks transactional we force 1 to keep strict ordering across
@@ -163,13 +180,32 @@ var KafkaJsDriver = class {
163
180
  transactionTimeout: this.opts.transactionTimeoutMs,
164
181
  // Setting any partitioner choice silences kafkajs's
165
182
  // KafkaJSPartitionerNotSpecified warning.
166
- createPartitioner
167
- });
183
+ createPartitioner,
184
+ // Power-user escape hatch — merged LAST so raw keys win against the
185
+ // translated ones. That's the contract: anything you put here is
186
+ // final, even if it overrides idempotent/transactionalId/etc.
187
+ ...this.opts.rawKafkaJsProducerConfig ?? {}
188
+ };
168
189
  }
169
190
  async disconnect() {
170
191
  await this.producer?.disconnect();
171
192
  this.producer = null;
172
193
  }
194
+ /**
195
+ * Construct a kafkajs admin client wrapped in the eventferry-facing
196
+ * `KafkaDriverAdmin` shape. The publisher calls `.connect()` on the
197
+ * returned object before exposing it via `publisher.admin()`.
198
+ */
199
+ async admin() {
200
+ const mod = await importKafkaJs();
201
+ const kafka = new mod.Kafka({
202
+ clientId: this.opts.clientId ?? "eventferry-admin",
203
+ brokers: this.opts.brokers,
204
+ ssl: this.opts.ssl,
205
+ sasl: this.opts.sasl
206
+ });
207
+ return new KafkaJsAdmin(kafka.admin());
208
+ }
173
209
  async sendBatch(messages) {
174
210
  if (!this.producer) throw new Error("KafkaJsDriver not connected");
175
211
  const topicMessages = groupByTopic(messages, this.opts.compression);
@@ -258,6 +294,69 @@ function warnUnsupportedKafkajsOptions(opts) {
258
294
  function _resetKafkajsWarnDedup() {
259
295
  warnedKafkajsKeys.clear();
260
296
  }
297
/**
 * Thin adapter around a kafkajs-style admin client.
 *
 * Every method delegates to the wrapped `client`; the adapter only reshapes
 * arguments and results (topic metadata, topic specs, partition specs).
 */
var KafkaJsAdmin = class {
  /** The wrapped admin client. */
  client;
  /**
   * @param {object} client Admin client exposing `connect`, `disconnect`,
   *   `listTopics`, `fetchTopicMetadata`, `createTopics`, `createPartitions`.
   */
  constructor(client) {
    this.client = client;
  }
  /** Connect the wrapped admin client. */
  async connect() {
    await this.client.connect();
  }
  /** Disconnect the wrapped admin client. */
  async close() {
    await this.client.disconnect();
  }
  /** @returns {Promise<string[]>} Whatever the client's `listTopics()` yields. */
  async listTopics() {
    return await this.client.listTopics();
  }
  /**
   * Describe the requested topics, preserving the caller's order.
   *
   * Topics absent from `listTopics()` are reported with `partitions: []`
   * and are never sent to `fetchTopicMetadata`.
   *
   * @param {string[]} topics Topic names to describe.
   * @returns {Promise<Array<{topic: string, partitions: object[]}>>}
   */
  async describeTopics(topics) {
    if (topics.length === 0) return [];
    const known = new Set(await this.client.listTopics());
    const existing = topics.filter((t) => known.has(t));
    // Skip the metadata round-trip entirely when nothing requested exists.
    const meta = existing.length ? await this.client.fetchTopicMetadata({ topics: existing }) : { topics: [] };
    const byName = new Map(meta.topics.map((t) => [t.name, t]));
    return topics.map((topic) => {
      // Set lookup instead of scanning a `missing` array per topic.
      if (!known.has(topic)) return { topic, partitions: [] };
      const found = byName.get(topic);
      if (!found) return { topic, partitions: [] };
      return {
        topic,
        // Copy only the four fields the adapter exposes.
        partitions: found.partitions.map((p) => ({
          partitionId: p.partitionId,
          leader: p.leader,
          replicas: p.replicas,
          isr: p.isr
        }))
      };
    });
  }
  /**
   * Create topics, treating "already exists" as success.
   *
   * @param {Array<{topic: string, numPartitions?: number, replicationFactor?: number, configEntries?: object}>} specs
   * @returns {Promise<void>}
   * @throws Rethrows any client error that is not an "already exists" error.
   */
  async createTopics(specs) {
    if (specs.length === 0) return;
    const topics = specs.map((s) => ({
      topic: s.topic,
      numPartitions: s.numPartitions,
      replicationFactor: s.replicationFactor,
      // The client takes a list of { name, value } pairs rather than a map.
      configEntries: s.configEntries ? Object.entries(s.configEntries).map(([name, value]) => ({ name, value })) : void 0
    }));
    try {
      await this.client.createTopics({ topics, waitForLeaders: true });
    } catch (err) {
      const e = err;
      if (e?.type === "TOPIC_ALREADY_EXISTS") return;
      // Fallback for errors that only carry the condition in their message.
      if (/already exists/i.test(e?.message ?? "")) return;
      throw err;
    }
  }
  /**
   * Grow topics to the requested total partition count.
   *
   * @param {Array<{topic: string, totalCount: number}>} specs
   * @returns {Promise<void>}
   */
  async createPartitions(specs) {
    if (specs.length === 0) return;
    await this.client.createPartitions({
      topicPartitions: specs.map((s) => ({
        topic: s.topic,
        count: s.totalCount
      }))
    });
  }
};
261
360
  async function importKafkaJs() {
262
361
  try {
263
362
  return await import("kafkajs");
@@ -300,8 +399,8 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
300
399
  // ERR__TRANSPORT
301
400
  [-198, "poison"],
302
401
  // ERR__BAD_COMPRESSION
303
- [-144, "fatal"],
304
- // ERR__FENCED — producer fenced by another with same txn id
402
+ [-144, "fenced"],
403
+ // ERR__FENCED — producer fenced; publisher reconnect attempts a transparent recovery once
305
404
  [-150, "fatal"],
306
405
  // ERR__FATAL — unrecoverable librdkafka error
307
406
  [-169, "fatal"],
@@ -333,8 +432,8 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
333
432
  // TOPIC_AUTHORIZATION_FAILED
334
433
  [31, "fatal"],
335
434
  // CLUSTER_AUTHORIZATION_FAILED
336
- [47, "fatal"],
337
- // INVALID_PRODUCER_EPOCH
435
+ [47, "fenced"],
436
+ // INVALID_PRODUCER_EPOCH — retryable once via publisher reconnect
338
437
  [58, "fatal"],
339
438
  // SASL_AUTHENTICATION_FAILED
340
439
  [74, "retriable"],
@@ -348,7 +447,7 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
348
447
  ]);
349
448
  var NAME_TO_KIND = /* @__PURE__ */ new Map([
350
449
  ["ERR__QUEUE_FULL", "backpressure"],
351
- ["ERR__FENCED", "fatal"],
450
+ ["ERR__FENCED", "fenced"],
352
451
  ["ERR__FATAL", "fatal"],
353
452
  ["ERR__AUTHENTICATION", "fatal"],
354
453
  ["ERR__SSL", "fatal"],
@@ -357,7 +456,7 @@ var NAME_TO_KIND = /* @__PURE__ */ new Map([
357
456
  ["ERR__BAD_COMPRESSION", "poison"],
358
457
  ["ERR_TOPIC_AUTHORIZATION_FAILED", "fatal"],
359
458
  ["ERR_CLUSTER_AUTHORIZATION_FAILED", "fatal"],
360
- ["ERR_INVALID_PRODUCER_EPOCH", "fatal"],
459
+ ["ERR_INVALID_PRODUCER_EPOCH", "fenced"],
361
460
  ["ERR_SASL_AUTHENTICATION_FAILED", "fatal"],
362
461
  ["ERR_CORRUPT_MESSAGE", "poison"],
363
462
  ["ERR_MSG_SIZE_TOO_LARGE", "poison"],
@@ -390,6 +489,15 @@ function buildConfluentClientConfig(opts) {
390
489
  if (opts.transactionTimeoutMs !== void 0) {
391
490
  librdkafka["transaction.timeout.ms"] = opts.transactionTimeoutMs;
392
491
  }
492
+ if (opts.compressionLevel !== void 0) {
493
+ librdkafka["compression.level"] = opts.compressionLevel;
494
+ }
495
+ if (opts.onStats) {
496
+ librdkafka["stats_cb"] = wrapStatsCallback(opts.onStats);
497
+ librdkafka["statistics.interval.ms"] = opts.statsIntervalMs ?? 3e4;
498
+ } else if (opts.statsIntervalMs !== void 0) {
499
+ librdkafka["statistics.interval.ms"] = opts.statsIntervalMs;
500
+ }
393
501
  const tlsRequested = opts.ssl === true || isTlsConfig(opts.ssl);
394
502
  const saslRequested = !!opts.sasl;
395
503
  if (saslRequested && tlsRequested) {
@@ -419,11 +527,28 @@ function buildConfluentClientConfig(opts) {
419
527
  if (opts.sasl) {
420
528
  kafkaJS["sasl"] = opts.sasl;
421
529
  }
530
+ if (opts.rawProducerConfig) {
531
+ Object.assign(librdkafka, opts.rawProducerConfig);
532
+ }
422
533
  return { kafkaJS, librdkafka };
423
534
  }
424
535
  function isTlsConfig(v) {
425
536
  return typeof v === "object" && v !== null;
426
537
  }
538
/**
 * Wrap a user stats handler so it can never throw back into the caller.
 *
 * The returned function accepts either a JSON string or an already-parsed
 * value. Payloads that fail `JSON.parse` are dropped. Errors from `onStats`
 * are suppressed whether it throws synchronously or returns a promise that
 * rejects.
 *
 * @param {(stats: unknown) => unknown} onStats User-supplied stats handler.
 * @returns {(raw: unknown) => void} Best-effort callback.
 */
function wrapStatsCallback(onStats) {
  return (raw) => {
    let parsed;
    try {
      parsed = typeof raw === "string" ? JSON.parse(raw) : raw;
    } catch {
      // Malformed payload: skip this sample rather than surface an error.
      return;
    }
    try {
      const outcome = onStats(parsed);
      // An async handler rejects after this try block has exited; attach a
      // handler so the rejection does not become an unhandledRejection.
      if (outcome && typeof outcome.then === "function") {
        outcome.then(void 0, () => {
        });
      }
    } catch {
      // Deliberately best-effort: a faulty stats handler must not break the caller.
    }
  };
}
427
552
  function stringifyPem(input) {
428
553
  if (Array.isArray(input)) {
429
554
  return input.map((x) => typeof x === "string" ? x : x.toString("utf8")).join("\n");
@@ -472,6 +597,17 @@ var ConfluentDriver = class {
472
597
  await this.producer?.disconnect();
473
598
  this.producer = null;
474
599
  }
600
+ /**
601
+ * Construct a librdkafka-backed admin client wrapped in the eventferry
602
+ * `KafkaDriverAdmin` shape. The publisher's `connect()` is called before
603
+ * the admin reaches the user.
604
+ */
605
+ async admin() {
606
+ const mod = await importConfluent();
607
+ const { kafkaJS, librdkafka } = buildConfluentClientConfig(this.opts);
608
+ const kafka = new mod.KafkaJS.Kafka({ kafkaJS, ...librdkafka });
609
+ return new ConfluentAdmin(kafka.admin());
610
+ }
475
611
  async sendBatch(messages) {
476
612
  if (!this.producer) throw new Error("ConfluentDriver not connected");
477
613
  const topicMessages = groupByTopic2(messages);
@@ -540,6 +676,69 @@ function groupByTopic2(messages) {
540
676
  messages: msgs
541
677
  }));
542
678
  }
679
/**
 * Thin adapter around the Confluent client's admin object.
 *
 * Every method delegates to the wrapped `client`; the adapter only reshapes
 * arguments and results (topic metadata, topic specs, partition specs).
 */
var ConfluentAdmin = class {
  /** The wrapped admin client. */
  client;
  /**
   * @param {object} client Admin client exposing `connect`, `disconnect`,
   *   `listTopics`, `fetchTopicMetadata`, `createTopics`, `createPartitions`.
   */
  constructor(client) {
    this.client = client;
  }
  /** Connect the wrapped admin client. */
  async connect() {
    await this.client.connect();
  }
  /** Disconnect the wrapped admin client. */
  async close() {
    await this.client.disconnect();
  }
  /** @returns {Promise<string[]>} Whatever the client's `listTopics()` yields. */
  async listTopics() {
    return await this.client.listTopics();
  }
  /**
   * Describe the requested topics, preserving the caller's order.
   *
   * Topics absent from `listTopics()` are reported with `partitions: []`
   * and are never sent to `fetchTopicMetadata`.
   *
   * @param {string[]} topics Topic names to describe.
   * @returns {Promise<Array<{topic: string, partitions: object[]}>>}
   */
  async describeTopics(topics) {
    if (topics.length === 0) return [];
    const known = new Set(await this.client.listTopics());
    const existing = topics.filter((t) => known.has(t));
    // Skip the metadata round-trip entirely when nothing requested exists.
    const meta = existing.length ? await this.client.fetchTopicMetadata({ topics: existing }) : { topics: [] };
    const byName = new Map(meta.topics.map((t) => [t.name, t]));
    return topics.map((topic) => {
      // Set lookup instead of scanning a `missing` array per topic.
      if (!known.has(topic)) return { topic, partitions: [] };
      const found = byName.get(topic);
      if (!found) return { topic, partitions: [] };
      return {
        topic,
        // Copy only the four fields the adapter exposes.
        partitions: found.partitions.map((p) => ({
          partitionId: p.partitionId,
          leader: p.leader,
          replicas: p.replicas,
          isr: p.isr
        }))
      };
    });
  }
  /**
   * Create topics, treating "already exists" as success.
   *
   * @param {Array<{topic: string, numPartitions?: number, replicationFactor?: number, configEntries?: object}>} specs
   * @returns {Promise<void>}
   * @throws Rethrows any client error that is not an "already exists" error.
   */
  async createTopics(specs) {
    if (specs.length === 0) return;
    const topics = specs.map((s) => ({
      topic: s.topic,
      numPartitions: s.numPartitions,
      replicationFactor: s.replicationFactor,
      // The client takes a list of { name, value } pairs rather than a map.
      configEntries: s.configEntries ? Object.entries(s.configEntries).map(([name, value]) => ({ name, value })) : void 0
    }));
    try {
      await this.client.createTopics({ topics, waitForLeaders: true });
    } catch (err) {
      const e = err;
      // Matched by numeric code 36 or by error name.
      if (e?.code === 36 || e?.name === "TOPIC_ALREADY_EXISTS") return;
      // Fallback for errors that only carry the condition in their message.
      if (/already exists/i.test(e?.message ?? "")) return;
      throw err;
    }
  }
  /**
   * Grow topics to the requested total partition count.
   *
   * @param {Array<{topic: string, totalCount: number}>} specs
   * @returns {Promise<void>}
   */
  async createPartitions(specs) {
    if (specs.length === 0) return;
    await this.client.createPartitions({
      topicPartitions: specs.map((s) => ({
        topic: s.topic,
        count: s.totalCount
      }))
    });
  }
};
543
742
  async function importConfluent() {
544
743
  try {
545
744
  return await import("@confluentinc/kafka-javascript");
@@ -590,10 +789,18 @@ var KafkaPublisher = class {
590
789
  logger;
591
790
  hooks;
592
791
  tracer;
792
+ validateTopicsOnConnect;
793
+ autoRecoverFromFence;
794
+ // Serialize reconnects so concurrent publish() calls hitting a fence
795
+ // all observe the same single reconnect attempt — the second publish
796
+ // doesn't try to disconnect a producer the first is still re-initing.
797
+ fenceRecovery = null;
593
798
  constructor(opts) {
594
799
  this.logger = opts.logger;
595
800
  this.hooks = opts.hooks ?? {};
596
801
  this.tracer = opts.tracer ?? new NoopKafkaTracer();
802
+ this.validateTopicsOnConnect = opts.validateTopicsOnConnect ? Object.freeze([...opts.validateTopicsOnConnect]) : void 0;
803
+ this.autoRecoverFromFence = opts.autoRecoverFromFence ?? false;
597
804
  const onTransactionAbort = this.hooks.onTransactionAbort ? (error) => {
598
805
  void safeHook(
599
806
  this.logger,
@@ -605,8 +812,90 @@ var KafkaPublisher = class {
605
812
  }
606
813
  async connect() {
607
814
  await this.driver.connect();
815
+ if (this.validateTopicsOnConnect && this.validateTopicsOnConnect.length) {
816
+ await this.assertTopicsExist(this.validateTopicsOnConnect);
817
+ }
608
818
  await safeHook(this.logger, "onConnect", () => this.hooks.onConnect?.());
609
819
  }
820
+ /**
821
+ * Borrow a new admin client from the driver. The returned admin is
822
+ * connected and ready to use; the CALLER must `close()` it. Throws if the
823
+ * driver does not implement admin (custom driver lacking the capability).
824
+ */
825
+ async admin() {
826
+ const driverAdmin = await this.openDriverAdmin();
827
+ return driverAdmin;
828
+ }
829
+ /**
830
+ * Idempotently provision topics. Each spec creates the topic if absent;
831
+ * existing topics are skipped without error. If `growPartitions: true`
832
+ * (default false), topics whose current partition count is below the
833
+ * requested `numPartitions` are grown via `createPartitions`.
834
+ *
835
+ * Replication factor and config entries on EXISTING topics are NOT
836
+ * reconciled — Kafka does not provide a safe in-place alter for those
837
+ * (changing replication requires reassignment; configs use alterConfigs).
838
+ * Reach for the raw admin if you need that.
839
+ */
840
+ async ensureTopics(specs, opts = {}) {
841
+ if (specs.length === 0) return;
842
+ const admin = await this.openDriverAdmin();
843
+ try {
844
+ const topicNames = specs.map((s) => s.topic);
845
+ const existing = await admin.describeTopics(topicNames);
846
+ const existingByName = new Map(existing.map((t) => [t.topic, t]));
847
+ const toCreate = specs.filter(
848
+ (s) => (existingByName.get(s.topic)?.partitions.length ?? 0) === 0
849
+ );
850
+ if (toCreate.length) await admin.createTopics(toCreate);
851
+ if (opts.growPartitions) {
852
+ const grow = [];
853
+ for (const s of specs) {
854
+ if (s.numPartitions === void 0) continue;
855
+ const current = existingByName.get(s.topic);
856
+ const currentCount = current?.partitions.length ?? 0;
857
+ if (currentCount > 0 && currentCount < s.numPartitions) {
858
+ grow.push({ topic: s.topic, totalCount: s.numPartitions });
859
+ }
860
+ }
861
+ if (grow.length) await admin.createPartitions(grow);
862
+ }
863
+ } finally {
864
+ await admin.close();
865
+ }
866
+ }
867
+ /**
868
+ * Borrow a fresh admin from the driver and connect it. Throws when the
869
+ * driver does not implement admin (custom drivers without that capability).
870
+ */
871
+ async openDriverAdmin() {
872
+ if (!this.driver.admin) {
873
+ throw new Error(
874
+ "KafkaPublisher: configured driver does not implement admin(). Use the built-in kafkajs or confluent driver, or extend your custom driver."
875
+ );
876
+ }
877
+ const admin = await this.driver.admin();
878
+ await admin.connect();
879
+ return admin;
880
+ }
881
+ /**
882
+ * Open an admin, list topics, throw if any required topic is missing.
883
+ * Always closes the admin (success or failure).
884
+ */
885
+ async assertTopicsExist(required) {
886
+ const admin = await this.openDriverAdmin();
887
+ try {
888
+ const all = new Set(await admin.listTopics());
889
+ const missing = required.filter((t) => !all.has(t));
890
+ if (missing.length) {
891
+ throw new Error(
892
+ `KafkaPublisher: validateTopicsOnConnect failed \u2014 topics missing on cluster: ${missing.join(", ")}`
893
+ );
894
+ }
895
+ } finally {
896
+ await admin.close();
897
+ }
898
+ }
610
899
  async disconnect() {
611
900
  await this.driver.disconnect();
612
901
  await safeHook(
@@ -618,9 +907,14 @@ var KafkaPublisher = class {
618
907
  async publish(messages) {
619
908
  if (messages.length === 0) return [];
620
909
  const span = this.startBatchSpan(messages);
910
+ const outgoing = this.tracer.inject ? messages.map((m) => {
911
+ const headers = { ...m.headers };
912
+ this.tracer.inject(span, headers);
913
+ return { ...m, headers };
914
+ }) : messages;
621
915
  let results;
622
916
  try {
623
- results = await this.driver.sendBatch(messages);
917
+ results = await this.driver.sendBatch(outgoing);
624
918
  } catch (err) {
625
919
  const error = err instanceof Error ? err : new Error(String(err));
626
920
  span.setStatus({ code: "error", message: error.message });
@@ -629,6 +923,20 @@ var KafkaPublisher = class {
629
923
  await safeHook(this.logger, "onError", () => this.hooks.onError?.(error));
630
924
  throw err;
631
925
  }
926
+ const firstFenced = results.find(
927
+ (r) => !r.ok && r.errorKind === "fenced"
928
+ );
929
+ if (firstFenced) {
930
+ const fenceErr = firstFenced.error ?? new Error("producer fenced");
931
+ await safeHook(
932
+ this.logger,
933
+ "onProducerFenced",
934
+ () => this.hooks.onProducerFenced?.(fenceErr)
935
+ );
936
+ if (this.autoRecoverFromFence) {
937
+ results = await this.recoverAndRetry(outgoing, results);
938
+ }
939
+ }
632
940
  const byId = new Map(messages.map((m) => [m.recordId, m]));
633
941
  let allOk = true;
634
942
  for (const r of results) {
@@ -679,6 +987,110 @@ var KafkaPublisher = class {
679
987
  get transactional() {
680
988
  return this.driver.transactional;
681
989
  }
990
+ /**
991
+ * Cheap reachability probe. Borrows a fresh admin client, calls
992
+ * `listTopics`, and returns timing + outcome. Useful as the body of a
993
+ * `/healthz` or `/readyz` endpoint — proves the broker is reachable
994
+ * AND that the configured credentials still authenticate against it,
995
+ * without writing a record.
996
+ *
997
+ * Does NOT exercise the producer's send path — a healthy admin
998
+ * connection doesn't guarantee `publish()` will succeed (a fenced
999
+ * transactional producer would still answer healthy here). Treat this
1000
+ * as "broker reachable + auth still good", not "publisher is fully
1001
+ * operational".
1002
+ *
1003
+ * Default timeout 5_000 ms — long enough to ride out a single broker
1004
+ * leader election, short enough to fail a liveness probe meaningfully.
1005
+ * Set `timeoutMs: 0` to disable the timer entirely.
1006
+ *
1007
+ * The driver must implement `admin()` (the built-ins do); custom
1008
+ * drivers without admin get `{ ok: false, error: ... }` instead of
1009
+ * the throw `publisher.admin()` would surface — health checks are
1010
+ * not the place to crash.
1011
+ */
1012
+ async healthCheck(opts = {}) {
1013
+ const timeoutMs = opts.timeoutMs ?? 5e3;
1014
+ const startedAt = Date.now();
1015
+ if (!this.driver.admin) {
1016
+ return {
1017
+ ok: false,
1018
+ latencyMs: 0,
1019
+ timestamp: startedAt,
1020
+ error: new Error(
1021
+ "KafkaPublisher.healthCheck: configured driver does not implement admin()"
1022
+ )
1023
+ };
1024
+ }
1025
+ let admin = null;
1026
+ try {
1027
+ admin = await this.driver.admin();
1028
+ await admin.connect();
1029
+ const probe = admin.listTopics();
1030
+ if (timeoutMs > 0) {
1031
+ await raceWithTimeout(probe, timeoutMs, "healthCheck");
1032
+ } else {
1033
+ await probe;
1034
+ }
1035
+ return {
1036
+ ok: true,
1037
+ latencyMs: Date.now() - startedAt,
1038
+ timestamp: startedAt
1039
+ };
1040
+ } catch (err) {
1041
+ const error = err instanceof Error ? err : new Error(String(err));
1042
+ return {
1043
+ ok: false,
1044
+ latencyMs: Date.now() - startedAt,
1045
+ timestamp: startedAt,
1046
+ error
1047
+ };
1048
+ } finally {
1049
+ try {
1050
+ await admin?.close();
1051
+ } catch {
1052
+ }
1053
+ }
1054
+ }
1055
+ /**
1056
+ * Disconnect + re-connect the driver and re-send the batch ONCE. Used
1057
+ * by the fence-recovery path. Concurrent fence recoveries dedupe on a
1058
+ * shared in-flight promise (`fenceRecovery`) so we don't tear the
1059
+ * producer down while another batch is mid-restart.
1060
+ *
1061
+ * If the second send STILL reports any fenced records, those failures
1062
+ * are returned unchanged — another instance has almost certainly taken
1063
+ * the same `transactionalId` and silently retrying again would mask
1064
+ * the misconfiguration.
1065
+ */
1066
+ async recoverAndRetry(outgoing, firstResults) {
1067
+ if (!this.fenceRecovery) {
1068
+ this.fenceRecovery = (async () => {
1069
+ try {
1070
+ await this.driver.disconnect();
1071
+ await this.driver.connect();
1072
+ } finally {
1073
+ this.fenceRecovery = null;
1074
+ }
1075
+ })();
1076
+ }
1077
+ try {
1078
+ await this.fenceRecovery;
1079
+ } catch (err) {
1080
+ const reconnectErr = err instanceof Error ? err : new Error(String(err));
1081
+ await safeHook(
1082
+ this.logger,
1083
+ "onError",
1084
+ () => this.hooks.onError?.(reconnectErr)
1085
+ );
1086
+ return firstResults;
1087
+ }
1088
+ try {
1089
+ return await this.driver.sendBatch(outgoing);
1090
+ } catch {
1091
+ return firstResults;
1092
+ }
1093
+ }
682
1094
  /**
683
1095
  * Start a span for the batch following the OTel messaging conventions.
684
1096
  *
@@ -697,6 +1109,26 @@ var KafkaPublisher = class {
697
1109
  });
698
1110
  }
699
1111
  };
1112
/**
 * Settle with `p`, or reject once `ms` milliseconds pass without a result.
 *
 * `p` may be a promise, a thenable, or a plain value (normalised through
 * `Promise.resolve`). The timer is unref'd where the runtime supports it,
 * so it never keeps the process alive on its own, and it is always cleared
 * once the race settles.
 *
 * @param {unknown} p Promise, thenable, or value to wait for.
 * @param {number} ms Timeout in milliseconds.
 * @param {string} label Prefix for the timeout error message.
 * @returns {Promise<unknown>} Resolves or rejects with `p`'s outcome.
 * @throws {Error} `"<label> timed out after <ms>ms"` when the timer wins.
 */
function raceWithTimeout(p, ms, label) {
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`${label} timed out after ${ms}ms`));
    }, ms);
    if (typeof timer.unref === "function") {
      timer.unref();
    }
  });
  return Promise.race([Promise.resolve(p), deadline]).finally(() => {
    clearTimeout(timer);
  });
}
700
1132
  function selectDriver(opts) {
701
1133
  const kind = opts.driver ?? "kafkajs";
702
1134
  switch (kind) {