@drarzter/kafka-client 0.5.7 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/core.js CHANGED
@@ -79,7 +79,8 @@ function decodeHeaders(raw) {
  for (const [key, value] of Object.entries(raw)) {
  if (value === void 0) continue;
  if (Array.isArray(value)) {
- result[key] = value.map((v) => Buffer.isBuffer(v) ? v.toString() : v).join(",");
+ const items = value.map((v) => Buffer.isBuffer(v) ? v.toString() : v);
+ result[key] = items[items.length - 1] ?? "";
  } else {
  result[key] = Buffer.isBuffer(value) ? value.toString() : value;
  }
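
Note on the hunk above: array header values are no longer comma-joined; the last element wins (or "" for an empty array). A rough before/after sketch, assuming a raw header map as handed to the internal decodeHeaders:

    // illustrative only; decodeHeaders is internal to dist/core.js
    const raw = { "x-trace-id": [Buffer.from("a"), Buffer.from("b")] };
    // 0.5.7: decoded["x-trace-id"] === "a,b"  (all values joined with ",")
    // 0.6.3: decoded["x-trace-id"] === "b"    (last value wins, "" if the array is empty)
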
@@ -141,17 +142,23 @@ function resolveTopicName(topicOrDescriptor) {
  }
  return String(topicOrDescriptor);
  }
- function registerSchema(topicOrDesc, schemaRegistry) {
+ function registerSchema(topicOrDesc, schemaRegistry, logger) {
  if (topicOrDesc?.__schema) {
  const topic2 = resolveTopicName(topicOrDesc);
+ const existing = schemaRegistry.get(topic2);
+ if (existing && existing !== topicOrDesc.__schema) {
+ logger?.warn(
+ `Schema conflict for topic "${topic2}": a different schema is already registered. Using the new schema \u2014 ensure consistent schemas to avoid silent validation mismatches.`
+ );
+ }
  schemaRegistry.set(topic2, topicOrDesc.__schema);
  }
  }
- async function validateMessage(topicOrDesc, message, deps) {
+ async function validateMessage(topicOrDesc, message, deps, ctx) {
  const topicName = resolveTopicName(topicOrDesc);
  if (topicOrDesc?.__schema) {
  try {
- return await topicOrDesc.__schema.parse(message);
+ return await topicOrDesc.__schema.parse(message, ctx);
  } catch (error) {
  throw new KafkaValidationError(topicName, message, {
  cause: error instanceof Error ? error : new Error(String(error))
@@ -162,7 +169,7 @@ async function validateMessage(topicOrDesc, message, deps) {
  const schema = deps.schemaRegistry.get(topicOrDesc);
  if (schema) {
  try {
- return await schema.parse(message);
+ return await schema.parse(message, ctx);
  } catch (error) {
  throw new KafkaValidationError(topicName, message, {
  cause: error instanceof Error ? error : new Error(String(error))
@@ -185,9 +192,14 @@ async function buildSendPayload(topicOrDesc, messages, deps) {
  for (const inst of deps.instrumentation) {
  inst.beforeSend?.(topic2, envelopeHeaders);
  }
+ const sendCtx = {
+ topic: topic2,
+ headers: envelopeHeaders,
+ version: m.schemaVersion ?? 1
+ };
  return {
  value: JSON.stringify(
- await validateMessage(topicOrDesc, m.value, deps)
+ await validateMessage(topicOrDesc, m.value, deps, sendCtx)
  ),
  key: m.key ?? null,
  headers: envelopeHeaders
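
The hunks above thread a validation context into schema parsing: `parse(message, ctx)` now receives `{ topic, headers, version }`, with `version` taken from the message's `schemaVersion` (default 1) on the send path. A minimal sketch of a version-aware schema object; the exact descriptor shape around `__schema` is not shown in this diff, so treat the wiring as an assumption:

    // hypothetical schema object attached via __schema on a topic descriptor
    const schema = {
      async parse(value, ctx) {
        // ctx = { topic, headers, version } as built in buildSendPayload
        if ((ctx?.version ?? 1) >= 2 && value.email == null) {
          throw new Error(`v${ctx.version} messages on ${ctx.topic} require "email"`);
        }
        return value;
      }
    };
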
@@ -231,19 +243,26 @@ function getOrCreateConsumer(groupId, fromBeginning, autoCommit, deps) {
  consumers.set(groupId, consumer);
  return consumer;
  }
- function buildSchemaMap(topics, schemaRegistry, optionSchemas) {
+ function buildSchemaMap(topics, schemaRegistry, optionSchemas, logger) {
  const schemaMap = /* @__PURE__ */ new Map();
+ const registerChecked = (name, schema) => {
+ const existing = schemaRegistry.get(name);
+ if (existing && existing !== schema) {
+ logger?.warn(
+ `Schema conflict for topic "${name}": a different schema is already registered. Using the new schema \u2014 ensure consistent schemas to avoid silent validation mismatches.`
+ );
+ }
+ schemaMap.set(name, schema);
+ schemaRegistry.set(name, schema);
+ };
  for (const t of topics) {
  if (t?.__schema) {
- const name = resolveTopicName(t);
- schemaMap.set(name, t.__schema);
- schemaRegistry.set(name, t.__schema);
+ registerChecked(resolveTopicName(t), t.__schema);
  }
  }
  if (optionSchemas) {
  for (const [k, v] of optionSchemas) {
- schemaMap.set(k, v);
- schemaRegistry.set(k, v);
+ registerChecked(k, v);
  }
  }
  return schemaMap;
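
With `registerChecked`, registering a different schema object for an already-registered topic now logs a warning instead of silently overwriting (the comparison is by identity, so two structurally equal but distinct objects still trigger it). A rough sketch using the `optionSchemas` map form shown above; `buildSchemaMap` is internal, so this is illustrative only:

    const schemaV1 = { parse: async (v) => v };
    const schemaV2 = { parse: async (v) => v };
    const schemaRegistry = new Map();
    buildSchemaMap([], schemaRegistry, new Map([["user.created", schemaV1]]), console);
    buildSchemaMap([], schemaRegistry, new Map([["user.created", schemaV2]]), console);
    // -> console.warn('Schema conflict for topic "user.created": a different schema is already registered. ...')
    // The newer schema replaces the older one in both the returned schemaMap and the shared registry.
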
@@ -270,8 +289,13 @@ function parseJsonMessage(raw, topic2, logger) {
  async function validateWithSchema(message, raw, topic2, schemaMap, interceptors, dlq, deps) {
  const schema = schemaMap.get(topic2);
  if (!schema) return message;
+ const ctx = {
+ topic: topic2,
+ headers: deps.originalHeaders ?? {},
+ version: Number(deps.originalHeaders?.["x-schema-version"] ?? 1)
+ };
  try {
- return await schema.parse(message);
+ return await schema.parse(message, ctx);
  } catch (error) {
  const err = toError(error);
  const validationError = new KafkaValidationError(topic2, message, {
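
On the consume side the context's `version` is read from the `x-schema-version` header (defaulting to 1). The diff does not show the producer writing that header, so if your schemas branch on `ctx.version` you would need to set it yourself when sending; a hedged sketch using the raw payload shape this file passes to `producer.send`:

    // assumption: headers flow through to the consumer unchanged
    await producer.send({
      topic: "user.created",
      messages: [{
        value: JSON.stringify({ id: 1, email: "a@example.com" }),
        headers: { "x-schema-version": "2" } // surfaces as ctx.version === 2 in schema.parse(message, ctx)
      }]
    });
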
@@ -308,7 +332,7 @@ async function validateWithSchema(message, raw, topic2, schemaMap, interceptors,
  return null;
  }
  }
- async function sendToDlq(topic2, rawMessage, deps, meta) {
+ function buildDlqPayload(topic2, rawMessage, meta) {
  const dlqTopic = `${topic2}.dlq`;
  const headers = {
  ...meta?.originalHeaders ?? {},
@@ -318,54 +342,82 @@ async function sendToDlq(topic2, rawMessage, deps, meta) {
  "x-dlq-error-stack": meta?.error.stack?.slice(0, 2e3) ?? "",
  "x-dlq-attempt-count": String(meta?.attempt ?? 0)
  };
+ return { topic: dlqTopic, messages: [{ value: rawMessage, headers }] };
+ }
+ async function sendToDlq(topic2, rawMessage, deps, meta) {
+ const payload = buildDlqPayload(topic2, rawMessage, meta);
  try {
- await deps.producer.send({
- topic: dlqTopic,
- messages: [{ value: rawMessage, headers }]
- });
- deps.logger.warn(`Message sent to DLQ: ${dlqTopic}`);
+ await deps.producer.send(payload);
+ deps.logger.warn(`Message sent to DLQ: ${payload.topic}`);
  } catch (error) {
- deps.logger.error(
- `Failed to send message to DLQ ${dlqTopic}:`,
- toError(error).stack
- );
+ const err = toError(error);
+ deps.logger.error(`Failed to send message to DLQ ${payload.topic}:`, err.stack);
+ await deps.onMessageLost?.({
+ topic: topic2,
+ error: err,
+ attempt: meta?.attempt ?? 0,
+ headers: meta?.originalHeaders ?? {}
+ });
  }
  }
  var RETRY_HEADER_ATTEMPT = "x-retry-attempt";
  var RETRY_HEADER_AFTER = "x-retry-after";
  var RETRY_HEADER_MAX_RETRIES = "x-retry-max-retries";
  var RETRY_HEADER_ORIGINAL_TOPIC = "x-retry-original-topic";
- async function sendToRetryTopic(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders, deps) {
+ function buildRetryTopicPayload(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders) {
  const retryTopic = `${originalTopic}.retry.${attempt}`;
- const {
- [RETRY_HEADER_ATTEMPT]: _a,
- [RETRY_HEADER_AFTER]: _b,
- [RETRY_HEADER_MAX_RETRIES]: _c,
- [RETRY_HEADER_ORIGINAL_TOPIC]: _d,
- ...userHeaders
- } = originalHeaders;
- const headers = {
- ...userHeaders,
- [RETRY_HEADER_ATTEMPT]: String(attempt),
- [RETRY_HEADER_AFTER]: String(Date.now() + delayMs),
- [RETRY_HEADER_MAX_RETRIES]: String(maxRetries),
- [RETRY_HEADER_ORIGINAL_TOPIC]: originalTopic
+ function buildHeaders(hdr) {
+ const {
+ [RETRY_HEADER_ATTEMPT]: _a,
+ [RETRY_HEADER_AFTER]: _b,
+ [RETRY_HEADER_MAX_RETRIES]: _c,
+ [RETRY_HEADER_ORIGINAL_TOPIC]: _d,
+ ...userHeaders
+ } = hdr;
+ return {
+ ...userHeaders,
+ [RETRY_HEADER_ATTEMPT]: String(attempt),
+ [RETRY_HEADER_AFTER]: String(Date.now() + delayMs),
+ [RETRY_HEADER_MAX_RETRIES]: String(maxRetries),
+ [RETRY_HEADER_ORIGINAL_TOPIC]: originalTopic
+ };
+ }
+ return {
+ topic: retryTopic,
+ messages: rawMessages.map((value, i) => ({
+ value,
+ headers: buildHeaders(
+ Array.isArray(originalHeaders) ? originalHeaders[i] ?? {} : originalHeaders
+ )
+ }))
  };
+ }
+ async function sendToRetryTopic(originalTopic, rawMessages, attempt, maxRetries, delayMs, originalHeaders, deps) {
+ const payload = buildRetryTopicPayload(
+ originalTopic,
+ rawMessages,
+ attempt,
+ maxRetries,
+ delayMs,
+ originalHeaders
+ );
  try {
- for (const raw of rawMessages) {
- await deps.producer.send({
- topic: retryTopic,
- messages: [{ value: raw, headers }]
- });
- }
+ await deps.producer.send(payload);
  deps.logger.warn(
- `Message queued in retry topic ${retryTopic} (attempt ${attempt}/${maxRetries})`
+ `Message queued in retry topic ${payload.topic} (attempt ${attempt}/${maxRetries})`
  );
  } catch (error) {
+ const err = toError(error);
  deps.logger.error(
- `Failed to send message to retry topic ${retryTopic}:`,
- toError(error).stack
+ `Failed to send message to retry topic ${payload.topic}:`,
+ err.stack
  );
+ await deps.onMessageLost?.({
+ topic: originalTopic,
+ error: err,
+ attempt,
+ headers: Array.isArray(originalHeaders) ? originalHeaders[0] ?? {} : originalHeaders
+ });
  }
  }
  async function broadcastToInterceptors(envelopes, interceptors, cb) {
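
`sendToDlq` and `sendToRetryTopic` now call `deps.onMessageLost` when the re-publish itself fails, so the application gets a last-resort hook instead of only an error log. A hedged wiring sketch; the class further down has an `onMessageLost` field, but whether it is set via the constructor options exactly like this is an assumption, and `metrics`/`alerting` are placeholders:

    const client = new KafkaClient("orders-svc", "orders-group", ["localhost:9092"], {
      onMessageLost: async ({ topic, error, attempt, headers }) => {
        // invoked when a message could not be written to the DLQ or retry topic
        metrics.increment("kafka.message_lost", { topic, attempt });
        alerting.page(`Kafka message lost on ${topic}: ${error.message}`);
      }
    });
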
@@ -377,11 +429,17 @@ async function broadcastToInterceptors(envelopes, interceptors, cb) {
  }
  async function runHandlerWithPipeline(fn, envelopes, interceptors, instrumentation) {
  const cleanups = [];
+ const wraps = [];
  try {
  for (const env of envelopes) {
  for (const inst of instrumentation) {
- const cleanup = inst.beforeConsume?.(env);
- if (typeof cleanup === "function") cleanups.push(cleanup);
+ const result = inst.beforeConsume?.(env);
+ if (typeof result === "function") {
+ cleanups.push(result);
+ } else if (result) {
+ if (result.cleanup) cleanups.push(result.cleanup);
+ if (result.wrap) wraps.push(result.wrap);
+ }
  }
  }
  for (const env of envelopes) {
@@ -389,7 +447,13 @@ async function runHandlerWithPipeline(fn, envelopes, interceptors, instrumentati
  await interceptor.before?.(env);
  }
  }
- await fn();
+ let runFn = fn;
+ for (let i = wraps.length - 1; i >= 0; i--) {
+ const wrap = wraps[i];
+ const inner = runFn;
+ runFn = () => wrap(inner);
+ }
+ await runFn();
  for (const env of envelopes) {
  for (const interceptor of interceptors) {
  await interceptor.after?.(env);
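
`beforeConsume` instrumentation can now return either a plain cleanup function (as before) or an object with optional `cleanup` and `wrap` members; wraps are composed right to left around the handler call. A minimal sketch of a timing hook using the new shape (the envelope fields referenced here are assumptions):

    const timingInstrumentation = {
      beforeConsume(envelope) {
        const start = Date.now();
        return {
          // wrap receives the already-composed inner handler and must invoke it
          wrap: async (next) => {
            try {
              return await next();
            } finally {
              console.log(`handled ${envelope.topic ?? "message"} in ${Date.now() - start}ms`);
            }
          },
          cleanup: () => { /* release per-message resources here */ }
        };
      }
    };
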
@@ -459,7 +523,7 @@ async function executeWithRetry(fn, ctx, deps) {
  1,
  retry.maxRetries,
  delay,
- envelopes[0]?.headers ?? {},
+ isBatch ? envelopes.map((e) => e.headers) : envelopes[0]?.headers ?? {},
  deps
  );
  } else if (isLastAttempt) {
@@ -561,6 +625,7 @@ async function handleEachBatch(payload, opts, deps) {
  interceptors,
  dlq,
  retry,
+ retryTopics,
  timeoutMs,
  wrapWithTimeout
  } = opts;
@@ -595,11 +660,12 @@ async function handleEachBatch(payload, opts, deps) {
  },
  {
  envelope: envelopes,
- rawMessages: batch.messages.filter((m) => m.value).map((m) => m.value.toString()),
+ rawMessages,
  interceptors,
  dlq,
  retry,
- isBatch: true
+ isBatch: true,
+ retryTopics
  },
  deps
  );
@@ -616,10 +682,11 @@ async function subscribeWithRetry(consumer, topics, logger, retryOpts) {
  } catch (error) {
  if (attempt === maxAttempts) throw error;
  const msg = toError(error).message;
+ const delay = Math.floor(Math.random() * backoffMs);
  logger.warn(
- `Failed to subscribe to [${topics.join(", ")}] (attempt ${attempt}/${maxAttempts}): ${msg}. Retrying in ${backoffMs}ms...`
+ `Failed to subscribe to [${topics.join(", ")}] (attempt ${attempt}/${maxAttempts}): ${msg}. Retrying in ${delay}ms...`
  );
- await sleep(backoffMs);
+ await sleep(delay);
  }
  }
  }
@@ -648,7 +715,8 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
  onMessageLost,
  ensureTopic,
  getOrCreateConsumer: getOrCreateConsumer2,
- runningConsumers
+ runningConsumers,
+ createRetryTxProducer
  } = deps;
  const backoffMs = retry.backoffMs ?? 1e3;
  const maxBackoffMs = retry.maxBackoffMs ?? 3e4;
@@ -656,6 +724,7 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
  for (const lt of levelTopics) {
  await ensureTopic(lt);
  }
+ const levelTxProducer = await createRetryTxProducer(`${levelGroupId}-tx`);
  const consumer = getOrCreateConsumer2(levelGroupId, false, false);
  await consumer.connect();
  await subscribeWithRetry(consumer, levelTopics, logger);
@@ -744,22 +813,67 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
  const nextLevel = level + 1;
  const cap = Math.min(backoffMs * 2 ** level, maxBackoffMs);
  const delay = Math.floor(Math.random() * cap);
- await sendToRetryTopic(
+ const { topic: rtTopic, messages: rtMsgs } = buildRetryTopicPayload(
  originalTopic,
  [raw],
  nextLevel,
  currentMaxRetries,
  delay,
- headers,
- pipelineDeps
+ headers
  );
+ const tx = await levelTxProducer.transaction();
+ try {
+ await tx.send({ topic: rtTopic, messages: rtMsgs });
+ await tx.sendOffsets({
+ consumer,
+ topics: [{ topic: nextOffset.topic, partitions: [{ partition: nextOffset.partition, offset: nextOffset.offset }] }]
+ });
+ await tx.commit();
+ logger.warn(
+ `Message routed to ${rtTopic} (EOS, level ${nextLevel}/${currentMaxRetries})`
+ );
+ } catch (txErr) {
+ try {
+ await tx.abort();
+ } catch {
+ }
+ logger.error(
+ `EOS routing to ${rtTopic} failed \u2014 message will be redelivered:`,
+ toError(txErr).stack
+ );
+ return;
+ }
  } else if (dlq) {
- await sendToDlq(originalTopic, raw, pipelineDeps, {
- error,
- // +1 to account for the main consumer's initial attempt before routing.
- attempt: level + 1,
- originalHeaders: headers
- });
+ const { topic: dTopic, messages: dMsgs } = buildDlqPayload(
+ originalTopic,
+ raw,
+ {
+ error,
+ // +1 to account for the main consumer's initial attempt before routing.
+ attempt: level + 1,
+ originalHeaders: headers
+ }
+ );
+ const tx = await levelTxProducer.transaction();
+ try {
+ await tx.send({ topic: dTopic, messages: dMsgs });
+ await tx.sendOffsets({
+ consumer,
+ topics: [{ topic: nextOffset.topic, partitions: [{ partition: nextOffset.partition, offset: nextOffset.offset }] }]
+ });
+ await tx.commit();
+ logger.warn(`Message sent to DLQ: ${dTopic} (EOS)`);
+ } catch (txErr) {
+ try {
+ await tx.abort();
+ } catch {
+ }
+ logger.error(
+ `EOS DLQ routing to ${dTopic} failed \u2014 message will be redelivered:`,
+ toError(txErr).stack
+ );
+ return;
+ }
  } else {
  await onMessageLost?.({
  topic: originalTopic,
879
  topic: originalTopic,
@@ -767,8 +881,8 @@ async function startLevelConsumer(level, levelTopics, levelGroupId, originalTopi
767
881
  attempt: level,
768
882
  headers
769
883
  });
884
+ await consumer.commitOffsets([nextOffset]);
770
885
  }
771
- await consumer.commitOffsets([nextOffset]);
772
886
  }
773
887
  });
774
888
  runningConsumers.set(levelGroupId, "eachMessage");
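
Retry-level consumers now route failures with exactly-once semantics: the produce to the next retry topic (or DLQ) and the offset advance are committed in one Kafka transaction, and on failure the transaction is aborted so the message is redelivered instead of being dropped or duplicated. The core pattern, stripped of the library plumbing (variable names here are placeholders):

    // consume-transform-produce with offsets in the same transaction
    const tx = await txProducer.transaction();
    try {
      await tx.send({ topic: "orders.retry.2", messages: [{ value: raw, headers }] });
      await tx.sendOffsets({
        consumer,
        topics: [{ topic: "orders.retry.1", partitions: [{ partition, offset: nextOffset }] }]
      });
      await tx.commit(); // produce and offset advance become visible atomically
    } catch (err) {
      await tx.abort().catch(() => {}); // uncommitted offset => the message is redelivered
      throw err;
    }
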
@@ -806,6 +920,7 @@ var KafkaClient = class {
  kafka;
  producer;
  txProducer;
+ retryTxProducers = /* @__PURE__ */ new Set();
  consumers = /* @__PURE__ */ new Map();
  admin;
  logger;
@@ -823,6 +938,8 @@ var KafkaClient = class {
  onMessageLost;
  onRebalance;
  isAdminConnected = false;
+ inFlightTotal = 0;
+ drainResolvers = [];
  clientId;
  constructor(clientId, groupId, brokers, options) {
  this.clientId = clientId;
@@ -830,7 +947,8 @@ var KafkaClient = class {
  this.logger = options?.logger ?? {
  log: (msg) => console.log(`[KafkaClient:${clientId}] ${msg}`),
  warn: (msg, ...args) => console.warn(`[KafkaClient:${clientId}] ${msg}`, ...args),
- error: (msg, ...args) => console.error(`[KafkaClient:${clientId}] ${msg}`, ...args)
+ error: (msg, ...args) => console.error(`[KafkaClient:${clientId}] ${msg}`, ...args),
+ debug: (msg, ...args) => console.debug(`[KafkaClient:${clientId}] ${msg}`, ...args)
  };
  this.autoCreateTopicsEnabled = options?.autoCreateTopics ?? false;
  this.strictSchemasEnabled = options?.strictSchemas ?? true;
@@ -874,7 +992,7 @@ var KafkaClient = class {
  /** Execute multiple sends atomically. Commits on success, aborts on error. */
  async transaction(fn) {
  if (!this.txProducer) {
- this.txProducer = this.kafka.producer({
+ const p = this.kafka.producer({
  kafkaJS: {
  acks: -1,
  idempotent: true,
@@ -882,7 +1000,8 @@ var KafkaClient = class {
  maxInFlightRequests: 1
  }
  });
- await this.txProducer.connect();
+ await p.connect();
+ this.txProducer = p;
  }
  const tx = await this.txProducer.transaction();
  try {
@@ -899,9 +1018,12 @@ var KafkaClient = class {
  }
  ]);
  await tx.send(payload);
+ this.notifyAfterSend(payload.topic, payload.messages.length);
  },
  sendBatch: async (topicOrDesc, messages) => {
- await tx.send(await this.preparePayload(topicOrDesc, messages));
+ const payload = await this.preparePayload(topicOrDesc, messages);
+ await tx.send(payload);
+ this.notifyAfterSend(payload.topic, payload.messages.length);
  }
  };
  await fn(ctx);
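
The transactional producer is now assigned only after `connect()` succeeds, so a failed connect no longer leaves a half-initialized `txProducer` behind, and transaction-context sends now go through `notifyAfterSend`. A hedged usage sketch of the transaction context; `sendBatch(topicOrDesc, messages)` is taken from this hunk, while the topic descriptors are placeholders:

    await client.transaction(async (ctx) => {
      await ctx.sendBatch(ordersTopic, [{ key: "42", value: { id: 42, status: "paid" } }]);
      await ctx.sendBatch(auditTopic, [{ value: { event: "order.paid", orderId: 42 } }]);
    });
    // Both sends commit together; if the callback throws, the transaction is aborted.
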
@@ -938,23 +1060,28 @@ var KafkaClient = class {
  const deps = this.messageDeps;
  const timeoutMs = options.handlerTimeoutMs;
  await consumer.run({
- eachMessage: (payload) => handleEachMessage(
- payload,
- {
- schemaMap,
- handleMessage,
- interceptors,
- dlq,
- retry,
- retryTopics: options.retryTopics,
- timeoutMs,
- wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
- },
- deps
+ eachMessage: (payload) => this.trackInFlight(
+ () => handleEachMessage(
+ payload,
+ {
+ schemaMap,
+ handleMessage,
+ interceptors,
+ dlq,
+ retry,
+ retryTopics: options.retryTopics,
+ timeoutMs,
+ wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
+ },
+ deps
+ )
  )
  });
  this.runningConsumers.set(gid, "eachMessage");
  if (options.retryTopics && retry) {
+ if (!this.autoCreateTopicsEnabled) {
+ await this.validateRetryTopicsExist(topicNames, retry.maxRetries);
+ }
  const companions = await startRetryTopicConsumers(
  topicNames,
  gid,
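
Per-message consumers now wrap handler execution in `trackInFlight` (used by the drain logic added further down), and when `retryTopics` is enabled with `autoCreateTopics: false` the required `<topic>.retry.<level>` topics are checked at startup. A hedged options sketch; the method name and exact handler signature for the per-message consumer are not visible in this hunk, so they are assumptions:

    // With autoCreateTopics: false these topics must already exist, or startup throws:
    //   orders.created.retry.1, orders.created.retry.2, and orders.created.dlq (when dlq is on)
    await client.startConsumer(["orders.created"], handleOrderCreated, {
      retry: { maxRetries: 2 },
      retryTopics: true,
      dlq: true
    });
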
@@ -971,25 +1098,65 @@ var KafkaClient = class {
  return { groupId: gid, stop: () => this.stopConsumer(gid) };
  }
  async startBatchConsumer(topics, handleBatch, options = {}) {
- const { consumer, schemaMap, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachBatch", options);
+ if (options.retryTopics && !options.retry) {
+ throw new Error(
+ "retryTopics requires retry to be configured \u2014 set retry.maxRetries to enable the retry topic chain"
+ );
+ }
+ if (options.autoCommit !== false) {
+ this.logger.debug?.(
+ `startBatchConsumer: autoCommit is enabled (default true). If your handler calls resolveOffset() or commitOffsetsIfNecessary(), set autoCommit: false to avoid offset conflicts.`
+ );
+ }
+ const { consumer, schemaMap, topicNames, gid, dlq, interceptors, retry } = await this.setupConsumer(topics, "eachBatch", options);
  const deps = this.messageDeps;
  const timeoutMs = options.handlerTimeoutMs;
  await consumer.run({
- eachBatch: (payload) => handleEachBatch(
- payload,
- {
- schemaMap,
- handleBatch,
- interceptors,
- dlq,
- retry,
- timeoutMs,
- wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
- },
- deps
+ eachBatch: (payload) => this.trackInFlight(
+ () => handleEachBatch(
+ payload,
+ {
+ schemaMap,
+ handleBatch,
+ interceptors,
+ dlq,
+ retry,
+ retryTopics: options.retryTopics,
+ timeoutMs,
+ wrapWithTimeout: this.wrapWithTimeoutWarning.bind(this)
+ },
+ deps
+ )
  )
  });
  this.runningConsumers.set(gid, "eachBatch");
+ if (options.retryTopics && retry) {
+ if (!this.autoCreateTopicsEnabled) {
+ await this.validateRetryTopicsExist(topicNames, retry.maxRetries);
+ }
+ const handleMessageForRetry = (env) => handleBatch([env], {
+ partition: env.partition,
+ highWatermark: env.offset,
+ heartbeat: async () => {
+ },
+ resolveOffset: () => {
+ },
+ commitOffsetsIfNecessary: async () => {
+ }
+ });
+ const companions = await startRetryTopicConsumers(
+ topicNames,
+ gid,
+ handleMessageForRetry,
+ retry,
+ dlq,
+ interceptors,
+ schemaMap,
+ this.retryTopicDeps,
+ options.retryTopicAssignmentTimeoutMs
+ );
+ this.companionGroupIds.set(gid, companions);
+ }
  return { groupId: gid, stop: () => this.stopConsumer(gid) };
  }
  // ── Consumer lifecycle ───────────────────────────────────────────
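
`startBatchConsumer` now fails fast when `retryTopics` is set without a `retry` config, emits a debug hint when `autoCommit` is left on, and gains the same retry-topic companion consumers as the per-message path, replaying failed messages into the batch handler one envelope at a time. A hedged usage sketch with the option names from this hunk (the handler's second argument mirrors the shim above; `processOrder` is a placeholder):

    await client.startBatchConsumer(["orders.created"], async (envelopes, batchCtx) => {
      for (const env of envelopes) {
        await processOrder(env);              // hypothetical application handler
        batchCtx.resolveOffset?.(env.offset); // safe because autoCommit is off
      }
    }, {
      autoCommit: false,        // avoids the offset-conflict debug warning above
      retry: { maxRetries: 3 },
      retryTopics: true
    });
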
@@ -1041,14 +1208,15 @@ var KafkaClient = class {
  */
  async getConsumerLag(groupId) {
  const gid = groupId ?? this.defaultGroupId;
- if (!this.isAdminConnected) {
- await this.admin.connect();
- this.isAdminConnected = true;
- }
+ await this.ensureAdminConnected();
  const committedByTopic = await this.admin.fetchOffsets({ groupId: gid });
+ const brokerOffsetsAll = await Promise.all(
+ committedByTopic.map(({ topic: topic2 }) => this.admin.fetchTopicOffsets(topic2))
+ );
  const result = [];
- for (const { topic: topic2, partitions } of committedByTopic) {
- const brokerOffsets = await this.admin.fetchTopicOffsets(topic2);
+ for (let i = 0; i < committedByTopic.length; i++) {
+ const { topic: topic2, partitions } = committedByTopic[i];
+ const brokerOffsets = brokerOffsetsAll[i];
  for (const { partition, offset } of partitions) {
  const broker = brokerOffsets.find((o) => o.partition === partition);
  if (!broker) continue;
@@ -1063,10 +1231,7 @@ var KafkaClient = class {
  /** Check broker connectivity. Never throws — returns a discriminated union. */
  async checkStatus() {
  try {
- if (!this.isAdminConnected) {
- await this.admin.connect();
- this.isAdminConnected = true;
- }
+ await this.ensureAdminConnected();
  const topics = await this.admin.listTopics();
  return { status: "up", clientId: this.clientId, topics };
  } catch (error) {
@@ -1081,12 +1246,17 @@ var KafkaClient = class {
  return this.clientId;
  }
  /** Gracefully disconnect producer, all consumers, and admin. */
- async disconnect() {
+ async disconnect(drainTimeoutMs = 3e4) {
+ await this.waitForDrain(drainTimeoutMs);
  const tasks = [this.producer.disconnect()];
  if (this.txProducer) {
  tasks.push(this.txProducer.disconnect());
  this.txProducer = void 0;
  }
+ for (const p of this.retryTxProducers) {
+ tasks.push(p.disconnect());
+ }
+ this.retryTxProducers.clear();
  for (const consumer of this.consumers.values()) {
  tasks.push(consumer.disconnect());
  }
@@ -1101,9 +1271,59 @@ var KafkaClient = class {
  this.companionGroupIds.clear();
  this.logger.log("All connections closed");
  }
+ // ── Graceful shutdown ────────────────────────────────────────────
+ /**
+ * Register SIGTERM / SIGINT handlers that drain in-flight messages before
+ * disconnecting. Call this once after constructing the client in non-NestJS apps.
+ * NestJS apps get drain for free via `onModuleDestroy` → `disconnect()`.
+ */
+ enableGracefulShutdown(signals = ["SIGTERM", "SIGINT"], drainTimeoutMs = 3e4) {
+ const handler = () => {
+ this.logger.log(
+ "Shutdown signal received \u2014 draining in-flight handlers..."
+ );
+ this.disconnect(drainTimeoutMs).catch(
+ (err) => this.logger.error(
+ "Error during graceful shutdown:",
+ toError(err).message
+ )
+ );
+ };
+ for (const signal of signals) {
+ process.once(signal, handler);
+ }
+ }
+ trackInFlight(fn) {
+ this.inFlightTotal++;
+ return fn().finally(() => {
+ this.inFlightTotal--;
+ if (this.inFlightTotal === 0) {
+ this.drainResolvers.splice(0).forEach((r) => r());
+ }
+ });
+ }
+ waitForDrain(timeoutMs) {
+ if (this.inFlightTotal === 0) return Promise.resolve();
+ return new Promise((resolve) => {
+ let handle;
+ const onDrain = () => {
+ clearTimeout(handle);
+ resolve();
+ };
+ this.drainResolvers.push(onDrain);
+ handle = setTimeout(() => {
+ const idx = this.drainResolvers.indexOf(onDrain);
+ if (idx !== -1) this.drainResolvers.splice(idx, 1);
+ this.logger.warn(
+ `Drain timed out after ${timeoutMs}ms \u2014 ${this.inFlightTotal} handler(s) still in flight`
+ );
+ resolve();
+ }, timeoutMs);
+ });
+ }
  // ── Private helpers ──────────────────────────────────────────────
  async preparePayload(topicOrDesc, messages) {
- registerSchema(topicOrDesc, this.schemaRegistry);
+ registerSchema(topicOrDesc, this.schemaRegistry, this.logger);
  const payload = await buildSendPayload(
  topicOrDesc,
  messages,
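
The hunk above also introduces the drain machinery: `disconnect(drainTimeoutMs)` waits (default 30 000 ms) for handlers tracked by `trackInFlight` before tearing connections down, and `enableGracefulShutdown()` hooks that into SIGTERM/SIGINT. Minimal usage sketch for a plain Node service, using the constructor shape shown earlier in this diff:

    const client = new KafkaClient("orders-svc", "orders-group", ["localhost:9092"]);
    // Drain in-flight handlers for up to 10s on SIGTERM/SIGINT, then disconnect.
    client.enableGracefulShutdown(["SIGTERM", "SIGINT"], 10_000);

    // Or drain explicitly from your own shutdown hook:
    await client.disconnect(10_000); // resolves immediately if nothing is in flight
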
@@ -1136,12 +1356,78 @@ var KafkaClient = class {
  }, timeoutMs);
  return promise;
  }
- async ensureTopic(topic2) {
- if (!this.autoCreateTopicsEnabled || this.ensuredTopics.has(topic2)) return;
- if (!this.isAdminConnected) {
+ /**
+ * When `retryTopics: true` and `autoCreateTopics: false`, verify that every
+ * `<topic>.retry.<level>` topic already exists. Throws a clear error at startup
+ * rather than silently discovering missing topics on the first handler failure.
+ */
+ async validateRetryTopicsExist(topicNames, maxRetries) {
+ await this.ensureAdminConnected();
+ const existing = new Set(await this.admin.listTopics());
+ const missing = [];
+ for (const t of topicNames) {
+ for (let level = 1; level <= maxRetries; level++) {
+ const retryTopic = `${t}.retry.${level}`;
+ if (!existing.has(retryTopic)) missing.push(retryTopic);
+ }
+ }
+ if (missing.length > 0) {
+ throw new Error(
+ `retryTopics: true but the following retry topics do not exist: ${missing.join(", ")}. Create them manually or set autoCreateTopics: true.`
+ );
+ }
+ }
+ /**
+ * When `autoCreateTopics` is disabled, verify that `<topic>.dlq` exists for every
+ * consumed topic. Throws a clear error at startup rather than silently discovering
+ * missing DLQ topics on the first handler failure.
+ */
+ async validateDlqTopicsExist(topicNames) {
+ await this.ensureAdminConnected();
+ const existing = new Set(await this.admin.listTopics());
+ const missing = topicNames.filter((t) => !existing.has(`${t}.dlq`)).map((t) => `${t}.dlq`);
+ if (missing.length > 0) {
+ throw new Error(
+ `dlq: true but the following DLQ topics do not exist: ${missing.join(", ")}. Create them manually or set autoCreateTopics: true.`
+ );
+ }
+ }
+ /**
+ * Connect the admin client if not already connected.
+ * The flag is only set to `true` after a successful connect — if `admin.connect()`
+ * throws the flag remains `false` so the next call will retry the connection.
+ */
+ async ensureAdminConnected() {
+ if (this.isAdminConnected) return;
+ try {
  await this.admin.connect();
  this.isAdminConnected = true;
+ } catch (err) {
+ this.isAdminConnected = false;
+ throw err;
  }
+ }
+ /**
+ * Create and connect a transactional producer for EOS retry routing.
+ * Each retry level consumer gets its own producer with a unique `transactionalId`
+ * so Kafka can fence stale producers on restart without affecting other levels.
+ */
+ async createRetryTxProducer(transactionalId) {
+ const p = this.kafka.producer({
+ kafkaJS: {
+ acks: -1,
+ idempotent: true,
+ transactionalId,
+ maxInFlightRequests: 1
+ }
+ });
+ await p.connect();
+ this.retryTxProducers.add(p);
+ return p;
+ }
+ async ensureTopic(topic2) {
+ if (!this.autoCreateTopicsEnabled || this.ensuredTopics.has(topic2)) return;
+ await this.ensureAdminConnected();
  await this.admin.createTopics({
  topics: [{ topic: topic2, numPartitions: this.numPartitions }]
  });
@@ -1174,7 +1460,8 @@ var KafkaClient = class {
  const schemaMap = buildSchemaMap(
  topics,
  this.schemaRegistry,
- optionSchemas
+ optionSchemas,
+ this.logger
  );
  const topicNames = topics.map((t) => resolveTopicName(t));
  for (const t of topicNames) {
@@ -1184,6 +1471,9 @@ var KafkaClient = class {
  for (const t of topicNames) {
  await this.ensureTopic(`${t}.dlq`);
  }
+ if (!this.autoCreateTopicsEnabled) {
+ await this.validateDlqTopicsExist(topicNames);
+ }
  }
  await consumer.connect();
  await subscribeWithRetry(
@@ -1202,7 +1492,8 @@ var KafkaClient = class {
  return {
  schemaRegistry: this.schemaRegistry,
  strictSchemasEnabled: this.strictSchemasEnabled,
- instrumentation: this.instrumentation
+ instrumentation: this.instrumentation,
+ logger: this.logger
  };
  }
  get consumerOpsDeps() {
@@ -1230,7 +1521,8 @@ var KafkaClient = class {
  onMessageLost: this.onMessageLost,
  ensureTopic: (t) => this.ensureTopic(t),
  getOrCreateConsumer: (gid, fb, ac) => getOrCreateConsumer(gid, fb, ac, this.consumerOpsDeps),
- runningConsumers: this.runningConsumers
+ runningConsumers: this.runningConsumers,
+ createRetryTxProducer: (txId) => this.createRetryTxProducer(txId)
  };
  }
  };