@eventferry/kafka 3.4.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/README.md +179 -0
- package/dist/index.cjs +181 -11
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +147 -3
- package/dist/index.d.ts +147 -3
- package/dist/index.js +181 -11
- package/dist/index.js.map +1 -1
- package/package.json +7 -4
package/dist/index.js
CHANGED
|
@@ -7,6 +7,7 @@ function classifyKafkajsError(err) {
|
|
|
7
7
|
if (e.name === "KafkaJSNonRetriableError") return "fatal";
|
|
8
8
|
const type = typeof e.type === "string" ? e.type : void 0;
|
|
9
9
|
if (type) {
|
|
10
|
+
if (FENCED_TYPES.has(type)) return "fenced";
|
|
10
11
|
if (RETRIABLE_TYPES.has(type)) return "retriable";
|
|
11
12
|
if (POISON_TYPES.has(type)) return "poison";
|
|
12
13
|
if (FATAL_TYPES.has(type)) return "fatal";
|
|
@@ -40,9 +41,11 @@ var POISON_TYPES = /* @__PURE__ */ new Set([
|
|
|
40
41
|
"INVALID_REQUIRED_ACKS",
|
|
41
42
|
"INVALID_PARTITIONS"
|
|
42
43
|
]);
|
|
43
|
-
var
|
|
44
|
+
var FENCED_TYPES = /* @__PURE__ */ new Set([
|
|
44
45
|
"INVALID_PRODUCER_EPOCH",
|
|
45
|
-
"PRODUCER_FENCED"
|
|
46
|
+
"PRODUCER_FENCED"
|
|
47
|
+
]);
|
|
48
|
+
var FATAL_TYPES = /* @__PURE__ */ new Set([
|
|
46
49
|
"TOPIC_AUTHORIZATION_FAILED",
|
|
47
50
|
"CLUSTER_AUTHORIZATION_FAILED",
|
|
48
51
|
"TRANSACTIONAL_ID_AUTHORIZATION_FAILED",
|
|
@@ -73,8 +76,8 @@ var CODE_TO_KIND = /* @__PURE__ */ new Map([
|
|
|
73
76
|
// TOPIC_AUTHORIZATION_FAILED
|
|
74
77
|
[31, "fatal"],
|
|
75
78
|
// CLUSTER_AUTHORIZATION_FAILED
|
|
76
|
-
[47, "
|
|
77
|
-
// INVALID_PRODUCER_EPOCH
|
|
79
|
+
[47, "fenced"],
|
|
80
|
+
// INVALID_PRODUCER_EPOCH — retryable once via publisher reconnect
|
|
78
81
|
[58, "fatal"],
|
|
79
82
|
// SASL_AUTHENTICATION_FAILED
|
|
80
83
|
[74, "retriable"],
|
|
@@ -107,7 +110,10 @@ var UNSUPPORTED_BY_KAFKAJS = [
|
|
|
107
110
|
"maxRequestSize",
|
|
108
111
|
// Confluent-only escape hatches; ignored on kafkajs.
|
|
109
112
|
"compressionLevel",
|
|
110
|
-
"rawProducerConfig"
|
|
113
|
+
"rawProducerConfig",
|
|
114
|
+
// librdkafka stats — kafkajs has no equivalent surface.
|
|
115
|
+
"onStats",
|
|
116
|
+
"statsIntervalMs"
|
|
111
117
|
];
|
|
112
118
|
var KafkaJsDriver = class {
|
|
113
119
|
transactional;
|
|
@@ -393,8 +399,8 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
|
|
|
393
399
|
// ERR__TRANSPORT
|
|
394
400
|
[-198, "poison"],
|
|
395
401
|
// ERR__BAD_COMPRESSION
|
|
396
|
-
[-144, "
|
|
397
|
-
// ERR__FENCED — producer fenced
|
|
402
|
+
[-144, "fenced"],
|
|
403
|
+
// ERR__FENCED — producer fenced; publisher reconnect attempts a transparent recovery once
|
|
398
404
|
[-150, "fatal"],
|
|
399
405
|
// ERR__FATAL — unrecoverable librdkafka error
|
|
400
406
|
[-169, "fatal"],
|
|
@@ -426,8 +432,8 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
|
|
|
426
432
|
// TOPIC_AUTHORIZATION_FAILED
|
|
427
433
|
[31, "fatal"],
|
|
428
434
|
// CLUSTER_AUTHORIZATION_FAILED
|
|
429
|
-
[47, "
|
|
430
|
-
// INVALID_PRODUCER_EPOCH
|
|
435
|
+
[47, "fenced"],
|
|
436
|
+
// INVALID_PRODUCER_EPOCH — retryable once via publisher reconnect
|
|
431
437
|
[58, "fatal"],
|
|
432
438
|
// SASL_AUTHENTICATION_FAILED
|
|
433
439
|
[74, "retriable"],
|
|
@@ -441,7 +447,7 @@ var CODE_TO_KIND2 = /* @__PURE__ */ new Map([
|
|
|
441
447
|
]);
|
|
442
448
|
var NAME_TO_KIND = /* @__PURE__ */ new Map([
|
|
443
449
|
["ERR__QUEUE_FULL", "backpressure"],
|
|
444
|
-
["ERR__FENCED", "
|
|
450
|
+
["ERR__FENCED", "fenced"],
|
|
445
451
|
["ERR__FATAL", "fatal"],
|
|
446
452
|
["ERR__AUTHENTICATION", "fatal"],
|
|
447
453
|
["ERR__SSL", "fatal"],
|
|
@@ -450,7 +456,7 @@ var NAME_TO_KIND = /* @__PURE__ */ new Map([
|
|
|
450
456
|
["ERR__BAD_COMPRESSION", "poison"],
|
|
451
457
|
["ERR_TOPIC_AUTHORIZATION_FAILED", "fatal"],
|
|
452
458
|
["ERR_CLUSTER_AUTHORIZATION_FAILED", "fatal"],
|
|
453
|
-
["ERR_INVALID_PRODUCER_EPOCH", "
|
|
459
|
+
["ERR_INVALID_PRODUCER_EPOCH", "fenced"],
|
|
454
460
|
["ERR_SASL_AUTHENTICATION_FAILED", "fatal"],
|
|
455
461
|
["ERR_CORRUPT_MESSAGE", "poison"],
|
|
456
462
|
["ERR_MSG_SIZE_TOO_LARGE", "poison"],
|
|
@@ -486,6 +492,12 @@ function buildConfluentClientConfig(opts) {
|
|
|
486
492
|
if (opts.compressionLevel !== void 0) {
|
|
487
493
|
librdkafka["compression.level"] = opts.compressionLevel;
|
|
488
494
|
}
|
|
495
|
+
if (opts.onStats) {
|
|
496
|
+
librdkafka["stats_cb"] = wrapStatsCallback(opts.onStats);
|
|
497
|
+
librdkafka["statistics.interval.ms"] = opts.statsIntervalMs ?? 3e4;
|
|
498
|
+
} else if (opts.statsIntervalMs !== void 0) {
|
|
499
|
+
librdkafka["statistics.interval.ms"] = opts.statsIntervalMs;
|
|
500
|
+
}
|
|
489
501
|
const tlsRequested = opts.ssl === true || isTlsConfig(opts.ssl);
|
|
490
502
|
const saslRequested = !!opts.sasl;
|
|
491
503
|
if (saslRequested && tlsRequested) {
|
|
@@ -523,6 +535,20 @@ function buildConfluentClientConfig(opts) {
|
|
|
523
535
|
function isTlsConfig(v) {
|
|
524
536
|
return typeof v === "object" && v !== null;
|
|
525
537
|
}
|
|
538
|
+
/**
 * Adapt a user-supplied `onStats` handler into a stats callback that can
 * never throw back into the client that invokes it.
 *
 * The returned function accepts either a JSON string or an already-parsed
 * value. Malformed JSON is dropped silently, and any failure inside
 * `onStats` — a synchronous throw OR a rejected promise from an async
 * handler — is absorbed (stats delivery is best-effort).
 *
 * @param {(stats: unknown) => unknown} onStats - consumer of parsed stats.
 * @returns {(raw: unknown) => void} callback safe to hand to the client.
 */
function wrapStatsCallback(onStats) {
  return (raw) => {
    let parsed;
    try {
      parsed = typeof raw === "string" ? JSON.parse(raw) : raw;
    } catch {
      // Unparseable payload: nothing useful to forward.
      return;
    }
    try {
      const outcome = onStats(parsed);
      // An async handler reports failure through its promise, which the
      // surrounding try/catch cannot see. Attach a no-op rejection handler
      // so a failing handler does not surface as an unhandled rejection.
      if (outcome !== null && outcome !== void 0 && typeof outcome.then === "function") {
        Promise.resolve(outcome).catch(() => {});
      }
    } catch {
      // Best-effort: a broken stats handler must not disturb the producer.
    }
  };
}
|
|
526
552
|
function stringifyPem(input) {
|
|
527
553
|
if (Array.isArray(input)) {
|
|
528
554
|
return input.map((x) => typeof x === "string" ? x : x.toString("utf8")).join("\n");
|
|
@@ -764,11 +790,17 @@ var KafkaPublisher = class {
|
|
|
764
790
|
hooks;
|
|
765
791
|
tracer;
|
|
766
792
|
validateTopicsOnConnect;
|
|
793
|
+
autoRecoverFromFence;
|
|
794
|
+
// Serialize reconnects so concurrent publish() calls hitting a fence
|
|
795
|
+
// all observe the same single reconnect attempt — the second publish
|
|
796
|
+
// doesn't try to disconnect a producer the first is still re-initing.
|
|
797
|
+
fenceRecovery = null;
|
|
767
798
|
constructor(opts) {
|
|
768
799
|
this.logger = opts.logger;
|
|
769
800
|
this.hooks = opts.hooks ?? {};
|
|
770
801
|
this.tracer = opts.tracer ?? new NoopKafkaTracer();
|
|
771
802
|
this.validateTopicsOnConnect = opts.validateTopicsOnConnect ? Object.freeze([...opts.validateTopicsOnConnect]) : void 0;
|
|
803
|
+
this.autoRecoverFromFence = opts.autoRecoverFromFence ?? false;
|
|
772
804
|
const onTransactionAbort = this.hooks.onTransactionAbort ? (error) => {
|
|
773
805
|
void safeHook(
|
|
774
806
|
this.logger,
|
|
@@ -891,6 +923,20 @@ var KafkaPublisher = class {
|
|
|
891
923
|
await safeHook(this.logger, "onError", () => this.hooks.onError?.(error));
|
|
892
924
|
throw err;
|
|
893
925
|
}
|
|
926
|
+
const firstFenced = results.find(
|
|
927
|
+
(r) => !r.ok && r.errorKind === "fenced"
|
|
928
|
+
);
|
|
929
|
+
if (firstFenced) {
|
|
930
|
+
const fenceErr = firstFenced.error ?? new Error("producer fenced");
|
|
931
|
+
await safeHook(
|
|
932
|
+
this.logger,
|
|
933
|
+
"onProducerFenced",
|
|
934
|
+
() => this.hooks.onProducerFenced?.(fenceErr)
|
|
935
|
+
);
|
|
936
|
+
if (this.autoRecoverFromFence) {
|
|
937
|
+
results = await this.recoverAndRetry(outgoing, results);
|
|
938
|
+
}
|
|
939
|
+
}
|
|
894
940
|
const byId = new Map(messages.map((m) => [m.recordId, m]));
|
|
895
941
|
let allOk = true;
|
|
896
942
|
for (const r of results) {
|
|
@@ -941,6 +987,110 @@ var KafkaPublisher = class {
|
|
|
941
987
|
get transactional() {
|
|
942
988
|
return this.driver.transactional;
|
|
943
989
|
}
|
|
990
|
+
/**
|
|
991
|
+
* Cheap reachability probe. Borrows a fresh admin client, calls
|
|
992
|
+
* `listTopics`, and returns timing + outcome. Useful as the body of a
|
|
993
|
+
* `/healthz` or `/readyz` endpoint — proves the broker is reachable
|
|
994
|
+
* AND that the configured credentials still authenticate against it,
|
|
995
|
+
* without writing a record.
|
|
996
|
+
*
|
|
997
|
+
* Does NOT exercise the producer's send path — a healthy admin
|
|
998
|
+
* connection doesn't guarantee `publish()` will succeed (a fenced
|
|
999
|
+
* transactional producer would still answer healthy here). Treat this
|
|
1000
|
+
* as "broker reachable + auth still good", not "publisher is fully
|
|
1001
|
+
* operational".
|
|
1002
|
+
*
|
|
1003
|
+
* Default timeout 5_000 ms — long enough to ride out a single broker
|
|
1004
|
+
* leader election, short enough to fail a liveness probe meaningfully.
|
|
1005
|
+
* Set `timeoutMs: 0` to disable the timer entirely.
|
|
1006
|
+
*
|
|
1007
|
+
* The driver must implement `admin()` (the built-ins do); custom
|
|
1008
|
+
* drivers without admin get `{ ok: false, error: ... }` instead of
|
|
1009
|
+
* the throw `publisher.admin()` would surface — health checks are
|
|
1010
|
+
* not the place to crash.
|
|
1011
|
+
*/
|
|
1012
|
+
async healthCheck(opts = {}) {
|
|
1013
|
+
const timeoutMs = opts.timeoutMs ?? 5e3;
|
|
1014
|
+
const startedAt = Date.now();
|
|
1015
|
+
if (!this.driver.admin) {
|
|
1016
|
+
return {
|
|
1017
|
+
ok: false,
|
|
1018
|
+
latencyMs: 0,
|
|
1019
|
+
timestamp: startedAt,
|
|
1020
|
+
error: new Error(
|
|
1021
|
+
"KafkaPublisher.healthCheck: configured driver does not implement admin()"
|
|
1022
|
+
)
|
|
1023
|
+
};
|
|
1024
|
+
}
|
|
1025
|
+
let admin = null;
|
|
1026
|
+
try {
|
|
1027
|
+
admin = await this.driver.admin();
|
|
1028
|
+
await admin.connect();
|
|
1029
|
+
const probe = admin.listTopics();
|
|
1030
|
+
if (timeoutMs > 0) {
|
|
1031
|
+
await raceWithTimeout(probe, timeoutMs, "healthCheck");
|
|
1032
|
+
} else {
|
|
1033
|
+
await probe;
|
|
1034
|
+
}
|
|
1035
|
+
return {
|
|
1036
|
+
ok: true,
|
|
1037
|
+
latencyMs: Date.now() - startedAt,
|
|
1038
|
+
timestamp: startedAt
|
|
1039
|
+
};
|
|
1040
|
+
} catch (err) {
|
|
1041
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
1042
|
+
return {
|
|
1043
|
+
ok: false,
|
|
1044
|
+
latencyMs: Date.now() - startedAt,
|
|
1045
|
+
timestamp: startedAt,
|
|
1046
|
+
error
|
|
1047
|
+
};
|
|
1048
|
+
} finally {
|
|
1049
|
+
try {
|
|
1050
|
+
await admin?.close();
|
|
1051
|
+
} catch {
|
|
1052
|
+
}
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
1055
|
+
/**
|
|
1056
|
+
* Disconnect + re-connect the driver and re-send the batch ONCE. Used
|
|
1057
|
+
* by the fence-recovery path. Concurrent fence recoveries dedupe on a
|
|
1058
|
+
* shared in-flight promise (`fenceRecovery`) so we don't tear the
|
|
1059
|
+
* producer down while another batch is mid-restart.
|
|
1060
|
+
*
|
|
1061
|
+
* If the second send STILL reports any fenced records, those failures
|
|
1062
|
+
* are returned unchanged — another instance has almost certainly taken
|
|
1063
|
+
* the same `transactionalId` and silently retrying again would mask
|
|
1064
|
+
* the misconfiguration.
|
|
1065
|
+
*/
|
|
1066
|
+
async recoverAndRetry(outgoing, firstResults) {
|
|
1067
|
+
if (!this.fenceRecovery) {
|
|
1068
|
+
this.fenceRecovery = (async () => {
|
|
1069
|
+
try {
|
|
1070
|
+
await this.driver.disconnect();
|
|
1071
|
+
await this.driver.connect();
|
|
1072
|
+
} finally {
|
|
1073
|
+
this.fenceRecovery = null;
|
|
1074
|
+
}
|
|
1075
|
+
})();
|
|
1076
|
+
}
|
|
1077
|
+
try {
|
|
1078
|
+
await this.fenceRecovery;
|
|
1079
|
+
} catch (err) {
|
|
1080
|
+
const reconnectErr = err instanceof Error ? err : new Error(String(err));
|
|
1081
|
+
await safeHook(
|
|
1082
|
+
this.logger,
|
|
1083
|
+
"onError",
|
|
1084
|
+
() => this.hooks.onError?.(reconnectErr)
|
|
1085
|
+
);
|
|
1086
|
+
return firstResults;
|
|
1087
|
+
}
|
|
1088
|
+
try {
|
|
1089
|
+
return await this.driver.sendBatch(outgoing);
|
|
1090
|
+
} catch {
|
|
1091
|
+
return firstResults;
|
|
1092
|
+
}
|
|
1093
|
+
}
|
|
944
1094
|
/**
|
|
945
1095
|
* Start a span for the batch following the OTel messaging conventions.
|
|
946
1096
|
*
|
|
@@ -959,6 +1109,26 @@ var KafkaPublisher = class {
|
|
|
959
1109
|
});
|
|
960
1110
|
}
|
|
961
1111
|
};
|
|
1112
|
+
/**
 * Settle with the outcome of `p`, or reject with a timeout error if `p`
 * has not settled within `ms` milliseconds.
 *
 * `p` may be a promise, a thenable, or a plain value; it is normalised
 * with `Promise.resolve` so a synchronous result behaves the same as it
 * would under a bare `await`. The timer is cleared as soon as `p`
 * settles and is unref'd (where supported) so it never keeps the process
 * alive on its own.
 *
 * @param {unknown} p - operation to wait for.
 * @param {number} ms - timeout in milliseconds.
 * @param {string} label - operation name used in the timeout message.
 * @returns {Promise<unknown>} resolves/rejects with `p`'s outcome, or
 *   rejects with `Error("<label> timed out after <ms>ms")`.
 */
function raceWithTimeout(p, ms, label) {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      reject(new Error(`${label} timed out after ${ms}ms`));
    }, ms);
    if (typeof timer.unref === "function") {
      timer.unref();
    }
    Promise.resolve(p).then(
      (value) => {
        clearTimeout(timer);
        resolve(value);
      },
      (error) => {
        clearTimeout(timer);
        reject(error);
      }
    );
  });
}
|
|
962
1132
|
function selectDriver(opts) {
|
|
963
1133
|
const kind = opts.driver ?? "kafkajs";
|
|
964
1134
|
switch (kind) {
|