@closeup1202/klag 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -13
- package/dist/cli/index.js +315 -67
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
> Know **why** your Kafka consumer lag is growing — in 5 seconds from the terminal
|
|
4
4
|
|
|
5
|
-
[](https://www.npmjs.com/package/@closeup1202/klag)
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
7
|
|
|
8
8
|
## Compared to existing tools
|
|
@@ -21,21 +21,21 @@ npx @closeup1202/klag --broker localhost:9092 --group my-service
|
|
|
21
21
|
|
|
22
22
|
## Output example
|
|
23
23
|
```
|
|
24
|
-
⚡ klag
|
|
24
|
+
⚡ klag 0.3.0
|
|
25
25
|
|
|
26
26
|
🔍 Consumer Group: my-service
|
|
27
27
|
Broker: localhost:9092
|
|
28
|
-
Collected At: 2026-03-
|
|
28
|
+
Collected At: 2026-03-28 17:27:27 (Asia/Seoul)
|
|
29
29
|
|
|
30
|
-
Group Status :
|
|
30
|
+
Group Status : 🚨 CRITICAL Total Lag : 1,234 Drain : ∞
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
│ Topic │ Partition │ Committed Offset │ Log-End Offset │ Lag │ Status │ Produce Rate │ Consume Rate │
|
|
34
|
-
|
|
35
|
-
│ orders │ 0 │ 8,796 │ 10,000 │ 1,204 │ 🔴 HIGH │ 40.0 msg/s │ 0.0 msg/s │
|
|
36
|
-
│ orders │ 1 │ 9,988 │ 10,000 │ 12 │ 🟢 OK │ 0.0 msg/s │ 0.0 msg/s │
|
|
37
|
-
│ orders │ 2 │ 9,982 │ 10,000 │ 18 │ 🟢 OK │ 0.0 msg/s │ 0.0 msg/s │
|
|
38
|
-
|
|
32
|
+
┌────────┬───────────┬──────────────────┬────────────────┬───────┬─────────┬──────┬──────────────┬──────────────┐
|
|
33
|
+
│ Topic │ Partition │ Committed Offset │ Log-End Offset │ Lag │ Status │ Drain│ Produce Rate │ Consume Rate │
|
|
34
|
+
├────────┼───────────┼──────────────────┼────────────────┼───────┼─────────┼──────┼──────────────┼──────────────┤
|
|
35
|
+
│ orders │ 0 │ 8,796 │ 10,000 │ 1,204 │ 🔴 HIGH │ ∞ │ 40.0 msg/s │ 0.0 msg/s │
|
|
36
|
+
│ orders │ 1 │ 9,988 │ 10,000 │ 12 │ 🟢 OK │ — │ 0.0 msg/s │ 0.0 msg/s │
|
|
37
|
+
│ orders │ 2 │ 9,982 │ 10,000 │ 18 │ 🟢 OK │ — │ 0.0 msg/s │ 0.0 msg/s │
|
|
38
|
+
└────────┴───────────┴──────────────────┴────────────────┴───────┴─────────┴──────┴──────────────┴──────────────┘
|
|
39
39
|
|
|
40
40
|
🔎 Root Cause Analysis
|
|
41
41
|
[PRODUCER_BURST] orders
|
|
@@ -66,6 +66,48 @@ klag --broker localhost:9092 --group my-service --watch --interval 3000
|
|
|
66
66
|
|
|
67
67
|
# JSON output (CI/pipeline integration)
|
|
68
68
|
klag --broker localhost:9092 --group my-service --json
|
|
69
|
+
|
|
70
|
+
# SSL (system CA trust)
|
|
71
|
+
klag --broker kafka.prod:9092 --group my-service --ssl
|
|
72
|
+
|
|
73
|
+
# SSL with custom certificates
|
|
74
|
+
klag --broker kafka.prod:9092 --group my-service \
|
|
75
|
+
--ssl --ssl-ca /etc/kafka/ca.pem \
|
|
76
|
+
--ssl-cert /etc/kafka/client.crt --ssl-key /etc/kafka/client.key
|
|
77
|
+
|
|
78
|
+
# SASL authentication (password via environment variable — recommended)
|
|
79
|
+
KLAG_SASL_PASSWORD=secret klag --broker kafka.prod:9092 --group my-service \
|
|
80
|
+
--sasl-mechanism scram-sha-256 --sasl-username kafka-user
|
|
81
|
+
|
|
82
|
+
# SSL + SASL combined
|
|
83
|
+
KLAG_SASL_PASSWORD=secret klag --broker kafka.prod:9092 --group my-service \
|
|
84
|
+
--ssl --sasl-mechanism scram-sha-256 --sasl-username kafka-user
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Config file (.klagrc)
|
|
88
|
+
|
|
89
|
+
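Create `.klagrc` in the current directory or `~/.klagrc` to store default options. If both files exist, only the one in the current directory is loaded; the two files are not merged.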
|
|
90
|
+
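CLI arguments always take precedence over the config file. The one exception is the SASL password: when the `KLAG_SASL_PASSWORD` environment variable is set, it overrides both `--sasl-password` and any password stored in the config file.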
|
|
91
|
+
|
|
92
|
+
```json
|
|
93
|
+
{
|
|
94
|
+
"broker": "kafka.prod.internal:9092",
|
|
95
|
+
"group": "my-service",
|
|
96
|
+
"interval": 3000,
|
|
97
|
+
"ssl": {
|
|
98
|
+
"enabled": true,
|
|
99
|
+
"caPath": "/etc/kafka/ca.pem"
|
|
100
|
+
},
|
|
101
|
+
"sasl": {
|
|
102
|
+
"mechanism": "scram-sha-256",
|
|
103
|
+
"username": "kafka-user"
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
With this file in place, you only need:
|
|
109
|
+
```bash
|
|
110
|
+
KLAG_SASL_PASSWORD=secret klag
|
|
69
111
|
```
|
|
70
112
|
|
|
71
113
|
## Options
|
|
@@ -79,6 +121,13 @@ klag --broker localhost:9092 --group my-service --json
|
|
|
79
121
|
| `-w, --watch` | Watch mode | `false` |
|
|
80
122
|
| `--no-rate` | Skip rate sampling | `false` |
|
|
81
123
|
| `--json` | JSON output | `false` |
|
|
124
|
+
| `--ssl` | Enable SSL/TLS | `false` |
|
|
125
|
+
| `--ssl-ca <path>` | CA certificate PEM file | - |
|
|
126
|
+
| `--ssl-cert <path>` | Client certificate PEM file | - |
|
|
127
|
+
| `--ssl-key <path>` | Client key PEM file | - |
|
|
128
|
+
| `--sasl-mechanism <type>` | `plain`, `scram-sha-256`, `scram-sha-512` | - |
|
|
129
|
+
| `--sasl-username <user>` | SASL username | - |
|
|
130
|
+
| `--sasl-password <pass>` | SASL password (prefer `KLAG_SASL_PASSWORD` env var) | - |
|
|
82
131
|
|
|
83
132
|
## Detectable root causes
|
|
84
133
|
|
|
@@ -106,8 +155,10 @@ All consumption pauses during rebalancing, which can cause a temporary lag spike
|
|
|
106
155
|
## Roadmap
|
|
107
156
|
|
|
108
157
|
- [x] v0.1.0 — lag collection, hot partition, producer burst, slow consumer, rebalancing detection, watch mode with lag trend (▲▼)
|
|
109
|
-
- [
|
|
110
|
-
- [
|
|
158
|
+
- [x] v0.2.0 — SSL/SASL authentication, `.klagrc` config file support
|
|
159
|
+
- [x] v0.3.0 — time-to-drain severity classification, Drain column per partition
|
|
160
|
+
- [ ] v0.4.0 — multi-group monitoring
|
|
161
|
+
- [ ] v0.5.0 — Slack alerts, Prometheus export
|
|
111
162
|
|
|
112
163
|
## License
|
|
113
164
|
|
package/dist/cli/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/cli/index.ts
|
|
4
|
-
import
|
|
4
|
+
import chalk5 from "chalk";
|
|
5
5
|
import { Command } from "commander";
|
|
6
6
|
|
|
7
7
|
// src/analyzer/burstDetector.ts
|
|
@@ -138,20 +138,49 @@ function analyze(snapshot, rateSnapshot) {
|
|
|
138
138
|
}
|
|
139
139
|
|
|
140
140
|
// src/collector/lagCollector.ts
|
|
141
|
-
import { AssignerProtocol
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
141
|
+
import { AssignerProtocol } from "kafkajs";
|
|
142
|
+
|
|
143
|
+
// src/collector/kafkaFactory.ts
|
|
144
|
+
import { readFileSync } from "fs";
|
|
145
|
+
import { Kafka, logLevel } from "kafkajs";
|
|
146
|
+
/**
 * Creates the kafkajs client used by the collectors.
 *
 * kafkajs logging is switched off (`logLevel.NOTHING`) and only one retry is
 * attempted. `options.timeoutMs`, when set, is used for both the request and
 * the connection timeout; otherwise they default to 5000 ms and 3000 ms.
 *
 * @param {string} clientId - Client id reported to the broker.
 * @param {object} options - Needs `broker`; may carry `timeoutMs`, `ssl`, `sasl`.
 * @returns {Kafka} A configured kafkajs client.
 */
function createKafkaClient(clientId, options) {
  const { broker, timeoutMs, ssl, sasl } = options;
  const clientConfig = {
    clientId,
    brokers: [broker],
    logLevel: logLevel.NOTHING,
    requestTimeout: timeoutMs ?? 5e3,
    connectionTimeout: timeoutMs ?? 3e3,
    retry: { retries: 1 },
  };
  // TLS settings are attached only when an ssl section was supplied.
  if (ssl) {
    clientConfig.ssl = buildSslConfig(ssl);
  }
  // SASL is attached only when a password is actually available.
  if (sasl?.password) {
    clientConfig.sasl = buildSaslConfig(sasl);
  }
  return new Kafka(clientConfig);
}
|
|
162
|
+
/**
 * Builds the `sasl` section of the kafkajs client configuration.
 *
 * Only the mechanisms this CLI documents are accepted. Any other value is
 * rejected instead of being silently treated as `scram-sha-512`, which would
 * hide a typo or an unsupported mechanism behind a confusing authentication
 * failure.
 *
 * @param {{mechanism: string, username: string, password: string}} sasl
 * @returns {{mechanism: string, username: string, password: string}} A new
 *   object holding only the three SASL fields.
 * @throws {Error} When `sasl.mechanism` is not a supported mechanism.
 */
function buildSaslConfig(sasl) {
  const supportedMechanisms = ["plain", "scram-sha-256", "scram-sha-512"];
  const { mechanism, username, password } = sasl;
  if (!supportedMechanisms.includes(mechanism)) {
    throw new Error(
      `Unsupported SASL mechanism "${mechanism}". Expected one of: ${supportedMechanisms.join(", ")}.`
    );
  }
  return { mechanism, username, password };
}
|
|
169
|
+
/**
 * Builds the `ssl` value of the kafkajs client configuration.
 *
 * @param {{caPath?: string, certPath?: string, keyPath?: string}|undefined} ssl
 * @returns {true|object} `{}` when `ssl` is missing, `true` when no PEM path
 *   is configured, otherwise an object carrying the contents of each
 *   configured file under `ca` (an array), `cert` and `key`.
 * @throws {Error} When a configured PEM file cannot be read. The message names
 *   the offending option and path, and the original error is kept as `cause`.
 */
function buildSslConfig(ssl) {
  if (!ssl) return {};
  const { caPath, certPath, keyPath } = ssl;
  if (!caPath && !certPath && !keyPath) {
    return true;
  }
  // Paths coming from a config file are not validated up front, so a raw
  // ENOENT/EISDIR would not tell the user which setting is wrong.
  const readPem = (label, path) => {
    try {
      return readFileSync(path);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`Failed to read SSL ${label} file "${path}": ${reason}`, {
        cause: err,
      });
    }
  };
  return {
    ...(caPath && { ca: [readPem("CA certificate", caPath)] }),
    ...(certPath && { cert: readPem("client certificate", certPath) }),
    ...(keyPath && { key: readPem("client key", keyPath) }),
  };
}
|
|
180
|
+
|
|
181
|
+
// src/collector/lagCollector.ts
|
|
182
|
+
async function collectLag(options) {
|
|
183
|
+
const kafka = createKafkaClient("klag", options);
|
|
155
184
|
const admin = kafka.admin();
|
|
156
185
|
try {
|
|
157
186
|
await admin.connect();
|
|
@@ -170,7 +199,9 @@ async function collectLag(options) {
|
|
|
170
199
|
const decoded = AssignerProtocol.MemberAssignment.decode(
|
|
171
200
|
member.memberAssignment
|
|
172
201
|
);
|
|
173
|
-
for (const [topic, partitions2] of Object.entries(
|
|
202
|
+
for (const [topic, partitions2] of Object.entries(
|
|
203
|
+
decoded?.assignment ?? {}
|
|
204
|
+
)) {
|
|
174
205
|
if (!topicPartitionMap.has(topic)) {
|
|
175
206
|
topicPartitionMap.set(topic, /* @__PURE__ */ new Set());
|
|
176
207
|
}
|
|
@@ -250,21 +281,10 @@ async function collectLag(options) {
|
|
|
250
281
|
}
|
|
251
282
|
|
|
252
283
|
// src/collector/rateCollector.ts
|
|
253
|
-
import { Kafka as Kafka2, logLevel as logLevel2 } from "kafkajs";
|
|
254
284
|
async function collectRate(options, knownTopics) {
|
|
255
285
|
const intervalMs = options.intervalMs ?? 5e3;
|
|
256
286
|
const intervalSec = intervalMs / 1e3;
|
|
257
|
-
const kafka =
|
|
258
|
-
clientId: "klag-rate",
|
|
259
|
-
brokers: [options.broker],
|
|
260
|
-
logLevel: logLevel2.NOTHING,
|
|
261
|
-
requestTimeout: options.timeoutMs ?? 5e3,
|
|
262
|
-
connectionTimeout: options.timeoutMs ?? 3e3,
|
|
263
|
-
retry: {
|
|
264
|
-
retries: 1
|
|
265
|
-
// Added — only 1 retry (default is 5)
|
|
266
|
-
}
|
|
267
|
-
});
|
|
287
|
+
const kafka = createKafkaClient("klag-rate", options);
|
|
268
288
|
const admin = kafka.admin();
|
|
269
289
|
try {
|
|
270
290
|
await admin.connect();
|
|
@@ -343,12 +363,30 @@ import chalk from "chalk";
|
|
|
343
363
|
import Table from "cli-table3";
|
|
344
364
|
|
|
345
365
|
// src/types/index.ts
|
|
346
|
-
var VERSION = "0.
|
|
347
|
-
function classifyLag(lag) {
|
|
348
|
-
if (lag
|
|
349
|
-
if (
|
|
366
|
+
var VERSION = "0.3.0";
|
|
367
|
+
/**
 * Classifies a lag value as "OK", "WARN" or "HIGH".
 *
 * When a consume rate is supplied, severity is based on the estimated time to
 * drain the lag: under 60 s is "OK", under 300 s is "WARN", anything longer is
 * "HIGH". A rate that is not greater than zero (0, negative, NaN) means the
 * lag is not draining and is always "HIGH". Without a rate, absolute
 * thresholds apply: below 10,000 is "OK", below 100,000 is "WARN".
 *
 * @param {bigint} lag - Number of messages behind.
 * @param {number} [consumeRate] - Messages consumed per second, if sampled.
 * @returns {"OK"|"WARN"|"HIGH"}
 */
function classifyLag(lag, consumeRate) {
  if (lag === 0n) return "OK";
  if (consumeRate !== undefined) {
    // Dividing by a negative rate would give a negative drain time that
    // compares as "< 60" and would be reported as healthy.
    if (!(consumeRate > 0)) return "HIGH";
    const drainSec = Number(lag) / consumeRate;
    if (drainSec < 60) return "OK";
    if (drainSec < 300) return "WARN";
    return "HIGH";
  }
  if (lag < 10000n) return "OK";
  if (lag < 100000n) return "WARN";
  return "HIGH";
}
|
|
380
|
+
/**
 * Formats the estimated time needed to drain `lag` at `consumeRate`.
 *
 * @param {bigint} lag - Number of messages behind.
 * @param {number} consumeRate - Messages consumed per second.
 * @returns {string} An em dash when there is no lag; the infinity sign when
 *   the rate is not greater than zero (0, negative, NaN or missing); otherwise
 *   `Ns`, `NmNs` / `Nm`, or `>Nh` for one hour and above.
 */
function formatDrainTime(lag, consumeRate) {
  if (lag === 0n) return "\u2014";
  // A non-positive or non-numeric rate can never drain the lag; without this
  // guard the output would be a negative duration or ">NaNh".
  if (!(consumeRate > 0)) return "\u221E";
  const sec = Math.ceil(Number(lag) / consumeRate);
  if (sec < 60) return `${sec}s`;
  if (sec < 3600) {
    const m = Math.floor(sec / 60);
    const s = sec % 60;
    return s > 0 ? `${m}m${s}s` : `${m}m`;
  }
  return `>${Math.floor(sec / 3600)}h`;
}
|
|
352
390
|
|
|
353
391
|
// src/reporter/tableReporter.ts
|
|
354
392
|
var LEVEL_ICON = {
|
|
@@ -368,8 +406,8 @@ function formatTrend(lagDiff) {
|
|
|
368
406
|
if (lagDiff > 0n) return chalk.red(`\u25B2 +${lagDiff.toLocaleString()}`);
|
|
369
407
|
return chalk.green(`\u25BC ${lagDiff.toLocaleString()}`);
|
|
370
408
|
}
|
|
371
|
-
function groupStatus(totalLag) {
|
|
372
|
-
const level = classifyLag(totalLag);
|
|
409
|
+
function groupStatus(totalLag, totalConsumeRate) {
|
|
410
|
+
const level = classifyLag(totalLag, totalConsumeRate);
|
|
373
411
|
if (level === "OK") return chalk.green("\u2705 OK");
|
|
374
412
|
if (level === "WARN") return chalk.yellow("\u26A0\uFE0F WARNING");
|
|
375
413
|
return chalk.red("\u{1F6A8} CRITICAL");
|
|
@@ -398,18 +436,25 @@ function printLagTable(snapshot, rcaResults = [], rateSnapshot, watchMode = fals
|
|
|
398
436
|
chalk.bold(" Collected At: ") + chalk.gray(`${localTime} (${tz})`)
|
|
399
437
|
);
|
|
400
438
|
console.log("");
|
|
401
|
-
const status = groupStatus(totalLag);
|
|
402
|
-
const totalStr = chalk.bold(formatLag(totalLag));
|
|
403
|
-
console.log(` Group Status : ${status} Total Lag : ${totalStr}`);
|
|
404
|
-
console.log("");
|
|
405
439
|
const hasRate = !!rateSnapshot && rateSnapshot.partitions.length > 0;
|
|
406
440
|
const hasTrend = watchMode;
|
|
407
441
|
const rateMap = /* @__PURE__ */ new Map();
|
|
442
|
+
let totalConsumeRate;
|
|
408
443
|
if (hasRate && rateSnapshot) {
|
|
444
|
+
let sum = 0;
|
|
409
445
|
for (const r of rateSnapshot.partitions) {
|
|
410
446
|
rateMap.set(`${r.topic}-${r.partition}`, r);
|
|
447
|
+
sum += r.consumeRate;
|
|
411
448
|
}
|
|
449
|
+
totalConsumeRate = sum;
|
|
412
450
|
}
|
|
451
|
+
const status = groupStatus(totalLag, totalConsumeRate);
|
|
452
|
+
const totalStr = chalk.bold(formatLag(totalLag));
|
|
453
|
+
const drainStr = totalConsumeRate !== void 0 ? ` Drain : ${chalk.cyan(formatDrainTime(totalLag, totalConsumeRate))}` : "";
|
|
454
|
+
console.log(
|
|
455
|
+
` Group Status : ${status} Total Lag : ${totalStr}${drainStr}`
|
|
456
|
+
);
|
|
457
|
+
console.log("");
|
|
413
458
|
const head = [
|
|
414
459
|
chalk.bold("Topic"),
|
|
415
460
|
chalk.bold("Partition"),
|
|
@@ -418,7 +463,11 @@ function printLagTable(snapshot, rcaResults = [], rateSnapshot, watchMode = fals
|
|
|
418
463
|
chalk.bold("Lag"),
|
|
419
464
|
...hasTrend ? [chalk.bold("Trend")] : [],
|
|
420
465
|
chalk.bold("Status"),
|
|
421
|
-
...hasRate ? [
|
|
466
|
+
...hasRate ? [
|
|
467
|
+
chalk.bold("Drain"),
|
|
468
|
+
chalk.bold("Produce Rate"),
|
|
469
|
+
chalk.bold("Consume Rate")
|
|
470
|
+
] : []
|
|
422
471
|
];
|
|
423
472
|
const table = new Table({
|
|
424
473
|
head,
|
|
@@ -430,16 +479,19 @@ function printLagTable(snapshot, rcaResults = [], rateSnapshot, watchMode = fals
|
|
|
430
479
|
"right",
|
|
431
480
|
...hasTrend ? ["right"] : [],
|
|
432
481
|
"center",
|
|
433
|
-
...hasRate ? ["right", "right"] : []
|
|
482
|
+
...hasRate ? ["right", "right", "right"] : []
|
|
434
483
|
],
|
|
435
484
|
style: { head: [], border: ["grey"] }
|
|
436
485
|
});
|
|
437
486
|
let lastTopic = "";
|
|
438
487
|
for (const p of partitions) {
|
|
439
|
-
const level = classifyLag(p.lag);
|
|
440
|
-
const lagStr = level === "HIGH" ? chalk.red(formatLag(p.lag)) : level === "WARN" ? chalk.yellow(formatLag(p.lag)) : chalk.green(formatLag(p.lag));
|
|
441
488
|
const rateEntry = rateMap.get(`${p.topic}-${p.partition}`);
|
|
489
|
+
const level = classifyLag(p.lag, rateEntry?.consumeRate);
|
|
490
|
+
const lagStr = level === "HIGH" ? chalk.red(formatLag(p.lag)) : level === "WARN" ? chalk.yellow(formatLag(p.lag)) : chalk.green(formatLag(p.lag));
|
|
442
491
|
const rateColumns = hasRate ? [
|
|
492
|
+
chalk.cyan(
|
|
493
|
+
rateEntry !== void 0 ? formatDrainTime(p.lag, rateEntry.consumeRate) : "\u2014"
|
|
494
|
+
),
|
|
443
495
|
chalk.yellow(formatRate(rateEntry?.produceRate ?? 0)),
|
|
444
496
|
chalk.cyan(formatRate(rateEntry?.consumeRate ?? 0))
|
|
445
497
|
] : [];
|
|
@@ -470,8 +522,138 @@ function printLagTable(snapshot, rcaResults = [], rateSnapshot, watchMode = fals
|
|
|
470
522
|
}
|
|
471
523
|
}
|
|
472
524
|
|
|
525
|
+
// src/cli/authBuilder.ts
|
|
526
|
+
import chalk2 from "chalk";
|
|
527
|
+
/**
 * Turns the flat SSL/SASL settings into the `ssl` / `sasl` option objects.
 *
 * SSL is enabled when the `ssl` flag or any of the three PEM paths is set.
 * SASL is configured only when a mechanism is given; it then requires a
 * username and a password obtained through `resolvePassword`.
 *
 * @param {object} raw - `ssl`, `sslCa`, `sslCert`, `sslKey`, `saslMechanism`,
 *   `saslUsername`, `saslPassword`.
 * @returns {{ssl?: object, sasl?: object}}
 * @throws {Error} When a mechanism is given without a username, or when no
 *   password can be resolved.
 */
function buildAuthOptions(raw) {
  const {
    ssl,
    sslCa,
    sslCert,
    sslKey,
    saslMechanism,
    saslUsername,
    saslPassword,
  } = raw;
  const auth = {};

  if (ssl || sslCa || sslCert || sslKey) {
    const sslOptions = { enabled: true };
    if (sslCa) sslOptions.caPath = sslCa;
    if (sslCert) sslOptions.certPath = sslCert;
    if (sslKey) sslOptions.keyPath = sslKey;
    auth.ssl = sslOptions;
  }

  if (!saslMechanism) {
    return auth;
  }
  if (!saslUsername) {
    throw new Error(
      "--sasl-username is required when --sasl-mechanism is specified."
    );
  }
  auth.sasl = {
    mechanism: saslMechanism,
    username: saslUsername,
    password: resolvePassword(saslPassword),
  };
  return auth;
}
|
|
552
|
+
/**
 * Picks the SASL password to use.
 *
 * The `KLAG_SASL_PASSWORD` environment variable wins when it is set to a
 * non-empty value. Otherwise the explicitly supplied password is used and a
 * warning is printed to stderr.
 *
 * @param {string|undefined} cliPassword - Password supplied by the caller.
 * @returns {string} The password to authenticate with.
 * @throws {Error} When neither source provides a password.
 */
function resolvePassword(cliPassword) {
  const fromEnv = process.env.KLAG_SASL_PASSWORD;
  if (fromEnv) {
    return fromEnv;
  }
  if (!cliPassword) {
    throw new Error(
      "SASL password is required.\n Set the KLAG_SASL_PASSWORD environment variable or use --sasl-password."
    );
  }
  const warning =
    "\n\u26A0 Warning: --sasl-password passed via CLI argument.\n This may be visible in process listings (ps aux).\n Consider using the KLAG_SASL_PASSWORD environment variable instead.\n";
  console.error(chalk2.yellow(warning));
  return cliPassword;
}
|
|
569
|
+
|
|
570
|
+
// src/cli/configLoader.ts
|
|
571
|
+
import { existsSync, readFileSync as readFileSync2 } from "fs";
|
|
572
|
+
import { homedir } from "os";
|
|
573
|
+
import { join } from "path";
|
|
574
|
+
import chalk3 from "chalk";
|
|
575
|
+
var RC_FILENAME = ".klagrc";
|
|
576
|
+
var KNOWN_KEYS = [
|
|
577
|
+
"broker",
|
|
578
|
+
"group",
|
|
579
|
+
"interval",
|
|
580
|
+
"timeout",
|
|
581
|
+
"ssl",
|
|
582
|
+
"sasl"
|
|
583
|
+
];
|
|
584
|
+
/**
 * Locates and parses the first available rc file.
 *
 * The current working directory is checked before the home directory; the
 * first file that exists is parsed and the search stops there.
 *
 * @returns {{config: object, loadedFrom: string}|null} The parsed config and
 *   its path, or `null` when no rc file exists.
 */
function loadConfig() {
  const searchDirs = [process.cwd(), homedir()];
  const rcPath = searchDirs
    .map((dir) => join(dir, RC_FILENAME))
    .find((candidate) => existsSync(candidate));
  if (rcPath === undefined) {
    return null;
  }
  return { config: parseConfig(rcPath), loadedFrom: rcPath };
}
|
|
596
|
+
/**
 * Reads and validates a `.klagrc` JSON file.
 *
 * Unknown top-level keys and a stored SASL password only produce warnings on
 * stderr. Wrong value types are errors, including `ssl` / `sasl` values that
 * are not objects: a scalar such as `"ssl": true` would otherwise be ignored
 * silently when the nested fields are read.
 *
 * @param {string} filePath - Path of the rc file to read.
 * @returns {object} The parsed configuration object.
 * @throws {Error} When the file cannot be read or parsed (the underlying error
 *   is kept as `cause`), is not a JSON object, or holds a value of the wrong
 *   type.
 */
function parseConfig(filePath) {
  let raw;
  try {
    raw = JSON.parse(readFileSync2(filePath, "utf-8"));
  } catch (err) {
    throw new Error(
      `Failed to parse ${filePath}\nMake sure it contains valid JSON.`,
      { cause: err }
    );
  }
  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
    throw new Error(`${filePath} must be a JSON object.`);
  }
  const obj = raw;

  const unknownKeys = Object.keys(obj).filter((k) => !KNOWN_KEYS.includes(k));
  if (unknownKeys.length > 0) {
    console.error(
      chalk3.yellow(
        `\n\u26A0 Unknown key(s) in ${filePath}: ${unknownKeys.join(", ")}\n`
      )
    );
  }

  // Scalar settings, checked in the same order as before.
  const scalarTypes = {
    broker: "string",
    group: "string",
    interval: "number",
    timeout: "number",
  };
  for (const [key, expected] of Object.entries(scalarTypes)) {
    if (obj[key] !== undefined && typeof obj[key] !== expected) {
      throw new Error(`${filePath}: "${key}" must be a ${expected}.`);
    }
  }

  // Nested sections must be objects; null is still tolerated as "not set".
  for (const section of ["ssl", "sasl"]) {
    const value = obj[section];
    if (value != null && (typeof value !== "object" || Array.isArray(value))) {
      throw new Error(`${filePath}: "${section}" must be an object.`);
    }
  }

  if (obj.sasl?.password) {
    console.error(
      chalk3.yellow(
        `\n\u26A0 Warning: SASL password found in ${filePath}.\nStoring passwords in config files is not recommended.\nConsider using the KLAG_SASL_PASSWORD environment variable instead.\n`
      )
    );
  }
  return obj;
}
|
|
648
|
+
|
|
473
649
|
// src/cli/validators.ts
|
|
650
|
+
import { existsSync as existsSync2 } from "fs";
|
|
474
651
|
import { InvalidArgumentError } from "commander";
|
|
652
|
+
var VALID_SASL_MECHANISMS = [
|
|
653
|
+
"plain",
|
|
654
|
+
"scram-sha-256",
|
|
655
|
+
"scram-sha-512"
|
|
656
|
+
];
|
|
475
657
|
function parseInterval(value) {
|
|
476
658
|
const parsed = parseInt(value, 10);
|
|
477
659
|
if (Number.isNaN(parsed) || parsed < 1e3) {
|
|
@@ -501,9 +683,23 @@ function parseTimeout(value) {
|
|
|
501
683
|
}
|
|
502
684
|
return parsed;
|
|
503
685
|
}
|
|
686
|
+
/**
 * Option parser for `--sasl-mechanism`.
 *
 * @param {string} value - Raw value from the command line.
 * @returns {string} The value unchanged when it is a listed mechanism.
 * @throws {InvalidArgumentError} When the value is not in
 *   `VALID_SASL_MECHANISMS`.
 */
function parseSaslMechanism(value) {
  const isSupported = VALID_SASL_MECHANISMS.includes(value);
  if (isSupported) {
    return value;
  }
  const allowed = VALID_SASL_MECHANISMS.join(", ");
  throw new InvalidArgumentError(
    `--sasl-mechanism must be one of: ${allowed}.`
  );
}
|
|
694
|
+
/**
 * Option parser for the certificate/key path options.
 *
 * @param {string} value - Path given on the command line.
 * @returns {string} The same path when something exists at that location.
 * @throws {InvalidArgumentError} When nothing exists at the path.
 */
function parseCertPath(value) {
  const found = existsSync2(value);
  if (found) {
    return value;
  }
  throw new InvalidArgumentError(`Certificate file not found: ${value}`);
}
|
|
504
700
|
|
|
505
701
|
// src/cli/watcher.ts
|
|
506
|
-
import
|
|
702
|
+
import chalk4 from "chalk";
|
|
507
703
|
var MAX_RETRIES = 3;
|
|
508
704
|
function clearScreen() {
|
|
509
705
|
process.stdout.write("\x1Bc");
|
|
@@ -519,31 +715,31 @@ function printWatchHeader(intervalMs, updatedAt) {
|
|
|
519
715
|
hour12: false
|
|
520
716
|
});
|
|
521
717
|
console.log(
|
|
522
|
-
|
|
718
|
+
chalk4.bold.cyan("\u26A1 klag") + chalk4.gray(` v${VERSION}`) + " \u2502 " + chalk4.yellow("watch mode") + " \u2502 " + chalk4.gray(`${intervalSec}s refresh`) + " \u2502 " + chalk4.gray("Ctrl+C to exit")
|
|
523
719
|
);
|
|
524
|
-
console.log(
|
|
720
|
+
console.log(chalk4.gray(` Last updated: ${timeStr} (${tz})`));
|
|
525
721
|
}
|
|
526
722
|
/**
 * Clears the screen and renders the watch-mode banner followed by an error
 * and a retry notice.
 *
 * @param {string} message - Error text to show.
 * @param {number} retryCount - Current retry attempt, shown against MAX_RETRIES.
 * @param {number} retryIn - Seconds until the next attempt.
 */
function printWatchError(message, retryCount, retryIn) {
  clearScreen();
  const banner = `${chalk4.bold.cyan("\u26A1 klag")}${chalk4.gray(` v${VERSION}`)} \u2502 ${chalk4.yellow("watch mode")} \u2502 ${chalk4.gray("Ctrl+C to exit")}`;
  console.log(banner);
  console.log("");
  // The error itself goes to stderr; the retry notice stays on stdout.
  const errorLine = chalk4.red(` \u274C Error: ${message}`);
  console.error(errorLine);
  const retryLine = chalk4.yellow(
    ` Retrying ${retryCount}/${MAX_RETRIES}... in ${retryIn}s`
  );
  console.log(retryLine);
  console.log("");
}
|
|
538
734
|
/**
 * Clears the screen and renders the watch-mode banner followed by the final
 * error shown once every retry has failed.
 *
 * @param {string} message - Error text to show.
 */
function printWatchFatal(message) {
  clearScreen();
  const banner = `${chalk4.bold.cyan("\u26A1 klag")}${chalk4.gray(` v${VERSION}`)} \u2502 ${chalk4.yellow("watch mode")}`;
  console.log(banner);
  console.log("");
  // Both failure lines are written to stderr.
  const failureLines = [
    ` \u274C Error: ${message}`,
    ` All ${MAX_RETRIES} retries failed \u2014 exiting watch mode`,
  ];
  for (const line of failureLines) {
    console.error(chalk4.red(line));
  }
  console.log("");
}
|
|
@@ -566,7 +762,7 @@ async function runOnce(options, noRate, previous) {
|
|
|
566
762
|
const topics = [...new Set(snapshot.partitions.map((p) => p.topic))];
|
|
567
763
|
const waitSec = (options.intervalMs ?? 5e3) / 1e3;
|
|
568
764
|
process.stdout.write(
|
|
569
|
-
|
|
765
|
+
chalk4.gray(` Sampling rates... (waiting ${waitSec}s) `)
|
|
570
766
|
);
|
|
571
767
|
rateSnapshot = await collectRate(options, topics);
|
|
572
768
|
process.stdout.write(`\r${" ".repeat(50)}\r`);
|
|
@@ -583,7 +779,7 @@ function printCountdown(seconds) {
|
|
|
583
779
|
let remaining = seconds;
|
|
584
780
|
const tick = () => {
|
|
585
781
|
process.stdout.write(
|
|
586
|
-
`\r${
|
|
782
|
+
`\r${chalk4.gray(` [\u25CF] Next refresh in ${remaining}s...`)} `
|
|
587
783
|
);
|
|
588
784
|
if (remaining === 0) {
|
|
589
785
|
process.stdout.write(`\r${" ".repeat(40)}\r`);
|
|
@@ -608,12 +804,12 @@ function getFriendlyMessage(err, broker) {
|
|
|
608
804
|
}
|
|
609
805
|
async function startWatch(options, noRate) {
|
|
610
806
|
process.on("SIGINT", () => {
|
|
611
|
-
console.log(
|
|
807
|
+
console.log(chalk4.gray("\n\n Watch mode exited\n"));
|
|
612
808
|
process.exit(0);
|
|
613
809
|
});
|
|
614
810
|
const intervalMs = options.intervalMs ?? 5e3;
|
|
615
811
|
const waitSec = Math.ceil(intervalMs / 1e3);
|
|
616
|
-
process.stdout.write(
|
|
812
|
+
process.stdout.write(chalk4.gray(" Connecting to broker..."));
|
|
617
813
|
let errorCount = 0;
|
|
618
814
|
let previousSnapshot;
|
|
619
815
|
while (true) {
|
|
@@ -652,19 +848,52 @@ program.name("klag").description("Kafka consumer lag root cause analyzer").versi
|
|
|
652
848
|
).option("-w, --watch", "Watch mode \u2014 refresh every interval").option("-t, --timeout <ms>", "Connection timeout in ms", parseTimeout, 5e3).option(
|
|
653
849
|
"--no-rate",
|
|
654
850
|
"Skip rate sampling (faster, no PRODUCER_BURST detection)"
|
|
655
|
-
).option("--json", "Output raw JSON instead of table").
|
|
851
|
+
).option("--json", "Output raw JSON instead of table").option("--ssl", "Enable SSL/TLS (uses system CA trust)").option("--ssl-ca <path>", "Path to CA certificate PEM file", parseCertPath).option(
|
|
852
|
+
"--ssl-cert <path>",
|
|
853
|
+
"Path to client certificate PEM file",
|
|
854
|
+
parseCertPath
|
|
855
|
+
).option("--ssl-key <path>", "Path to client key PEM file", parseCertPath).option(
|
|
856
|
+
"--sasl-mechanism <mechanism>",
|
|
857
|
+
"SASL mechanism: plain, scram-sha-256, scram-sha-512",
|
|
858
|
+
parseSaslMechanism
|
|
859
|
+
).option("--sasl-username <username>", "SASL username").option(
|
|
860
|
+
"--sasl-password <password>",
|
|
861
|
+
"SASL password (prefer KLAG_SASL_PASSWORD env var)"
|
|
862
|
+
).action(async (options) => {
|
|
656
863
|
try {
|
|
864
|
+
const loaded = loadConfig();
|
|
865
|
+
const rc = loaded?.config ?? {};
|
|
866
|
+
if (loaded) {
|
|
867
|
+
process.stderr.write(
|
|
868
|
+
chalk5.gray(` Using config: ${loaded.loadedFrom}
|
|
869
|
+
`)
|
|
870
|
+
);
|
|
871
|
+
}
|
|
872
|
+
const broker = options.broker !== "localhost:9092" ? options.broker : rc.broker ?? options.broker;
|
|
873
|
+
const groupId = options.group ?? rc.group;
|
|
874
|
+
const intervalMs = options.interval !== 5e3 ? options.interval : rc.interval ?? options.interval;
|
|
875
|
+
const timeoutMs = options.timeout !== 5e3 ? options.timeout : rc.timeout ?? options.timeout;
|
|
876
|
+
const auth = buildAuthOptions({
|
|
877
|
+
ssl: options.ssl || rc.ssl?.enabled,
|
|
878
|
+
sslCa: options.sslCa ?? rc.ssl?.caPath,
|
|
879
|
+
sslCert: options.sslCert ?? rc.ssl?.certPath,
|
|
880
|
+
sslKey: options.sslKey ?? rc.ssl?.keyPath,
|
|
881
|
+
saslMechanism: options.saslMechanism ?? rc.sasl?.mechanism,
|
|
882
|
+
saslUsername: options.saslUsername ?? rc.sasl?.username,
|
|
883
|
+
saslPassword: options.saslPassword ?? rc.sasl?.password
|
|
884
|
+
});
|
|
657
885
|
const kafkaOptions = {
|
|
658
|
-
broker
|
|
659
|
-
groupId
|
|
660
|
-
intervalMs
|
|
661
|
-
timeoutMs
|
|
886
|
+
broker,
|
|
887
|
+
groupId,
|
|
888
|
+
intervalMs,
|
|
889
|
+
timeoutMs,
|
|
890
|
+
...auth
|
|
662
891
|
};
|
|
663
892
|
if (options.watch) {
|
|
664
893
|
await startWatch(kafkaOptions, options.rate === false);
|
|
665
894
|
return;
|
|
666
895
|
}
|
|
667
|
-
process.stdout.write(
|
|
896
|
+
process.stdout.write(chalk5.gray(" Connecting to broker..."));
|
|
668
897
|
const snapshot = await collectLag(kafkaOptions);
|
|
669
898
|
process.stdout.write(`\r${" ".repeat(50)}\r`);
|
|
670
899
|
let rateSnapshot;
|
|
@@ -672,7 +901,7 @@ program.name("klag").description("Kafka consumer lag root cause analyzer").versi
|
|
|
672
901
|
const topics = [...new Set(snapshot.partitions.map((p) => p.topic))];
|
|
673
902
|
const waitSec = (kafkaOptions.intervalMs ?? 5e3) / 1e3;
|
|
674
903
|
process.stdout.write(
|
|
675
|
-
|
|
904
|
+
chalk5.gray(` Sampling rates... (waiting ${waitSec}s) `)
|
|
676
905
|
);
|
|
677
906
|
rateSnapshot = await collectRate(kafkaOptions, topics);
|
|
678
907
|
process.stdout.write(`\r${" ".repeat(50)}\r`);
|
|
@@ -700,36 +929,55 @@ program.name("klag").description("Kafka consumer lag root cause analyzer").versi
|
|
|
700
929
|
process.stdout.write(`\r${" ".repeat(50)}\r`);
|
|
701
930
|
const message = err instanceof Error ? err.message : String(err);
|
|
702
931
|
if (message.includes("ECONNREFUSED") || message.includes("ETIMEDOUT") || message.includes("Connection error") || message.includes("connect ECONNREFUSED")) {
|
|
703
|
-
console.error(
|
|
932
|
+
console.error(chalk5.red(`
|
|
704
933
|
\u274C Cannot connect to broker
|
|
705
934
|
`));
|
|
706
|
-
console.error(
|
|
707
|
-
console.error(
|
|
708
|
-
console.error(
|
|
935
|
+
console.error(chalk5.yellow(" Check the following:"));
|
|
936
|
+
console.error(chalk5.gray(` \u2022 Is Kafka running: docker ps`));
|
|
937
|
+
console.error(chalk5.gray(` \u2022 Broker address: ${options.broker}`));
|
|
709
938
|
console.error(
|
|
710
|
-
|
|
939
|
+
chalk5.gray(
|
|
711
940
|
` \u2022 Port accessibility: nc -zv ${options.broker.split(":")[0]} ${options.broker.split(":")[1]}`
|
|
712
941
|
)
|
|
713
942
|
);
|
|
714
943
|
console.error("");
|
|
715
944
|
process.exit(1);
|
|
716
945
|
}
|
|
946
|
+
if (message.includes("SASLAuthenticationFailed") || message.includes("Authentication failed") || message.includes("SASL")) {
|
|
947
|
+
console.error(chalk5.red(`
|
|
948
|
+
\u274C SASL authentication failed
|
|
949
|
+
`));
|
|
950
|
+
console.error(chalk5.yellow(" Check the following:"));
|
|
951
|
+
console.error(
|
|
952
|
+
chalk5.gray(` \u2022 Mechanism: ${options.saslMechanism ?? "(none)"}`)
|
|
953
|
+
);
|
|
954
|
+
console.error(
|
|
955
|
+
chalk5.gray(` \u2022 Username: ${options.saslUsername ?? "(none)"}`)
|
|
956
|
+
);
|
|
957
|
+
console.error(
|
|
958
|
+
chalk5.gray(
|
|
959
|
+
` \u2022 Password: set via KLAG_SASL_PASSWORD or --sasl-password`
|
|
960
|
+
)
|
|
961
|
+
);
|
|
962
|
+
console.error("");
|
|
963
|
+
process.exit(1);
|
|
964
|
+
}
|
|
717
965
|
if (message.includes("not found") || message.includes("Dead state")) {
|
|
718
|
-
console.error(
|
|
966
|
+
console.error(chalk5.red(`
|
|
719
967
|
\u274C Consumer group not found
|
|
720
968
|
`));
|
|
721
|
-
console.error(
|
|
722
|
-
console.error(
|
|
723
|
-
console.error(
|
|
969
|
+
console.error(chalk5.yellow(" Check the following:"));
|
|
970
|
+
console.error(chalk5.gray(` \u2022 Group ID: ${options.group}`));
|
|
971
|
+
console.error(chalk5.gray(` \u2022 List existing groups:`));
|
|
724
972
|
console.error(
|
|
725
|
-
|
|
973
|
+
chalk5.gray(
|
|
726
974
|
` kafka-consumer-groups.sh --bootstrap-server ${options.broker} --list`
|
|
727
975
|
)
|
|
728
976
|
);
|
|
729
977
|
console.error("");
|
|
730
978
|
process.exit(1);
|
|
731
979
|
}
|
|
732
|
-
console.error(
|
|
980
|
+
console.error(chalk5.red(`
|
|
733
981
|
\u274C Error: ${message}
|
|
734
982
|
`));
|
|
735
983
|
process.exit(1);
|