fast-ops-fix-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fast-ops-fix.js +152 -0
- package/package.json +35 -0
- package/src/analyze.js +192 -0
- package/src/output.js +43 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import process from "node:process";
|
|
4
|
+
import { analyzeIncident } from "../src/analyze.js";
|
|
5
|
+
import { formatTextReport } from "../src/output.js";
|
|
6
|
+
|
|
7
|
+
// Version string printed by -v/--version and shown in the usage banner.
// NOTE(review): hard-coded here; presumably has to be bumped together with the
// "version" field in package.json — confirm.
const VERSION = "0.1.0";
|
|
8
|
+
|
|
9
|
+
/**
 * Print the CLI banner (tool name plus VERSION) followed by the usage
 * synopsis and the option list, in a single console.log call.
 *
 * Takes no arguments and returns nothing.
 */
function printUsage() {
|
|
10
|
+
console.log(`fast-ops-fix v${VERSION}
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
fast-ops-fix --file <logfile> [--json] [--max-evidence <n>]
|
|
14
|
+
fast-ops-fix <logfile> [--json]
|
|
15
|
+
cat app.log | fast-ops-fix [--json]
|
|
16
|
+
|
|
17
|
+
Options:
|
|
18
|
+
-f, --file <path> Read logs from file
|
|
19
|
+
--json Emit machine-readable JSON output
|
|
20
|
+
--max-evidence <n> Evidence lines per signal (default: 3)
|
|
21
|
+
-v, --version Print version
|
|
22
|
+
-h, --help Show this help
|
|
23
|
+
`);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
 * Parse command-line tokens into the CLI option object.
 *
 * Recognized flags: -h/--help, -v/--version, --json, -f/--file <path> and
 * --max-evidence <n>. A single bare (non-dash) token is accepted as the
 * logfile path when --file is not used.
 *
 * @param {string[]} argv - Tokens following the node binary and script path.
 * @returns {{file: (string|null), json: boolean, maxEvidence: number, help: boolean, version: boolean}}
 *   Parsed options; `file` stays null when no path was supplied.
 * @throws {Error} On an unknown option, a missing or invalid option value, or
 *   when more than one logfile path is supplied (including --file combined
 *   with a positional path).
 */
function parseArgs(argv) {
  const options = {
    file: null,
    json: false,
    maxEvidence: 3,
    help: false,
    version: false
  };
  const positionals = [];

  for (let i = 0; i < argv.length; i += 1) {
    const token = argv[i];

    switch (token) {
      case "-h":
      case "--help":
        options.help = true;
        break;

      case "-v":
      case "--version":
        options.version = true;
        break;

      case "--json":
        options.json = true;
        break;

      case "-f":
      case "--file": {
        const value = argv[i + 1];
        if (!value) {
          throw new Error("--file requires a value");
        }
        options.file = value;
        i += 1; // the value token has been consumed
        break;
      }

      case "--max-evidence": {
        const value = argv[i + 1];
        const parsed = Number(value);
        // The digit-only check rejects signs, decimals and exponents; the
        // safe-integer check rejects digit strings too large to be exact.
        if (!value || !/^\d+$/.test(value) || !Number.isSafeInteger(parsed) || parsed < 1) {
          throw new Error("--max-evidence requires a positive integer");
        }
        options.maxEvidence = parsed;
        i += 1; // the value token has been consumed
        break;
      }

      default:
        if (token.startsWith("-")) {
          throw new Error(`Unknown option: ${token}`);
        }
        positionals.push(token);
    }
  }

  // A positional path next to --file used to be dropped silently; treat it as
  // the same "too many paths" mistake as two positionals.
  const pathCount = positionals.length + (options.file ? 1 : 0);
  if (pathCount > 1) {
    throw new Error("Only one logfile path may be provided");
  }

  if (!options.file && positionals.length > 0) {
    options.file = positionals[0];
  }

  return options;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Read an entire stream and return its content decoded as UTF-8.
 *
 * Chunks are collected as bytes and decoded once at the end, so a multi-byte
 * character that straddles a chunk boundary is decoded correctly (appending
 * each Buffer chunk to a string decodes the chunks separately and corrupts
 * such characters).
 *
 * @param {AsyncIterable<Buffer|Uint8Array|string>} [stream=process.stdin]
 *   Source to drain; defaults to the process's standard input.
 * @returns {Promise<string>} The full content, or "" when the stream is empty.
 */
async function readFromStdin(stream = process.stdin) {
  const chunks = [];
  for await (const chunk of stream) {
    // A stream with an encoding already set yields strings; normalize to bytes.
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
  }
  return Buffer.concat(chunks).toString("utf8");
}
|
|
100
|
+
|
|
101
|
+
/**
 * Obtain the log text to analyze.
 *
 * @param {string|null} filePath - Path of the logfile; a falsy value means
 *   "no file given".
 * @returns {Promise<string>} The file content read as UTF-8 when a path is
 *   given; otherwise everything read from stdin when stdin is not a TTY;
 *   otherwise the empty string.
 */
async function resolveInput(filePath) {
  if (!filePath) {
    // Only touch stdin when no file was requested.
    return process.stdin.isTTY ? "" : readFromStdin();
  }
  return readFile(filePath, "utf8");
}
|
|
110
|
+
|
|
111
|
+
/**
 * CLI entry point: parse arguments, load the log text, analyze it and print
 * the report as text or JSON.
 *
 * Failure paths set `process.exitCode = 1` and return instead of calling
 * `process.exit(1)`, so output already written to stdout/stderr is flushed
 * before the process ends.
 *
 * @returns {Promise<void>} Resolves once output has been written; rejects if
 *   reading the input or the analysis throws.
 */
async function main() {
  let options;

  try {
    options = parseArgs(process.argv.slice(2));
  } catch (error) {
    console.error(`Argument error: ${error.message}`);
    printUsage();
    process.exitCode = 1;
    return;
  }

  // --help takes precedence over --version when both are given.
  if (options.help) {
    printUsage();
    return;
  }

  if (options.version) {
    console.log(VERSION);
    return;
  }

  const input = await resolveInput(options.file);
  if (!input || input.trim().length === 0) {
    console.error("No input provided. Pass --file <logfile> or pipe logs via stdin.");
    printUsage();
    process.exitCode = 1;
    return;
  }

  const report = analyzeIncident(input, { maxEvidence: options.maxEvidence });

  if (options.json) {
    console.log(JSON.stringify(report, null, 2));
    return;
  }

  console.log(formatTextReport(report));
}
|
|
148
|
+
|
|
149
|
+
// Run the CLI. Any failure that escapes main() is reported as a single line
// on stderr and turned into a non-zero exit status.
main().catch((error) => {
  // A rejection value is not guaranteed to be an Error; avoid printing
  // "undefined" for thrown strings or other values.
  const message = error instanceof Error ? error.message : String(error);
  console.error(`Execution error: ${message}`);
  // Set the status instead of calling process.exit() so the message above is
  // flushed before the process ends.
  process.exitCode = 1;
});
|
package/package.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "fast-ops-fix-cli",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "CLI for incident first-response triage from web service logs.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./src/analyze.js",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/analyze.js"
|
|
9
|
+
},
|
|
10
|
+
"bin": {
|
|
11
|
+
"fast-ops-fix": "bin/fast-ops-fix.js"
|
|
12
|
+
},
|
|
13
|
+
"files": [
|
|
14
|
+
"bin",
|
|
15
|
+
"src"
|
|
16
|
+
],
|
|
17
|
+
"scripts": {
|
|
18
|
+
"lint": "node --check ./bin/fast-ops-fix.js && node --check ./src/analyze.js && node --check ./src/output.js",
|
|
19
|
+
"test": "node --test"
|
|
20
|
+
},
|
|
21
|
+
"keywords": [
|
|
22
|
+
"incident",
|
|
23
|
+
"triage",
|
|
24
|
+
"ops",
|
|
25
|
+
"sre",
|
|
26
|
+
"logs",
|
|
27
|
+
"502",
|
|
28
|
+
"504"
|
|
29
|
+
],
|
|
30
|
+
"author": "autonomy414941",
|
|
31
|
+
"license": "MIT",
|
|
32
|
+
"engines": {
|
|
33
|
+
"node": ">=20"
|
|
34
|
+
}
|
|
35
|
+
}
|
package/src/analyze.js
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
 * Incident signatures matched against each log line.
 *
 * Each rule carries:
 *   - id / title: identifier and human-readable label of the signal,
 *   - weight: amount added to the signal's score per matching line,
 *   - pattern: case-insensitive regex tested against a single line,
 *   - nextChecks / possibleFixes: advice shown with the signal.
 *
 * The bare status-code alternatives (504, 502) are guarded with `(?<!\d)` so
 * they do not fire on the tail of a longer number such as a byte count of
 * 1502 in an access-log line.
 */
const RULES = [
  {
    id: "upstream_unreachable",
    title: "Upstream process unavailable or wrong target",
    weight: 10,
    pattern:
      /(connection refused|econnrefused|no route to host|host unreachable|upstream prematurely closed connection|connect\(\) failed \(111|dial tcp .*: connect: connection refused)/i,
    nextChecks: [
      "Check the upstream process/container is running and bound to the expected host:port.",
      "Run: ss -lntp | grep -E ':(80|443|3000|8000|8080|9000)\\b'",
      "Run: curl -sv http://127.0.0.1:<app-port>/health"
    ],
    possibleFixes: [
      "Fix upstream host/port in proxy config.",
      "Restart failed upstream process after confirming root cause.",
      "Add or fix readiness checks so proxy only routes to healthy targets."
    ]
  },
  {
    id: "gateway_timeout",
    title: "Gateway/proxy timeout on long request",
    weight: 8,
    pattern:
      /((?<!\d)504\b|gateway timeout|upstream timed out|context deadline exceeded|response header timeout|read timeout|operation timed out|deadline exceeded)/i,
    nextChecks: [
      "Measure direct upstream latency bypassing the gateway.",
      "Compare proxy timeout values against real endpoint execution time.",
      "Run: time curl -sv http://127.0.0.1:<app-port>/<slow-endpoint>"
    ],
    possibleFixes: [
      "Increase proxy timeout only for affected routes.",
      "Move expensive sync work to async/background processing.",
      "Add indexes/caching for slow backend queries."
    ]
  },
  {
    id: "bad_gateway_generic",
    title: "Proxy receives invalid upstream response",
    weight: 6,
    pattern: /((?<!\d)502\b|bad gateway|err_incomplete_chunked_encoding)/i,
    nextChecks: [
      "Correlate proxy access/error logs with app logs at the same UTC timestamp.",
      "Verify upstream response headers and transfer encoding are valid.",
      "Run: curl -sv --http1.1 http://127.0.0.1:<app-port>/<path>"
    ],
    possibleFixes: [
      "Fix upstream crashes/panics that terminate responses early.",
      "Disable/adjust buffering or chunked encoding behavior for streaming endpoints.",
      "Align proxy and upstream protocol settings (HTTP/1.1 vs HTTP/2)."
    ]
  },
  {
    id: "oom_or_restart",
    title: "Upstream process restart or memory pressure",
    weight: 8,
    pattern:
      /(oomkilled|out of memory|killed process \d+|signal: killed|container restarted|exit code 137|memory cgroup out of memory)/i,
    nextChecks: [
      "Inspect container/service restart count around failure timestamp.",
      "Check kernel/system logs for OOM kills.",
      "Run: journalctl -k --since '-30 min' | grep -Ei 'oom|killed process'"
    ],
    possibleFixes: [
      "Increase memory limit or reduce peak memory usage.",
      "Cap concurrency or request payload size for heavy endpoints.",
      "Add graceful backpressure instead of allowing process termination."
    ]
  },
  {
    id: "dns_or_tls",
    title: "DNS or TLS handshake failure between components",
    weight: 7,
    pattern:
      /(no such host|temporary failure in name resolution|tls handshake timeout|x509|certificate verify failed|unknown certificate authority|ssl routines|handshake failure)/i,
    nextChecks: [
      "Resolve hostname from the proxy runtime environment.",
      "Validate certificate chain, SAN, and system trust store.",
      "Run: openssl s_client -connect <host:port> -servername <host>"
    ],
    possibleFixes: [
      "Fix DNS records or container network DNS settings.",
      "Replace/renew invalid certificates and correct SAN names.",
      "Install proper CA bundle in runtime image."
    ]
  },
  {
    id: "db_dependency_pressure",
    title: "Database or dependency latency causing upstream failure",
    weight: 7,
    pattern:
      /(too many connections|could not connect to server|connection pool exhausted|lock wait timeout|deadlock found|query timeout|timeout expired|redis timeout|i\/o timeout)/i,
    nextChecks: [
      "Check DB/Redis connection pool saturation and slow query logs.",
      "Measure dependency latency and error rate during incident window.",
      "Run: EXPLAIN ANALYZE on top slow query if applicable."
    ],
    possibleFixes: [
      "Add indexes or query rewrites for hot paths.",
      "Tune pool size with server capacity constraints.",
      "Introduce retries with jitter only for idempotent operations."
    ]
  }
];
|
|
104
|
+
|
|
105
|
+
/**
 * Split raw log text into trimmed, non-empty lines.
 *
 * Recognizes LF, CRLF and bare-CR line breaks, so text containing lone
 * carriage returns is not merged into a single line. A null or undefined
 * input yields an empty list; other non-string values are converted with
 * String() first.
 *
 * @param {string} input - Raw log text.
 * @returns {string[]} Lines with surrounding whitespace removed and blank
 *   lines dropped, in their original order.
 */
function normalizeLines(input) {
  const text = typeof input === "string" ? input : String(input ?? "");
  return text
    .split(/\r\n|\r|\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}
|
|
111
|
+
|
|
112
|
+
/**
 * Map a signal's match count and accumulated score to a confidence label.
 *
 * A tier applies when either its match threshold or its score threshold is
 * reached; tiers are checked from strongest to weakest.
 *
 * @param {number} matches - Number of lines that matched the rule.
 * @param {number} score - Accumulated score of the signal.
 * @returns {"high"|"medium"|"low"} The first tier that applies, else "low".
 */
function summarizeConfidence(matches, score) {
  const tiers = [
    { label: "high", minMatches: 3, minScore: 24 },
    { label: "medium", minMatches: 2, minScore: 12 }
  ];

  for (const { label, minMatches, minScore } of tiers) {
    if (matches >= minMatches || score >= minScore) {
      return label;
    }
  }

  return "low";
}
|
|
121
|
+
|
|
122
|
+
/**
 * Scan log text for known incident signatures and build a triage report.
 *
 * Every non-empty line is tested against every rule in RULES. Each match adds
 * one to the signal's match count and the rule's weight to its score; up to
 * `maxEvidence` distinct matching lines are kept as evidence. Signals are
 * returned sorted by score (descending), then match count (descending), then
 * id.
 *
 * The advice arrays of each signal are copies, so a caller that mutates a
 * report cannot alter the shared rule table used by later calls.
 *
 * @param {string} input - Raw log text. null/undefined is treated as empty
 *   text; other non-string values are converted with String().
 * @param {{maxEvidence?: number}} [options] - `maxEvidence` is the number of
 *   evidence lines kept per signal; anything other than a positive integer
 *   falls back to 3.
 * @returns {{lineCount: number, matchedLineCount: number, unmatchedLineCount: number, signals: object[], baselineChecklist: string[]}}
 *   Line counts, the prioritized signals, and a fixed baseline checklist.
 */
export function analyzeIncident(input, options = {}) {
  // `options` may be passed explicitly as null, which a default parameter
  // does not cover.
  const requested = options?.maxEvidence;
  const maxEvidence = Number.isInteger(requested) && requested > 0 ? requested : 3;

  const text = typeof input === "string" ? input : String(input ?? "");
  const lines = normalizeLines(text);
  const signalMap = new Map();
  let matchedLineCount = 0;

  for (const line of lines) {
    let lineMatched = false;

    for (const rule of RULES) {
      if (!rule.pattern.test(line)) {
        continue;
      }

      lineMatched = true;

      let signal = signalMap.get(rule.id);
      if (!signal) {
        signal = {
          id: rule.id,
          title: rule.title,
          score: 0,
          matches: 0,
          evidence: [],
          nextChecks: [...rule.nextChecks],
          possibleFixes: [...rule.possibleFixes]
        };
        signalMap.set(rule.id, signal);
      }

      signal.matches += 1;
      signal.score += rule.weight;

      // Keep evidence short and free of repeated identical lines.
      if (signal.evidence.length < maxEvidence && !signal.evidence.includes(line)) {
        signal.evidence.push(line);
      }
    }

    // A line counts once even when several rules matched it.
    if (lineMatched) {
      matchedLineCount += 1;
    }
  }

  const signals = [...signalMap.values()]
    .map((signal) => ({
      ...signal,
      confidence: summarizeConfidence(signal.matches, signal.score)
    }))
    .sort((a, b) => {
      if (b.score !== a.score) {
        return b.score - a.score;
      }
      if (b.matches !== a.matches) {
        return b.matches - a.matches;
      }
      return a.id.localeCompare(b.id);
    });

  return {
    lineCount: lines.length,
    matchedLineCount,
    unmatchedLineCount: Math.max(lines.length - matchedLineCount, 0),
    signals,
    baselineChecklist: [
      "Reproduce against upstream directly (bypass proxy) to isolate gateway vs app layer.",
      "Align proxy and application logs by UTC timestamp for the same failing request.",
      "Diff latest deploy: image tag, env vars, secrets, and upstream endpoint settings.",
      "Validate health endpoint behavior and one safe rollback path before high-risk changes."
    ]
  };
}
|
package/src/output.js
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
 * Render one signal as a multi-line text section.
 *
 * NOTE(review): the leading whitespace inside these string literals is
 * reproduced exactly as it appears in the text this was read from; that text
 * may have had runs of spaces collapsed — verify against the published file.
 *
 * @param {object} signal - Signal with `title`, `score`, `matches`,
 *   `confidence`, `evidence`, `nextChecks` and `possibleFixes`.
 * @param {number} index - Zero-based position; displayed one-based.
 * @returns {string} Heading, stats line, then the evidence, next-check and
 *   possible-fix bullet lists, joined with newlines.
 */
function formatSignal(signal, index) {
  const bullet = (text) => ` - ${text}`;
  const section = (heading, items) => [heading, ...items.map((item) => bullet(item))];

  const rows = [
    `${index + 1}. ${signal.title}`,
    ` score: ${signal.score} | matches: ${signal.matches} | confidence: ${signal.confidence}`
  ];
  rows.push(...section(" evidence:", signal.evidence));
  rows.push(...section(" next checks:", signal.nextChecks));
  rows.push(...section(" possible fixes:", signal.possibleFixes));

  return rows.join("\n");
}
|
|
17
|
+
|
|
18
|
+
/**
 * Render an analysis result as a plain-text report.
 *
 * The report starts with a title and the input/matched line counts. When no
 * signals were found it says so and lists the baseline checks; otherwise it
 * lists every signal in the given order, followed by the baseline checks.
 *
 * @param {object} result - Report with `lineCount`, `matchedLineCount`,
 *   `signals` and `baselineChecklist`.
 * @returns {string} The report as newline-joined text.
 */
export function formatTextReport(result) {
  const rows = [
    "Fast Ops Fix - Incident First Response",
    `input lines: ${result.lineCount}`,
    `matched lines: ${result.matchedLineCount}`,
    ""
  ];

  if (result.signals.length > 0) {
    rows.push("Prioritized signals:");
    result.signals.forEach((signal, index) => {
      rows.push(formatSignal(signal, index));
    });
    rows.push("", "Baseline checks:");
  } else {
    rows.push("No strong known incident signatures detected.", "Run baseline checks:");
  }

  for (const item of result.baselineChecklist) {
    rows.push(`- ${item}`);
  }

  return rows.join("\n");
}
|