flow-debugger 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PORTFOLIO_README_SECTION.md +177 -0
- package/README.md +251 -0
- package/dashboard/app.js +339 -0
- package/dashboard/index.html +168 -0
- package/dashboard/style.css +846 -0
- package/dist/cjs/core/Analytics.js +174 -0
- package/dist/cjs/core/Analytics.js.map +1 -0
- package/dist/cjs/core/Classifier.js +66 -0
- package/dist/cjs/core/Classifier.js.map +1 -0
- package/dist/cjs/core/HealthMonitor.js +79 -0
- package/dist/cjs/core/HealthMonitor.js.map +1 -0
- package/dist/cjs/core/RootCause.js +89 -0
- package/dist/cjs/core/RootCause.js.map +1 -0
- package/dist/cjs/core/Sampler.js +34 -0
- package/dist/cjs/core/Sampler.js.map +1 -0
- package/dist/cjs/core/Timeline.js +90 -0
- package/dist/cjs/core/Timeline.js.map +1 -0
- package/dist/cjs/core/TraceEngine.js +222 -0
- package/dist/cjs/core/TraceEngine.js.map +1 -0
- package/dist/cjs/core/types.js +21 -0
- package/dist/cjs/core/types.js.map +1 -0
- package/dist/cjs/index.js +46 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/integrations/axios.js +136 -0
- package/dist/cjs/integrations/axios.js.map +1 -0
- package/dist/cjs/integrations/fetch.js +153 -0
- package/dist/cjs/integrations/fetch.js.map +1 -0
- package/dist/cjs/integrations/mongo.js +111 -0
- package/dist/cjs/integrations/mongo.js.map +1 -0
- package/dist/cjs/integrations/mysql.js +212 -0
- package/dist/cjs/integrations/mysql.js.map +1 -0
- package/dist/cjs/integrations/postgres.js +182 -0
- package/dist/cjs/integrations/postgres.js.map +1 -0
- package/dist/cjs/integrations/redis.js +105 -0
- package/dist/cjs/integrations/redis.js.map +1 -0
- package/dist/cjs/middleware/express.js +255 -0
- package/dist/cjs/middleware/express.js.map +1 -0
- package/dist/esm/core/Analytics.js +170 -0
- package/dist/esm/core/Analytics.js.map +1 -0
- package/dist/esm/core/Classifier.js +61 -0
- package/dist/esm/core/Classifier.js.map +1 -0
- package/dist/esm/core/HealthMonitor.js +75 -0
- package/dist/esm/core/HealthMonitor.js.map +1 -0
- package/dist/esm/core/RootCause.js +86 -0
- package/dist/esm/core/RootCause.js.map +1 -0
- package/dist/esm/core/Sampler.js +30 -0
- package/dist/esm/core/Sampler.js.map +1 -0
- package/dist/esm/core/Timeline.js +86 -0
- package/dist/esm/core/Timeline.js.map +1 -0
- package/dist/esm/core/TraceEngine.js +217 -0
- package/dist/esm/core/TraceEngine.js.map +1 -0
- package/dist/esm/core/types.js +18 -0
- package/dist/esm/core/types.js.map +1 -0
- package/dist/esm/index.js +22 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/integrations/axios.js +133 -0
- package/dist/esm/integrations/axios.js.map +1 -0
- package/dist/esm/integrations/fetch.js +149 -0
- package/dist/esm/integrations/fetch.js.map +1 -0
- package/dist/esm/integrations/mongo.js +107 -0
- package/dist/esm/integrations/mongo.js.map +1 -0
- package/dist/esm/integrations/mysql.js +209 -0
- package/dist/esm/integrations/mysql.js.map +1 -0
- package/dist/esm/integrations/postgres.js +179 -0
- package/dist/esm/integrations/postgres.js.map +1 -0
- package/dist/esm/integrations/redis.js +102 -0
- package/dist/esm/integrations/redis.js.map +1 -0
- package/dist/esm/middleware/express.js +219 -0
- package/dist/esm/middleware/express.js.map +1 -0
- package/dist/types/core/Analytics.d.ts +35 -0
- package/dist/types/core/Analytics.d.ts.map +1 -0
- package/dist/types/core/Classifier.d.ts +21 -0
- package/dist/types/core/Classifier.d.ts.map +1 -0
- package/dist/types/core/HealthMonitor.d.ts +14 -0
- package/dist/types/core/HealthMonitor.d.ts.map +1 -0
- package/dist/types/core/RootCause.d.ts +12 -0
- package/dist/types/core/RootCause.d.ts.map +1 -0
- package/dist/types/core/Sampler.d.ts +13 -0
- package/dist/types/core/Sampler.d.ts.map +1 -0
- package/dist/types/core/Timeline.d.ts +22 -0
- package/dist/types/core/Timeline.d.ts.map +1 -0
- package/dist/types/core/TraceEngine.d.ts +47 -0
- package/dist/types/core/TraceEngine.d.ts.map +1 -0
- package/dist/types/core/types.d.ts +118 -0
- package/dist/types/core/types.d.ts.map +1 -0
- package/dist/types/index.d.ts +18 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/integrations/axios.d.ts +22 -0
- package/dist/types/integrations/axios.d.ts.map +1 -0
- package/dist/types/integrations/fetch.d.ts +25 -0
- package/dist/types/integrations/fetch.d.ts.map +1 -0
- package/dist/types/integrations/mongo.d.ts +26 -0
- package/dist/types/integrations/mongo.d.ts.map +1 -0
- package/dist/types/integrations/mysql.d.ts +20 -0
- package/dist/types/integrations/mysql.d.ts.map +1 -0
- package/dist/types/integrations/postgres.d.ts +20 -0
- package/dist/types/integrations/postgres.d.ts.map +1 -0
- package/dist/types/integrations/redis.d.ts +20 -0
- package/dist/types/integrations/redis.d.ts.map +1 -0
- package/dist/types/middleware/express.d.ts +39 -0
- package/dist/types/middleware/express.d.ts.map +1 -0
- package/example/server.ts +234 -0
- package/jest.config.js +8 -0
- package/package.json +110 -0
- package/portfolio-repo/APIRESPONSE DASH.png +0 -0
- package/portfolio-repo/PAYLOAD.png +0 -0
- package/portfolio-repo/README.md +182 -0
- package/src/core/Analytics.ts +209 -0
- package/src/core/Classifier.ts +82 -0
- package/src/core/HealthMonitor.ts +92 -0
- package/src/core/RootCause.ts +105 -0
- package/src/core/Sampler.ts +35 -0
- package/src/core/Timeline.ts +108 -0
- package/src/core/TraceEngine.ts +266 -0
- package/src/core/types.ts +170 -0
- package/src/index.ts +42 -0
- package/src/integrations/axios.ts +164 -0
- package/src/integrations/fetch.ts +172 -0
- package/src/integrations/mongo.ts +130 -0
- package/src/integrations/mysql.ts +239 -0
- package/src/integrations/postgres.ts +217 -0
- package/src/integrations/redis.ts +122 -0
- package/src/middleware/express.ts +264 -0
- package/tests/Analytics.test.ts +136 -0
- package/tests/Classifier.test.ts +57 -0
- package/tests/RootCause.test.ts +69 -0
- package/tests/TraceEngine.test.ts +110 -0
- package/tsconfig.cjs.json +9 -0
- package/tsconfig.esm.json +9 -0
- package/tsconfig.json +31 -0
- package/tsconfig.types.json +8 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────
|
|
2
|
+
// flow-debugger — Analytics Engine
|
|
3
|
+
// Per-endpoint aggregation + service failure grouping
|
|
4
|
+
// ─────────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
Trace,
|
|
8
|
+
EndpointStats,
|
|
9
|
+
AnalyticsReport,
|
|
10
|
+
ServiceFailureStats,
|
|
11
|
+
ServiceTag,
|
|
12
|
+
HealthStatus,
|
|
13
|
+
} from './types';
|
|
14
|
+
import { HealthMonitor } from './HealthMonitor';
|
|
15
|
+
|
|
16
|
+
/**
 * In-memory store of completed traces that produces aggregate reports:
 * per-endpoint statistics, service failure breakdowns and trace search.
 * Every recorded step is also forwarded to an internal HealthMonitor.
 */
export class Analytics {
  /** Completed traces in insertion order (oldest first). */
  private traces: Trace[] = [];
  /** Upper bound on the number of traces retained. */
  private maxTraces: number;
  /** Construction time; used to derive `uptime` in reports. */
  private startTime: Date;
  private healthMonitor: HealthMonitor;

  /**
   * @param maxTraces Maximum number of traces kept in memory (default 1000).
   */
  constructor(maxTraces = 1000) {
    this.maxTraces = maxTraces;
    this.startTime = new Date();
    this.healthMonitor = new HealthMonitor();
  }

  /**
   * Record a completed trace and feed each of its steps to the health monitor.
   * Never throws: any failure while recording is swallowed on purpose so that
   * analytics bookkeeping cannot break the caller.
   */
  record(trace: Trace): void {
    try {
      this.traces.push(trace);

      // Trim if over limit (keep most recent)
      if (this.traces.length > this.maxTraces) {
        this.traces = this.traces.slice(-this.maxTraces);
      }

      // Update health monitor with step results
      for (const step of trace.steps) {
        this.healthMonitor.recordStep(step);
      }
    } catch (_) {
      // never crash
    }
  }

  /**
   * Build the full analytics report from the traces currently stored.
   *
   * @returns Totals, uptime in milliseconds since construction, per-endpoint
   *   stats, service health, the service failure breakdown, and the 20 most
   *   recent traces (newest first).
   */
  getReport(): AnalyticsReport {
    const endpoints = this.getEndpointStats();
    const totalRequests = this.traces.length;
    const totalErrors = this.traces.filter(
      t => t.classification === 'ERROR' || t.classification === 'CRITICAL',
    ).length;
    const totalSlow = this.traces.filter(t => t.classification === 'WARN').length;
    const uptime = Date.now() - this.startTime.getTime();

    return {
      totalRequests,
      totalErrors,
      totalSlow,
      uptime,
      endpoints,
      serviceHealth: this.healthMonitor.getAllHealth(),
      topFailures: this.getTopFailures(),
      recentTraces: this.traces.slice(-20).reverse(),
    };
  }

  /**
   * Get stats for a specific endpoint path.
   *
   * @param path Endpoint exactly as recorded on the traces.
   * @returns Stats of the first group with that path (groups are ordered by
   *   request count, descending), or `null` when no trace matches.
   */
  getEndpointReport(path: string): EndpointStats | null {
    const stats = this.getEndpointStats();
    return stats.find(s => s.path === path) || null;
  }

  /**
   * Aggregate the stored traces per (method, endpoint) pair.
   *
   * @returns One entry per pair, sorted by request count, descending.
   */
  private getEndpointStats(): EndpointStats[] {
    const grouped = new Map<string, Trace[]>();

    for (const trace of this.traces) {
      const groupKey = `${trace.method}:${trace.endpoint}`;
      const bucket = grouped.get(groupKey);
      if (bucket) {
        bucket.push(trace);
      } else {
        grouped.set(groupKey, [trace]);
      }
    }

    const stats: EndpointStats[] = [];
    for (const traces of grouped.values()) {
      const first = traces[0];
      if (!first) continue;

      // Read method/endpoint from the trace itself rather than splitting the
      // group key: endpoints may contain ':' (e.g. "/users/:id"), and
      // splitting on ':' would truncate them.
      const { method, endpoint: path } = first;

      const durations = traces.map(t => t.totalDuration).sort((a, b) => a - b);
      const errorCount = traces.filter(
        t => t.classification === 'ERROR' || t.classification === 'CRITICAL',
      ).length;
      const slowCount = traces.filter(t => t.classification === 'WARN').length;

      // Common issues: the five most frequent root causes, formatted "cause (Nx)"
      const issues = new Map<string, number>();
      for (const t of traces) {
        if (t.rootCause) {
          const cause = t.rootCause.cause;
          issues.set(cause, (issues.get(cause) || 0) + 1);
        }
      }
      const commonIssues = [...issues.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, 5)
        .map(([issue, count]) => `${issue} (${count}x)`);

      // Service failure breakdown
      const serviceFailures = this.getServiceFailures(traces);

      stats.push({
        path,
        method,
        totalRequests: traces.length,
        errorCount,
        slowCount,
        avgDuration: durations.reduce((a, b) => a + b, 0) / durations.length,
        p95Duration: durations[Math.floor(durations.length * 0.95)] || 0,
        maxDuration: durations[durations.length - 1] || 0,
        commonIssues,
        serviceFailures,
        recentTraces: traces.slice(-5).reverse(),
      });
    }

    return stats.sort((a, b) => b.totalRequests - a.totalRequests);
  }

  /** Get service failure breakdown across all traces */
  private getTopFailures(): ServiceFailureStats[] {
    return this.getServiceFailures(this.traces);
  }

  /**
   * Calculate service failure stats from a set of traces.
   * A step counts as a failure when its status is 'error' or 'timeout'.
   *
   * @returns One entry per failing service with its failure count and its
   *   rounded share (0-100) of all failures, sorted by count, descending.
   */
  private getServiceFailures(traces: Trace[]): ServiceFailureStats[] {
    const failures = new Map<ServiceTag, number>();
    let totalFailures = 0;

    for (const trace of traces) {
      for (const step of trace.steps) {
        if (step.status === 'error' || step.status === 'timeout') {
          failures.set(step.service, (failures.get(step.service) || 0) + 1);
          totalFailures++;
        }
      }
    }

    return [...failures.entries()]
      .map(([service, count]) => ({
        service,
        count,
        percentage: totalFailures > 0 ? Math.round((count / totalFailures) * 100) : 0,
      }))
      .sort((a, b) => b.count - a.count);
  }

  /** Clear all stored traces */
  clear(): void {
    this.traces = [];
  }

  /** Get raw trace count */
  getTraceCount(): number {
    return this.traces.length;
  }

  /**
   * Search traces by traceId, endpoint, step error message or root cause
   * (case-insensitive substring match).
   *
   * @param query Text to look for; an empty query returns no results.
   * @param options `env` restricts matches to traces with that environment;
   *   `limit` caps the number of results (a missing or zero limit means 50).
   * @returns Matching traces, most recent first.
   */
  searchTraces(query: string, options?: { env?: string; limit?: number }): Trace[] {
    if (!query) return [];

    const lowerQuery = query.toLowerCase();
    const limit = options?.limit || 50;

    const matches = this.traces.filter(trace => {
      // Filter by environment if specified
      if (options?.env && trace.environment !== options.env) {
        return false;
      }

      // Match traceId
      if (trace.traceId.toLowerCase().includes(lowerQuery)) {
        return true;
      }

      // Match endpoint
      if (trace.endpoint.toLowerCase().includes(lowerQuery)) {
        return true;
      }

      // Match error message in steps
      for (const step of trace.steps) {
        if (step.error && step.error.toLowerCase().includes(lowerQuery)) {
          return true;
        }
      }

      // Match root cause
      if (trace.rootCause && trace.rootCause.cause.toLowerCase().includes(lowerQuery)) {
        return true;
      }

      return false;
    });

    return matches.slice(-limit).reverse();
  }

  /** Expose the health monitor fed by `record`. */
  getHealthMonitor(): HealthMonitor {
    return this.healthMonitor;
  }
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────
|
|
2
|
+
// flow-debugger — Classifier
|
|
3
|
+
// Classifies steps and traces into severity levels
|
|
4
|
+
// ─────────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
ClassificationLevel,
|
|
8
|
+
StepStatus,
|
|
9
|
+
TraceStep,
|
|
10
|
+
DebuggerConfig,
|
|
11
|
+
} from './types';
|
|
12
|
+
|
|
13
|
+
/**
 * Map a single step's outcome to a severity level.
 *
 * - `timeout` status         → CRITICAL
 * - `error` status           → ERROR
 * - duration > slowThreshold → WARN
 * - anything else            → INFO
 *
 * @param duration Step duration, compared against the threshold.
 * @param status   Outcome reported for the step.
 * @param config   Supplies `slowThreshold`; 300 is used when it is not set.
 * @returns The severity level for the step.
 */
export function classify(
  duration: number,
  status: StepStatus,
  config: Pick<DebuggerConfig, 'slowThreshold'>,
): ClassificationLevel {
  const slowLimit = config.slowThreshold ?? 300;

  switch (status) {
    case 'timeout':
      return 'CRITICAL';
    case 'error':
      return 'ERROR';
    default:
      // Failure states take precedence; duration only matters otherwise.
      return duration > slowLimit ? 'WARN' : 'INFO';
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Derive the severity of a whole trace from its steps.
 *
 * The trace takes the most severe classification found on any step
 * (INFO < WARN < ERROR < CRITICAL). When every step is INFO, the trace is
 * still promoted to WARN if its total duration exceeds the slow threshold.
 *
 * @param steps          Steps of the trace, each already classified.
 * @param _totalDuration Total duration of the trace.
 * @param config         Supplies `slowThreshold`; 300 is used when it is not set.
 * @returns The severity level for the trace.
 */
export function classifyTrace(
  steps: TraceStep[],
  _totalDuration: number,
  config: Pick<DebuggerConfig, 'slowThreshold'>,
): ClassificationLevel {
  const slowLimit = config.slowThreshold ?? 300;

  // Position in this list is the severity rank (higher index = more severe).
  const severityOrder: ClassificationLevel[] = ['INFO', 'WARN', 'ERROR', 'CRITICAL'];

  let worst: ClassificationLevel = 'INFO';
  for (const { classification } of steps) {
    if (severityOrder.indexOf(classification) > severityOrder.indexOf(worst)) {
      worst = classification;
    }
  }

  // No step stood out, but the request as a whole may still be slow.
  if (worst === 'INFO' && _totalDuration > slowLimit) {
    return 'WARN';
  }
  return worst;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Severity classification for database queries, driven by the dedicated
 * `slowQueryThreshold` instead of the general slow threshold.
 *
 * @param duration Query duration, compared against the threshold.
 * @param status   Outcome reported for the query step.
 * @param config   Supplies `slowQueryThreshold`; 300 is used when it is not set.
 * @returns CRITICAL for a timeout, ERROR for a failure, WARN when the
 *   duration exceeds the threshold, INFO otherwise.
 */
export function classifyQuery(
  duration: number,
  status: StepStatus,
  config: Pick<DebuggerConfig, 'slowQueryThreshold'>,
): ClassificationLevel {
  const queryLimit = config.slowQueryThreshold ?? 300;

  // A failed status decides the level regardless of how long the query took.
  const failureLevel = status === 'timeout' ? 'CRITICAL' : status === 'error' ? 'ERROR' : null;
  if (failureLevel !== null) {
    return failureLevel;
  }

  return duration > queryLimit ? 'WARN' : 'INFO';
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────
|
|
2
|
+
// flow-debugger — Health Monitor
|
|
3
|
+
// Tracks dependency health from step results
|
|
4
|
+
// ─────────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
TraceStep,
|
|
8
|
+
HealthStatus,
|
|
9
|
+
HealthState,
|
|
10
|
+
ServiceTag,
|
|
11
|
+
} from './types';
|
|
12
|
+
|
|
13
|
+
/** Running success/failure tally kept for one tracked service. */
interface HealthRecord {
  /** Service tag this tally belongs to. */
  service: ServiceTag;
  /** Name reported alongside the service in health statuses. */
  name: string;
  /** Number of steps recorded with a 'success' status. */
  successes: number;
  /** Number of steps recorded with any other status. */
  failures: number;
  /** When a step for this service was last recorded. */
  lastCheck: Date;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Keeps a per-service tally of successful and failed steps and turns it
 * into a health status (healthy / degraded / down).
 */
export class HealthMonitor {
  /** Tallies keyed by service tag. */
  private records = new Map<string, HealthRecord>();

  /**
   * Fold one step result into the tally of its service.
   * Steps tagged 'internal' or 'unknown' are not tracked. Never throws.
   */
  recordStep(step: TraceStep): void {
    try {
      const service = step.service;
      if (service === 'internal' || service === 'unknown') return;

      let entry = this.records.get(service);
      if (entry === undefined) {
        entry = {
          service,
          name: service,
          successes: 0,
          failures: 0,
          lastCheck: new Date(),
        };
        this.records.set(service, entry);
      }

      entry.lastCheck = new Date();

      // Anything that is not an explicit success counts against the service.
      if (step.status === 'success') {
        entry.successes += 1;
      } else {
        entry.failures += 1;
      }
    } catch (_) {
      // never crash
    }
  }

  /**
   * Health status of one service.
   *
   * @returns The status, or `null` if no step was recorded for the service.
   */
  getHealth(service: ServiceTag): HealthStatus | null {
    const entry = this.records.get(service);
    return entry ? this.toHealthStatus(entry) : null;
  }

  /** Health statuses of every tracked service. */
  getAllHealth(): HealthStatus[] {
    const statuses: HealthStatus[] = [];
    for (const entry of this.records.values()) {
      statuses.push(this.toHealthStatus(entry));
    }
    return statuses;
  }

  /**
   * Convert a tally into a health status.
   * Success rate >= 95% is 'healthy', >= 70% is 'degraded', below that 'down'.
   * `successRate` is reported as a rounded percentage (0-100).
   */
  private toHealthStatus(record: HealthRecord): HealthStatus {
    const total = record.successes + record.failures;
    const rate = total > 0 ? record.successes / total : 1;

    const status: HealthState =
      rate >= 0.95 ? 'healthy' : rate >= 0.7 ? 'degraded' : 'down';

    return {
      service: record.service,
      name: record.name,
      status,
      lastCheck: record.lastCheck,
      successRate: Math.round(rate * 100),
      totalChecks: total,
    };
  }

  /** Forget every tally. */
  reset(): void {
    this.records.clear();
  }
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────
|
|
2
|
+
// flow-debugger — Root Cause Detection
|
|
3
|
+
// Analyzes trace steps to identify the most likely failure origin
|
|
4
|
+
// ─────────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
TraceStep,
|
|
8
|
+
RootCauseResult,
|
|
9
|
+
DebuggerConfig,
|
|
10
|
+
ServiceTag,
|
|
11
|
+
} from './types';
|
|
12
|
+
|
|
13
|
+
/**
 * Pick the most likely origin of a trace's failure or slowness.
 *
 * Checks run in this priority order; the first match wins:
 *  1. A step timed out → that step (confidence: high).
 *  2. A step failed    → the first failed step. Confidence is high when the
 *     response status code is >= 400, medium otherwise.
 *  3. One or more steps exceeded the slow threshold → the slowest of them.
 *     Confidence is high when it accounts for more than half of the summed
 *     step durations, medium otherwise.
 *
 * @param steps      Steps of the trace, in order.
 * @param statusCode Response status code, if known.
 * @param config     Supplies `slowThreshold`; 300 is used when it is not set.
 * @returns The detected root cause, or `null` when there are no steps or
 *   nothing timed out, failed or was slow.
 */
export function detectRootCause(
  steps: TraceStep[],
  statusCode: number | undefined,
  config: Pick<DebuggerConfig, 'slowThreshold' | 'slowQueryThreshold'>,
): RootCauseResult | null {
  if (steps.length === 0) {
    return null;
  }

  // Timeouts outrank every other signal.
  const timeoutStep = steps.find(s => s.status === 'timeout');
  if (timeoutStep) {
    return {
      cause: `${timeoutStep.name} timed out`,
      step: timeoutStep.name,
      service: timeoutStep.service,
      confidence: 'high',
    };
  }

  // The earliest failed step is blamed; a 4xx/5xx response raises confidence.
  const erroredStep = steps.find(s => s.status === 'error');
  if (erroredStep) {
    const responseFailed = (statusCode ?? 0) >= 400;
    return {
      cause: `${erroredStep.name} failed: ${erroredStep.error || 'unknown error'}`,
      step: erroredStep.name,
      service: erroredStep.service,
      confidence: responseFailed ? 'high' : 'medium',
    };
  }

  // No failures: look for the slowest step above the threshold.
  const slowLimit = config.slowThreshold ?? 300;
  let bottleneck: TraceStep | undefined;
  for (const candidate of steps) {
    if (candidate.duration > slowLimit) {
      // On equal durations the later step wins.
      if (bottleneck === undefined || candidate.duration >= bottleneck.duration) {
        bottleneck = candidate;
      }
    }
  }
  if (bottleneck === undefined) {
    return null;
  }

  let summedDuration = 0;
  for (const s of steps) {
    summedDuration += s.duration;
  }
  const share = bottleneck.duration / summedDuration;

  return {
    cause: `Slow ${getServiceLabel(bottleneck.service)} query: ${bottleneck.name} (${Math.round(bottleneck.duration)}ms)`,
    step: bottleneck.name,
    service: bottleneck.service,
    confidence: share > 0.5 ? 'high' : 'medium',
  };
}
|
|
86
|
+
|
|
87
|
+
/**
 * Human-readable label for a service tag, used when composing root-cause
 * messages. Falls back to the raw tag when no label is defined for it.
 */
function getServiceLabel(service: ServiceTag): string {
  const displayNames: Record<ServiceTag, string> = {
    // Databases and caches
    mongo: 'MongoDB',
    mysql: 'MySQL',
    postgres: 'PostgreSQL',
    redis: 'Redis',
    // HTTP clients share one label
    axios: 'HTTP',
    fetch: 'HTTP',
    // Third-party providers
    stripe: 'Stripe',
    razorpay: 'Razorpay',
    sendgrid: 'SendGrid',
    twilio: 'Twilio',
    // Generic buckets
    external: 'External',
    internal: 'Internal',
    unknown: 'Unknown',
  };

  const label = displayNames[service];
  return label ? label : service;
}
|
|
105
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────
|
|
2
|
+
// flow-debugger — Sampler
|
|
3
|
+
// Probabilistic sampling for high-traffic environments
|
|
4
|
+
// ─────────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
/**
 * Decides, per request, whether a trace should be captured.
 * The sampling rate is a probability kept within [0, 1].
 */
export class Sampler {
  private rate: number;
  private alwaysSampleErrors: boolean;

  /**
   * @param rate               Sampling probability; values outside [0, 1]
   *                           are clamped (default 1 = sample everything).
   * @param alwaysSampleErrors Whether errors should be sampled regardless of
   *                           the rate (default true).
   */
  constructor(rate = 1, alwaysSampleErrors = true) {
    this.rate = Sampler.clampRate(rate);
    this.alwaysSampleErrors = alwaysSampleErrors;
  }

  /** Restrict a rate to the [0, 1] interval. */
  private static clampRate(rate: number): number {
    return Math.min(1, Math.max(0, rate));
  }

  /**
   * Should this request be sampled?
   * A rate of 1 (or more) always samples and a rate of 0 (or less) never
   * does; in between, the decision is random with probability `rate`.
   */
  shouldSample(): boolean {
    if (this.rate >= 1) {
      return true;
    }
    return !(this.rate <= 0) && Math.random() < this.rate;
  }

  /** Whether errors are to be sampled regardless of the rate. */
  shouldSampleError(): boolean {
    return this.alwaysSampleErrors;
  }

  /** Change the sampling rate at runtime; the value is clamped to [0, 1]. */
  setRate(rate: number): void {
    this.rate = Sampler.clampRate(rate);
  }

  /** Current (clamped) sampling rate. */
  getRate(): number {
    return this.rate;
  }
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────
|
|
2
|
+
// flow-debugger — Timeline Renderer
|
|
3
|
+
// Prints a visual timeline of a trace to the console
|
|
4
|
+
// ─────────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
import { Trace, ClassificationLevel } from './types';
|
|
7
|
+
|
|
8
|
+
// ANSI escape sequences used to style console output.
const RESET = '\x1b[0m';
const DIM = '\x1b[2m';
const BOLD = '\x1b[1m';
const CYAN = '\x1b[36m';

/** Colour applied to a line, keyed by classification level. */
const LEVEL_COLORS: Record<ClassificationLevel, string> = {
  INFO: '\x1b[32m', // green
  WARN: '\x1b[33m', // yellow
  ERROR: '\x1b[31m', // red
  CRITICAL: '\x1b[35m', // magenta
};

/** Marker printed after a step name, keyed by step status. */
const ICONS: Record<string, string> = {
  success: '✔',
  error: '❌',
  timeout: '⏱ TIMEOUT',
};
|
|
25
|
+
|
|
26
|
+
/**
 * Print a multi-line timeline of a trace through `logger`.
 *
 * The output is a boxed block: a header with the trace id, method and
 * endpoint; one line per step (start offset, name, status icon, duration and,
 * for non-internal steps, the service tag); an optional error detail line
 * and slow-step warning under a step; the response line; the root cause
 * (when present); and finally the classification and total duration.
 * All lines are joined with newlines and passed to `logger` in one call.
 *
 * NOTE(review): the slow-step warning uses a fixed 300ms cut-off in addition
 * to the step's WARN classification, independent of any configured threshold.
 *
 * @param trace  Trace to render.
 * @param logger Sink for the rendered text (defaults to `console.log`).
 */
export function renderTimeline(
  trace: Trace,
  logger: (...args: unknown[]) => void = console.log,
): void {
  // "[Nms]" followed by padding so that short offsets line up.
  const stamp = (ms: number): string =>
    `[${ms}ms]${' '.repeat(Math.max(1, 5 - String(ms).length))}`;

  const out: string[] = [
    '',
    `${CYAN}┌─── flow-debugger ── ${trace.traceId} ── ${trace.method} ${trace.endpoint} ───${RESET}`,
    `${DIM}│ [0ms] Request start${RESET}`,
  ];

  for (const step of trace.steps) {
    const startedAt = Math.round(step.startTime);
    const took = Math.round(step.duration);
    const statusIcon = ICONS[step.status] || '?';
    const tint = LEVEL_COLORS[step.classification] || '';
    const origin = step.service === 'internal' ? '' : ` ${DIM}[${step.service}]${RESET}`;

    out.push(`│ ${stamp(startedAt)}${tint}${step.name} ${statusIcon} (${took}ms)${RESET}${origin}`);

    if (step.error) {
      out.push(`│ ${DIM}└─ ${step.error}${RESET}`);
    }

    // Slow query warning
    if (step.classification === 'WARN' && step.duration > 300) {
      out.push(`│ ${LEVEL_COLORS.WARN}⚠ Slow ${step.service} query detected (${took}ms)${RESET}`);
    }
  }

  // Response line
  const totalMs = Math.round(trace.totalDuration);
  out.push(`${DIM}│ ${stamp(totalMs)}Response ${trace.statusCode || '—'}${RESET}`);
  out.push('│');

  // Root cause
  const rootCause = trace.rootCause;
  if (rootCause) {
    out.push(
      `${LEVEL_COLORS.ERROR}│ 🔍 Root cause: ${rootCause.cause}${RESET}`,
      `${DIM}│ Service: ${rootCause.service} | Confidence: ${rootCause.confidence}${RESET}`,
    );
  }

  // Classification and footer
  out.push(
    `${LEVEL_COLORS[trace.classification]}│ Classification: ${trace.classification}${RESET}`,
    `${DIM}│ Total: ${totalMs}ms${RESET}`,
    `${CYAN}└${'─'.repeat(50)}${RESET}`,
    '',
  );

  logger(out.join('\n'));
}
|
|
94
|
+
|
|
95
|
+
/**
 * Print a single-line summary of a trace, intended for high-traffic mode:
 * an icon for the classification, the method and endpoint, the rounded total
 * duration, the classification itself and, when present, the root cause.
 *
 * @param trace  Trace to summarise.
 * @param logger Sink for the rendered line (defaults to `console.log`).
 */
export function renderCompact(
  trace: Trace,
  logger: (...args: unknown[]) => void = console.log,
): void {
  let marker: string;
  switch (trace.classification) {
    case 'INFO':
      marker = '✔';
      break;
    case 'WARN':
      marker = '⚠';
      break;
    case 'ERROR':
      marker = '❌';
      break;
    default:
      // CRITICAL and anything unrecognised
      marker = '🔴';
      break;
  }

  let rootCauseSuffix = '';
  if (trace.rootCause) {
    rootCauseSuffix = ` → ${trace.rootCause.cause}`;
  }

  const elapsedMs = Math.round(trace.totalDuration);
  logger(`${marker} ${trace.method} ${trace.endpoint} ${elapsedMs}ms [${trace.classification}]${rootCauseSuffix}`);
}
|