4runr-os 2.10.49 → 2.10.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/gateway/src/metrics/monitoring-detail.ts +162 -162
- package/apps/gateway/src/middleware/log-capture.ts +70 -70
- package/apps/gateway/src/routes/monitoring.ts +298 -298
- package/mk3-tui/src/app.rs +37 -47
- package/mk3-tui/src/monitoring/mod.rs +439 -439
- package/mk3-tui/src/ui/portal_monitoring.rs +3 -2
- package/package.json +2 -2
|
@@ -1,162 +1,162 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Phase 3: structured snapshots from prom-client metrics for /api/monitoring/metrics/*
|
|
3
|
-
* (no Prometheus text parsing — reads live registry values.)
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import {
|
|
7
|
-
httpRequestTotal,
|
|
8
|
-
httpRequestErrors,
|
|
9
|
-
runsCreated,
|
|
10
|
-
runsStarted,
|
|
11
|
-
runsCompleted,
|
|
12
|
-
runsActive,
|
|
13
|
-
sseConnectionsOpened,
|
|
14
|
-
sseConnectionsClosed,
|
|
15
|
-
sseActiveConnections,
|
|
16
|
-
sseMessagesTotal,
|
|
17
|
-
dbConnectionsActive,
|
|
18
|
-
dbConnectionsIdle,
|
|
19
|
-
redisConnectionsActive,
|
|
20
|
-
} from './index.js';
|
|
21
|
-
|
|
22
|
-
function sumCounterValues(m: { values: Array<{ value: number }> }): number {
|
|
23
|
-
return m.values.reduce((s, x) => s + x.value, 0);
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
function gaugeValue(m: { values: Array<{ value: number }> }): number {
|
|
27
|
-
return m.values[0]?.value ?? 0;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
function isFailedRunStatus(status: string): boolean {
|
|
31
|
-
const normalized = status.trim().toLowerCase();
|
|
32
|
-
return [
|
|
33
|
-
'failed',
|
|
34
|
-
'failure',
|
|
35
|
-
'error',
|
|
36
|
-
'errored',
|
|
37
|
-
'cancelled',
|
|
38
|
-
'canceled',
|
|
39
|
-
'timeout',
|
|
40
|
-
'timed_out',
|
|
41
|
-
].includes(normalized);
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
export async function getMonitoringHttpPayload(): Promise<Record<string, unknown>> {
|
|
45
|
-
const req = await httpRequestTotal.get();
|
|
46
|
-
let totalRequests = 0;
|
|
47
|
-
const byRoute: Array<{ method: string; route: string; status_code: string; count: number }> =
|
|
48
|
-
[];
|
|
49
|
-
for (const x of req.values) {
|
|
50
|
-
totalRequests += x.value;
|
|
51
|
-
byRoute.push({
|
|
52
|
-
method: String(x.labels.method ?? ''),
|
|
53
|
-
route: String(x.labels.route ?? ''),
|
|
54
|
-
status_code: String(x.labels.status_code ?? ''),
|
|
55
|
-
count: x.value,
|
|
56
|
-
});
|
|
57
|
-
}
|
|
58
|
-
byRoute.sort((a, b) => b.count - a.count);
|
|
59
|
-
|
|
60
|
-
const err = await httpRequestErrors.get();
|
|
61
|
-
let totalErrors = 0;
|
|
62
|
-
const errorsByRoute: Array<{
|
|
63
|
-
method: string;
|
|
64
|
-
route: string;
|
|
65
|
-
error_type: string;
|
|
66
|
-
count: number;
|
|
67
|
-
}> = [];
|
|
68
|
-
for (const x of err.values) {
|
|
69
|
-
totalErrors += x.value;
|
|
70
|
-
errorsByRoute.push({
|
|
71
|
-
method: String(x.labels.method ?? ''),
|
|
72
|
-
route: String(x.labels.route ?? ''),
|
|
73
|
-
error_type: String(x.labels.error_type ?? ''),
|
|
74
|
-
count: x.value,
|
|
75
|
-
});
|
|
76
|
-
}
|
|
77
|
-
errorsByRoute.sort((a, b) => b.count - a.count);
|
|
78
|
-
|
|
79
|
-
const errorRate = totalRequests > 0 ? totalErrors / totalRequests : 0;
|
|
80
|
-
|
|
81
|
-
return {
|
|
82
|
-
placeholder: false,
|
|
83
|
-
summary: {
|
|
84
|
-
totalRequests,
|
|
85
|
-
totalErrors,
|
|
86
|
-
errorRate: Number(errorRate.toFixed(4)),
|
|
87
|
-
},
|
|
88
|
-
topRoutes: byRoute.slice(0, 25),
|
|
89
|
-
errorsByRoute: errorsByRoute.slice(0, 15),
|
|
90
|
-
timestamp: new Date().toISOString(),
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
export async function getMonitoringRunsPayload(): Promise<Record<string, unknown>> {
|
|
95
|
-
const created = sumCounterValues(await runsCreated.get());
|
|
96
|
-
const started = sumCounterValues(await runsStarted.get());
|
|
97
|
-
const completedByStatus = (await runsCompleted.get()).values.map((x) => ({
|
|
98
|
-
status: String(x.labels.status ?? ''),
|
|
99
|
-
count: x.value,
|
|
100
|
-
}));
|
|
101
|
-
const completed = completedByStatus.reduce((s, x) => s + x.count, 0);
|
|
102
|
-
const failedByStatus = completedByStatus.filter((x) => isFailedRunStatus(x.status));
|
|
103
|
-
const active = gaugeValue(await runsActive.get());
|
|
104
|
-
|
|
105
|
-
return {
|
|
106
|
-
placeholder: false,
|
|
107
|
-
summary: {
|
|
108
|
-
created,
|
|
109
|
-
started,
|
|
110
|
-
completed,
|
|
111
|
-
failed: failedByStatus.reduce((s, x) => s + x.count, 0),
|
|
112
|
-
active,
|
|
113
|
-
},
|
|
114
|
-
byStatus: completedByStatus,
|
|
115
|
-
failedStatusLabels: failedByStatus.map((x) => x.status),
|
|
116
|
-
timestamp: new Date().toISOString(),
|
|
117
|
-
};
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
export async function getMonitoringSsePayload(): Promise<Record<string, unknown>> {
|
|
121
|
-
const opened = sumCounterValues(await sseConnectionsOpened.get());
|
|
122
|
-
const closed = sumCounterValues(await sseConnectionsClosed.get());
|
|
123
|
-
const active = gaugeValue(await sseActiveConnections.get());
|
|
124
|
-
const messages = sumCounterValues(await sseMessagesTotal.get());
|
|
125
|
-
|
|
126
|
-
return {
|
|
127
|
-
placeholder: false,
|
|
128
|
-
summary: {
|
|
129
|
-
connectionsOpened: opened,
|
|
130
|
-
connectionsClosed: closed,
|
|
131
|
-
activeConnections: active,
|
|
132
|
-
messagesSent: messages,
|
|
133
|
-
},
|
|
134
|
-
timestamp: new Date().toISOString(),
|
|
135
|
-
};
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
export async function getDependencyPoolsPayload(): Promise<{
|
|
139
|
-
database: { active: number; idle: number };
|
|
140
|
-
redis: { active: number };
|
|
141
|
-
telemetry: {
|
|
142
|
-
source: string;
|
|
143
|
-
bestEffort: boolean;
|
|
144
|
-
note: string;
|
|
145
|
-
};
|
|
146
|
-
}> {
|
|
147
|
-
return {
|
|
148
|
-
database: {
|
|
149
|
-
active: gaugeValue(await dbConnectionsActive.get()),
|
|
150
|
-
idle: gaugeValue(await dbConnectionsIdle.get()),
|
|
151
|
-
},
|
|
152
|
-
redis: {
|
|
153
|
-
active: gaugeValue(await redisConnectionsActive.get()),
|
|
154
|
-
},
|
|
155
|
-
telemetry: {
|
|
156
|
-
source: 'prom-client connection gauges',
|
|
157
|
-
bestEffort: true,
|
|
158
|
-
note:
|
|
159
|
-
'Connection gauges are best-effort instrumentation; zeros can mean the gauges are not being updated, not necessarily that dependencies are disconnected.',
|
|
160
|
-
},
|
|
161
|
-
};
|
|
162
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Phase 3: structured snapshots from prom-client metrics for /api/monitoring/metrics/*
|
|
3
|
+
* (no Prometheus text parsing — reads live registry values.)
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
httpRequestTotal,
|
|
8
|
+
httpRequestErrors,
|
|
9
|
+
runsCreated,
|
|
10
|
+
runsStarted,
|
|
11
|
+
runsCompleted,
|
|
12
|
+
runsActive,
|
|
13
|
+
sseConnectionsOpened,
|
|
14
|
+
sseConnectionsClosed,
|
|
15
|
+
sseActiveConnections,
|
|
16
|
+
sseMessagesTotal,
|
|
17
|
+
dbConnectionsActive,
|
|
18
|
+
dbConnectionsIdle,
|
|
19
|
+
redisConnectionsActive,
|
|
20
|
+
} from './index.js';
|
|
21
|
+
|
|
22
|
+
function sumCounterValues(m: { values: Array<{ value: number }> }): number {
|
|
23
|
+
return m.values.reduce((s, x) => s + x.value, 0);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function gaugeValue(m: { values: Array<{ value: number }> }): number {
|
|
27
|
+
return m.values[0]?.value ?? 0;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function isFailedRunStatus(status: string): boolean {
|
|
31
|
+
const normalized = status.trim().toLowerCase();
|
|
32
|
+
return [
|
|
33
|
+
'failed',
|
|
34
|
+
'failure',
|
|
35
|
+
'error',
|
|
36
|
+
'errored',
|
|
37
|
+
'cancelled',
|
|
38
|
+
'canceled',
|
|
39
|
+
'timeout',
|
|
40
|
+
'timed_out',
|
|
41
|
+
].includes(normalized);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
export async function getMonitoringHttpPayload(): Promise<Record<string, unknown>> {
|
|
45
|
+
const req = await httpRequestTotal.get();
|
|
46
|
+
let totalRequests = 0;
|
|
47
|
+
const byRoute: Array<{ method: string; route: string; status_code: string; count: number }> =
|
|
48
|
+
[];
|
|
49
|
+
for (const x of req.values) {
|
|
50
|
+
totalRequests += x.value;
|
|
51
|
+
byRoute.push({
|
|
52
|
+
method: String(x.labels.method ?? ''),
|
|
53
|
+
route: String(x.labels.route ?? ''),
|
|
54
|
+
status_code: String(x.labels.status_code ?? ''),
|
|
55
|
+
count: x.value,
|
|
56
|
+
});
|
|
57
|
+
}
|
|
58
|
+
byRoute.sort((a, b) => b.count - a.count);
|
|
59
|
+
|
|
60
|
+
const err = await httpRequestErrors.get();
|
|
61
|
+
let totalErrors = 0;
|
|
62
|
+
const errorsByRoute: Array<{
|
|
63
|
+
method: string;
|
|
64
|
+
route: string;
|
|
65
|
+
error_type: string;
|
|
66
|
+
count: number;
|
|
67
|
+
}> = [];
|
|
68
|
+
for (const x of err.values) {
|
|
69
|
+
totalErrors += x.value;
|
|
70
|
+
errorsByRoute.push({
|
|
71
|
+
method: String(x.labels.method ?? ''),
|
|
72
|
+
route: String(x.labels.route ?? ''),
|
|
73
|
+
error_type: String(x.labels.error_type ?? ''),
|
|
74
|
+
count: x.value,
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
errorsByRoute.sort((a, b) => b.count - a.count);
|
|
78
|
+
|
|
79
|
+
const errorRate = totalRequests > 0 ? totalErrors / totalRequests : 0;
|
|
80
|
+
|
|
81
|
+
return {
|
|
82
|
+
placeholder: false,
|
|
83
|
+
summary: {
|
|
84
|
+
totalRequests,
|
|
85
|
+
totalErrors,
|
|
86
|
+
errorRate: Number(errorRate.toFixed(4)),
|
|
87
|
+
},
|
|
88
|
+
topRoutes: byRoute.slice(0, 25),
|
|
89
|
+
errorsByRoute: errorsByRoute.slice(0, 15),
|
|
90
|
+
timestamp: new Date().toISOString(),
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
export async function getMonitoringRunsPayload(): Promise<Record<string, unknown>> {
|
|
95
|
+
const created = sumCounterValues(await runsCreated.get());
|
|
96
|
+
const started = sumCounterValues(await runsStarted.get());
|
|
97
|
+
const completedByStatus = (await runsCompleted.get()).values.map((x) => ({
|
|
98
|
+
status: String(x.labels.status ?? ''),
|
|
99
|
+
count: x.value,
|
|
100
|
+
}));
|
|
101
|
+
const completed = completedByStatus.reduce((s, x) => s + x.count, 0);
|
|
102
|
+
const failedByStatus = completedByStatus.filter((x) => isFailedRunStatus(x.status));
|
|
103
|
+
const active = gaugeValue(await runsActive.get());
|
|
104
|
+
|
|
105
|
+
return {
|
|
106
|
+
placeholder: false,
|
|
107
|
+
summary: {
|
|
108
|
+
created,
|
|
109
|
+
started,
|
|
110
|
+
completed,
|
|
111
|
+
failed: failedByStatus.reduce((s, x) => s + x.count, 0),
|
|
112
|
+
active,
|
|
113
|
+
},
|
|
114
|
+
byStatus: completedByStatus,
|
|
115
|
+
failedStatusLabels: failedByStatus.map((x) => x.status),
|
|
116
|
+
timestamp: new Date().toISOString(),
|
|
117
|
+
};
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
export async function getMonitoringSsePayload(): Promise<Record<string, unknown>> {
|
|
121
|
+
const opened = sumCounterValues(await sseConnectionsOpened.get());
|
|
122
|
+
const closed = sumCounterValues(await sseConnectionsClosed.get());
|
|
123
|
+
const active = gaugeValue(await sseActiveConnections.get());
|
|
124
|
+
const messages = sumCounterValues(await sseMessagesTotal.get());
|
|
125
|
+
|
|
126
|
+
return {
|
|
127
|
+
placeholder: false,
|
|
128
|
+
summary: {
|
|
129
|
+
connectionsOpened: opened,
|
|
130
|
+
connectionsClosed: closed,
|
|
131
|
+
activeConnections: active,
|
|
132
|
+
messagesSent: messages,
|
|
133
|
+
},
|
|
134
|
+
timestamp: new Date().toISOString(),
|
|
135
|
+
};
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
export async function getDependencyPoolsPayload(): Promise<{
|
|
139
|
+
database: { active: number; idle: number };
|
|
140
|
+
redis: { active: number };
|
|
141
|
+
telemetry: {
|
|
142
|
+
source: string;
|
|
143
|
+
bestEffort: boolean;
|
|
144
|
+
note: string;
|
|
145
|
+
};
|
|
146
|
+
}> {
|
|
147
|
+
return {
|
|
148
|
+
database: {
|
|
149
|
+
active: gaugeValue(await dbConnectionsActive.get()),
|
|
150
|
+
idle: gaugeValue(await dbConnectionsIdle.get()),
|
|
151
|
+
},
|
|
152
|
+
redis: {
|
|
153
|
+
active: gaugeValue(await redisConnectionsActive.get()),
|
|
154
|
+
},
|
|
155
|
+
telemetry: {
|
|
156
|
+
source: 'prom-client connection gauges',
|
|
157
|
+
bestEffort: true,
|
|
158
|
+
note:
|
|
159
|
+
'Connection gauges are best-effort instrumentation; zeros can mean the gauges are not being updated, not necessarily that dependencies are disconnected.',
|
|
160
|
+
},
|
|
161
|
+
};
|
|
162
|
+
}
|
|
@@ -1,70 +1,70 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Log capture middleware for monitoring log buffer
|
|
3
|
-
* Intercepts logger calls and feeds them to the monitoring log buffer
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { addMonitoringLog } from '../routes/monitoring.js';
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Wrap a logger to capture log entries for monitoring
|
|
10
|
-
* Note: Uses duck typing to avoid strict type checks (compatible with both Logger and memoryLogger)
|
|
11
|
-
*/
|
|
12
|
-
export function wrapLoggerForMonitoring(logger: any): any {
|
|
13
|
-
// Skip if this doesn't look like a standard logger (e.g., memoryLogger)
|
|
14
|
-
if (!logger || typeof logger.info !== 'function') {
|
|
15
|
-
return logger;
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
const originalInfo = logger.info.bind(logger);
|
|
19
|
-
const originalWarn = logger.warn.bind(logger);
|
|
20
|
-
const originalError = logger.error.bind(logger);
|
|
21
|
-
const originalDebug = logger.debug?.bind(logger);
|
|
22
|
-
|
|
23
|
-
logger.info = (message: string, data?: Record<string, unknown>) => {
|
|
24
|
-
addMonitoringLog({
|
|
25
|
-
timestamp: new Date().toISOString(),
|
|
26
|
-
level: 'info',
|
|
27
|
-
message,
|
|
28
|
-
context: logger.context || 'Gateway',
|
|
29
|
-
...(data && { data }),
|
|
30
|
-
});
|
|
31
|
-
return originalInfo(message, data);
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
logger.warn = (message: string, data?: Record<string, unknown>) => {
|
|
35
|
-
addMonitoringLog({
|
|
36
|
-
timestamp: new Date().toISOString(),
|
|
37
|
-
level: 'warn',
|
|
38
|
-
message,
|
|
39
|
-
context: logger.context || 'Gateway',
|
|
40
|
-
...(data && { data }),
|
|
41
|
-
});
|
|
42
|
-
return originalWarn(message, data);
|
|
43
|
-
};
|
|
44
|
-
|
|
45
|
-
logger.error = (message: string, data?: Record<string, unknown>) => {
|
|
46
|
-
addMonitoringLog({
|
|
47
|
-
timestamp: new Date().toISOString(),
|
|
48
|
-
level: 'error',
|
|
49
|
-
message,
|
|
50
|
-
context: logger.context || 'Gateway',
|
|
51
|
-
...(data && { data }),
|
|
52
|
-
});
|
|
53
|
-
return originalError(message, data);
|
|
54
|
-
};
|
|
55
|
-
|
|
56
|
-
if (originalDebug) {
|
|
57
|
-
logger.debug = (message: string, data?: Record<string, unknown>) => {
|
|
58
|
-
addMonitoringLog({
|
|
59
|
-
timestamp: new Date().toISOString(),
|
|
60
|
-
level: 'debug',
|
|
61
|
-
message,
|
|
62
|
-
context: logger.context || 'Gateway',
|
|
63
|
-
...(data && { data }),
|
|
64
|
-
});
|
|
65
|
-
return originalDebug(message, data);
|
|
66
|
-
};
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
return logger;
|
|
70
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Log capture middleware for monitoring log buffer
|
|
3
|
+
* Intercepts logger calls and feeds them to the monitoring log buffer
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { addMonitoringLog } from '../routes/monitoring.js';
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Wrap a logger to capture log entries for monitoring
|
|
10
|
+
* Note: Uses duck typing to avoid strict type checks (compatible with both Logger and memoryLogger)
|
|
11
|
+
*/
|
|
12
|
+
export function wrapLoggerForMonitoring(logger: any): any {
|
|
13
|
+
// Skip if this doesn't look like a standard logger (e.g., memoryLogger)
|
|
14
|
+
if (!logger || typeof logger.info !== 'function') {
|
|
15
|
+
return logger;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
const originalInfo = logger.info.bind(logger);
|
|
19
|
+
const originalWarn = logger.warn.bind(logger);
|
|
20
|
+
const originalError = logger.error.bind(logger);
|
|
21
|
+
const originalDebug = logger.debug?.bind(logger);
|
|
22
|
+
|
|
23
|
+
logger.info = (message: string, data?: Record<string, unknown>) => {
|
|
24
|
+
addMonitoringLog({
|
|
25
|
+
timestamp: new Date().toISOString(),
|
|
26
|
+
level: 'info',
|
|
27
|
+
message,
|
|
28
|
+
context: logger.context || 'Gateway',
|
|
29
|
+
...(data && { data }),
|
|
30
|
+
});
|
|
31
|
+
return originalInfo(message, data);
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
logger.warn = (message: string, data?: Record<string, unknown>) => {
|
|
35
|
+
addMonitoringLog({
|
|
36
|
+
timestamp: new Date().toISOString(),
|
|
37
|
+
level: 'warn',
|
|
38
|
+
message,
|
|
39
|
+
context: logger.context || 'Gateway',
|
|
40
|
+
...(data && { data }),
|
|
41
|
+
});
|
|
42
|
+
return originalWarn(message, data);
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
logger.error = (message: string, data?: Record<string, unknown>) => {
|
|
46
|
+
addMonitoringLog({
|
|
47
|
+
timestamp: new Date().toISOString(),
|
|
48
|
+
level: 'error',
|
|
49
|
+
message,
|
|
50
|
+
context: logger.context || 'Gateway',
|
|
51
|
+
...(data && { data }),
|
|
52
|
+
});
|
|
53
|
+
return originalError(message, data);
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
if (originalDebug) {
|
|
57
|
+
logger.debug = (message: string, data?: Record<string, unknown>) => {
|
|
58
|
+
addMonitoringLog({
|
|
59
|
+
timestamp: new Date().toISOString(),
|
|
60
|
+
level: 'debug',
|
|
61
|
+
message,
|
|
62
|
+
context: logger.context || 'Gateway',
|
|
63
|
+
...(data && { data }),
|
|
64
|
+
});
|
|
65
|
+
return originalDebug(message, data);
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
return logger;
|
|
70
|
+
}
|