@zintrust/workers 0.1.29 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/dist/AnomalyDetection.d.ts +4 -0
- package/dist/AnomalyDetection.js +8 -0
- package/dist/BroadcastWorker.d.ts +2 -0
- package/dist/CanaryController.js +49 -5
- package/dist/ChaosEngineering.js +13 -0
- package/dist/ClusterLock.js +21 -10
- package/dist/DeadLetterQueue.js +12 -8
- package/dist/MultiQueueWorker.d.ts +1 -1
- package/dist/MultiQueueWorker.js +12 -7
- package/dist/NotificationWorker.d.ts +2 -0
- package/dist/PriorityQueue.d.ts +2 -2
- package/dist/PriorityQueue.js +20 -21
- package/dist/ResourceMonitor.js +65 -38
- package/dist/WorkerFactory.d.ts +23 -3
- package/dist/WorkerFactory.js +420 -40
- package/dist/WorkerInit.js +8 -3
- package/dist/WorkerMetrics.d.ts +2 -1
- package/dist/WorkerMetrics.js +152 -93
- package/dist/WorkerRegistry.d.ts +6 -0
- package/dist/WorkerRegistry.js +70 -1
- package/dist/WorkerShutdown.d.ts +21 -0
- package/dist/WorkerShutdown.js +82 -9
- package/dist/WorkerShutdownDurableObject.d.ts +12 -0
- package/dist/WorkerShutdownDurableObject.js +41 -0
- package/dist/build-manifest.json +171 -99
- package/dist/createQueueWorker.d.ts +2 -0
- package/dist/createQueueWorker.js +42 -27
- package/dist/dashboard/types.d.ts +5 -0
- package/dist/dashboard/workers-api.js +136 -43
- package/dist/http/WorkerApiController.js +1 -0
- package/dist/http/WorkerController.js +133 -85
- package/dist/http/WorkerMonitoringService.d.ts +11 -0
- package/dist/http/WorkerMonitoringService.js +62 -0
- package/dist/http/middleware/CustomValidation.js +1 -1
- package/dist/http/middleware/EditWorkerValidation.d.ts +1 -1
- package/dist/http/middleware/EditWorkerValidation.js +7 -6
- package/dist/http/middleware/ProcessorPathSanitizer.js +101 -35
- package/dist/http/middleware/WorkerValidationChain.js +1 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +1 -0
- package/dist/routes/workers.js +48 -6
- package/dist/storage/WorkerStore.d.ts +4 -1
- package/dist/storage/WorkerStore.js +55 -7
- package/dist/telemetry/api/TelemetryAPI.d.ts +46 -0
- package/dist/telemetry/api/TelemetryAPI.js +219 -0
- package/dist/telemetry/api/TelemetryMonitoringService.d.ts +17 -0
- package/dist/telemetry/api/TelemetryMonitoringService.js +113 -0
- package/dist/telemetry/components/AlertPanel.d.ts +1 -0
- package/dist/telemetry/components/AlertPanel.js +13 -0
- package/dist/telemetry/components/CostTracking.d.ts +1 -0
- package/dist/telemetry/components/CostTracking.js +14 -0
- package/dist/telemetry/components/ResourceUsageChart.d.ts +1 -0
- package/dist/telemetry/components/ResourceUsageChart.js +11 -0
- package/dist/telemetry/components/WorkerHealthChart.d.ts +1 -0
- package/dist/telemetry/components/WorkerHealthChart.js +11 -0
- package/dist/telemetry/index.d.ts +15 -0
- package/dist/telemetry/index.js +60 -0
- package/dist/telemetry/routes/dashboard.d.ts +6 -0
- package/dist/telemetry/routes/dashboard.js +608 -0
- package/dist/ui/router/EmbeddedAssets.d.ts +4 -0
- package/dist/ui/router/EmbeddedAssets.js +13 -0
- package/dist/ui/router/ui.js +100 -4
- package/package.json +10 -6
- package/src/AnomalyDetection.ts +9 -0
- package/src/CanaryController.ts +41 -5
- package/src/ChaosEngineering.ts +14 -0
- package/src/ClusterLock.ts +22 -9
- package/src/DeadLetterQueue.ts +13 -8
- package/src/MultiQueueWorker.ts +15 -8
- package/src/PriorityQueue.ts +21 -22
- package/src/ResourceMonitor.ts +72 -40
- package/src/WorkerFactory.ts +545 -49
- package/src/WorkerInit.ts +8 -3
- package/src/WorkerMetrics.ts +183 -105
- package/src/WorkerRegistry.ts +80 -1
- package/src/WorkerShutdown.ts +115 -9
- package/src/WorkerShutdownDurableObject.ts +64 -0
- package/src/createQueueWorker.ts +73 -30
- package/src/dashboard/types.ts +5 -0
- package/src/dashboard/workers-api.ts +165 -52
- package/src/http/WorkerApiController.ts +1 -0
- package/src/http/WorkerController.ts +167 -90
- package/src/http/WorkerMonitoringService.ts +77 -0
- package/src/http/middleware/CustomValidation.ts +1 -1
- package/src/http/middleware/EditWorkerValidation.ts +7 -6
- package/src/http/middleware/ProcessorPathSanitizer.ts +123 -36
- package/src/http/middleware/WorkerValidationChain.ts +1 -0
- package/src/index.ts +6 -1
- package/src/routes/workers.ts +66 -9
- package/src/storage/WorkerStore.ts +59 -9
- package/src/telemetry/api/TelemetryAPI.ts +292 -0
- package/src/telemetry/api/TelemetryMonitoringService.ts +149 -0
- package/src/telemetry/components/AlertPanel.ts +13 -0
- package/src/telemetry/components/CostTracking.ts +14 -0
- package/src/telemetry/components/ResourceUsageChart.ts +11 -0
- package/src/telemetry/components/WorkerHealthChart.ts +11 -0
- package/src/telemetry/index.ts +121 -0
- package/src/telemetry/public/assets/zintrust-logo.svg +15 -0
- package/src/telemetry/routes/dashboard.ts +638 -0
- package/src/telemetry/styles/tailwind.css +1 -0
- package/src/telemetry/styles/zintrust-theme.css +8 -0
- package/src/ui/router/EmbeddedAssets.ts +13 -0
- package/src/ui/router/ui.ts +112 -5
- package/src/ui/workers/index.html +2 -2
- package/src/ui/workers/main.js +232 -61
- package/src/ui/workers/zintrust.svg +30 -0
- package/dist/dashboard/workers-dashboard-ui.d.ts +0 -3
- package/dist/dashboard/workers-dashboard-ui.js +0 -1026
- package/dist/dashboard/workers-dashboard.d.ts +0 -4
- package/dist/dashboard/workers-dashboard.js +0 -904
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
import { Logger } from '@zintrust/core';
|
|
2
|
+
import type { WorkerData } from '../../dashboard';
|
|
3
|
+
import { getWorkers } from '../../dashboard/workers-api';
|
|
4
|
+
import { HealthMonitor } from '../../HealthMonitor';
|
|
5
|
+
import type { WorkerResourceUsage } from '../../ResourceMonitor';
|
|
6
|
+
import { ResourceMonitor } from '../../ResourceMonitor';
|
|
7
|
+
|
|
8
|
+
/** Fully resolved telemetry dashboard settings; every field carries a concrete value. */
export type TelemetrySettings = {
  /** Master switch for the dashboard. */
  enabled: boolean;
  /** URL prefix for the dashboard and its API routes. */
  basePath: string;
  /** Middleware names applied to the dashboard routes. */
  middleware: readonly string[];
  /** Whether the dashboard page refreshes automatically. */
  autoRefresh: boolean;
  /** Refresh period in milliseconds. */
  refreshIntervalMs: number;
};
|
|
15
|
+
|
|
16
|
+
/** Loosely typed result of the current-resource-usage call; `usage` may be absent. */
export type ResourceCurrentResponse = { ok: boolean; usage?: unknown };
/** Loosely typed result of the system-summary call; `summary` may be absent. */
export type SystemSummaryResponse = { ok: boolean; summary?: unknown };

/**
 * Returns true when the response succeeded and carries a `usage` key.
 *
 * The predicate narrows to the success shape (`ok: true`, `usage` present).
 * Narrowing to the parameter's own type would give the compiler no information.
 */
const isOkWithUsage = (
  value: ResourceCurrentResponse
): value is ResourceCurrentResponse & { ok: true; usage: unknown } =>
  value.ok === true && 'usage' in value;

/**
 * Returns true when the response succeeded and carries a `summary` key.
 *
 * Narrows to the success shape (`ok: true`, `summary` present).
 */
const isOkWithSummary = (
  value: SystemSummaryResponse
): value is SystemSummaryResponse & { ok: true; summary: unknown } =>
  value.ok === true && 'summary' in value;
|
|
24
|
+
|
|
25
|
+
/** Envelope returned by the TelemetryAPI methods: `ok`, an optional `error` message, plus the payload fields of `T`. */
export type ApiResponse<T> = { ok: boolean; error?: string } & T;

/** One alert entry as produced by the alert helpers in this file. */
export type AlertRep = {
  /** Machine-readable kind; this file emits 'worker-stopped', 'health-check-failed', 'cpu-high' and 'memory-high'. */
  type: string;
  /** This file emits 'warning' or 'critical'. */
  severity: string;
  /** Human-readable description. */
  message: string;
  /** The worker's last health-check time when available, otherwise the ISO time the alert was built. */
  timestamp: string;
  /** Optional remediation hint; only the resource alerts set it. */
  recommendation?: string;
};
|
|
33
|
+
/**
 * Builds the 'worker-stopped' warning for a worker.
 *
 * The timestamp is the worker's last health check when one is recorded,
 * otherwise the current time as an ISO string.
 */
const createStoppedWorkerAlert = (worker: WorkerData): AlertRep => {
  const reportedAt = worker.health?.lastCheck || new Date().toISOString();

  return {
    type: 'worker-stopped',
    severity: 'warning',
    message: `Worker ${worker.name} is stopped`,
    timestamp: reportedAt,
  };
};
|
|
40
|
+
|
|
41
|
+
/**
 * Builds a 'health-check-failed' alert from a worker's health checks.
 *
 * The first check whose status is 'fail' is reported as 'critical'. When no
 * check has that status, the first check in the list is reported as 'warning'.
 * Always reporting `checks[0]` would hide a failing check that is not first.
 *
 * NOTE(review): a worker whose checks all passed still yields a warning here,
 * because the status value that means "passed" is not visible in this file.
 *
 * @returns the alert, or null when the worker has no checks.
 */
const createHealthCheckAlert = (worker: WorkerData): AlertRep | null => {
  const checks = worker.health?.checks ?? [];
  const check = checks.find((candidate) => candidate.status === 'fail') ?? checks[0];
  if (!check) return null;

  return {
    type: 'health-check-failed',
    severity: check.status === 'fail' ? 'critical' : 'warning',
    message: check.message || `Health check failed: ${check.name}`,
    timestamp: worker.health?.lastCheck || new Date().toISOString(),
  };
};
|
|
53
|
+
|
|
54
|
+
/**
 * Produces one entry per worker that currently has an alert.
 *
 * A worker is considered when it is not running, not healthy, or has at least
 * one health check. Stopped workers get the stopped-worker alert, other workers
 * with checks get the health-check alert, and entries with no alert are dropped.
 */
const generateWorkerAlerts = (
  workers: WorkerData[]
): {
  workerName: string;
  status: string;
  healthStatus: string;
  lastCheck: string;
  checks: WorkerData['health']['checks'];
  alert: AlertRep | null;
}[] => {
  const hasChecks = (w: WorkerData): boolean =>
    Boolean(w.health?.checks && w.health.checks.length > 0);

  const needsAttention = (w: WorkerData): boolean =>
    w.status !== 'running' || w.health?.status !== 'healthy' || hasChecks(w);

  return workers
    .filter(needsAttention)
    .map((w) => {
      let alert: AlertRep | null = null;
      if (w.status === 'stopped') {
        alert = createStoppedWorkerAlert(w);
      } else if (hasChecks(w)) {
        alert = createHealthCheckAlert(w);
      }

      return {
        workerName: w.name,
        status: w.status,
        healthStatus: w.health?.status || 'unknown',
        lastCheck: w.health?.lastCheck || new Date().toISOString(),
        checks: w.health?.checks || [],
        alert,
      };
    })
    .filter((entry) => entry.alert !== null);
};
|
|
96
|
+
|
|
97
|
+
/**
 * Turns a resource-usage reading into CPU and memory alerts.
 *
 * Thresholds are strict greater-than: CPU is critical above 90 and a warning
 * above 80; memory is critical above 95 and a warning above 85. At most one
 * alert per resource is produced, CPU first.
 *
 * A missing usage object, missing fields, and non-numeric or NaN readings are
 * treated as 0 and never raise an alert, so a bad reading cannot throw here.
 *
 * @param resourceUsage usage object whose `resourceSnapshot.cpu.usage` and
 *   `resourceSnapshot.memory.usage` are read.
 * @returns the alerts, possibly empty.
 */
const generateResourceAlerts = (resourceUsage: WorkerResourceUsage): AlertRep[] => {
  const reading = (raw: unknown): number =>
    typeof raw === 'number' && !Number.isNaN(raw) ? raw : 0;

  const snapshot = resourceUsage?.resourceSnapshot;
  const timestamp = new Date().toISOString();

  const rules = [
    {
      type: 'cpu-high',
      label: 'CPU',
      value: reading(snapshot?.cpu?.usage),
      criticalAbove: 90,
      warningAbove: 80,
      criticalAdvice: 'Consider scaling up or optimizing worker code',
      warningAdvice: 'Monitor closely and consider scaling',
    },
    {
      type: 'memory-high',
      label: 'memory',
      value: reading(snapshot?.memory?.usage),
      criticalAbove: 95,
      warningAbove: 85,
      criticalAdvice: 'Increase memory allocation or optimize memory usage',
      warningAdvice: 'Monitor memory usage closely',
    },
  ];

  const alerts: AlertRep[] = [];
  for (const rule of rules) {
    const isCritical = rule.value > rule.criticalAbove;
    const isWarning = rule.value > rule.warningAbove;
    if (!isCritical && !isWarning) continue;

    alerts.push({
      type: rule.type,
      severity: isCritical ? 'critical' : 'warning',
      message: `${isCritical ? 'Critical' : 'High'} ${rule.label} usage: ${rule.value.toFixed(1)}%`,
      timestamp,
      recommendation: isCritical ? rule.criticalAdvice : rule.warningAdvice,
    });
  }

  return alerts;
};
|
|
141
|
+
|
|
142
|
+
/**
 * Summarises the health of the given workers.
 *
 * Counts workers whose health status is 'healthy', 'warning' (reported as
 * `degraded`) and 'unhealthy' (reported as `critical`). Workers with any other
 * or missing status count only towards `total`. `details` has one row per
 * worker, in input order.
 */
const calculateMonitoringSummary = (
  runningWorkers: WorkerData[]
): {
  total: number;
  healthy: number;
  degraded: number;
  critical: number;
  details: {
    workerName: string;
    status: string;
    lastCheck: string;
    checks: WorkerData['health']['checks'];
  }[];
} => {
  const tally = { healthy: 0, warning: 0, unhealthy: 0 };
  for (const worker of runningWorkers) {
    const state = worker.health?.status;
    if (state === 'healthy' || state === 'warning' || state === 'unhealthy') {
      tally[state] += 1;
    }
  }

  const details = runningWorkers.map((worker) => {
    const health = worker.health;
    return {
      workerName: worker.name,
      status: health?.status || 'unknown',
      lastCheck: health?.lastCheck || new Date().toISOString(),
      checks: health?.checks || [],
    };
  });

  return {
    total: runningWorkers.length,
    healthy: tally.healthy,
    degraded: tally.warning,
    critical: tally.unhealthy,
    details,
  };
};
|
|
174
|
+
|
|
175
|
+
/**
 * Read-only telemetry queries.
 *
 * Every method resolves to an `ApiResponse` and does not reject: failures are
 * logged and reported as `ok: false` with an `error` message and an empty
 * payload (`{}` for summaries, `null` for usage and trends).
 */
export const TelemetryAPI = Object.freeze({
  /**
   * Combined view: running-worker health summary, current 'system' resource
   * usage, and alerts derived from all workers plus resource usage.
   */
  async getSystemSummary(): Promise<ApiResponse<{ summary: unknown }>> {
    try {
      // All workers are fetched so stopped ones can still raise alerts.
      const { workers: allWorkers } = await getWorkers({});
      const active = allWorkers.filter((w) => w.status === 'running');

      // Health counts cover running workers only.
      const monitoring = calculateMonitoringSummary(active);

      const workerAlerts = generateWorkerAlerts(allWorkers);
      const resources = ResourceMonitor.getCurrentUsage('system');
      const alerts = [...workerAlerts, ...generateResourceAlerts(resources)];

      return {
        ok: true,
        summary: {
          workers: active.length,
          // The same alert list is exposed under `monitoring` and at the top level.
          monitoring: { ...monitoring, alerts },
          resources,
          alerts,
        },
      };
    } catch (error) {
      Logger.error('Failed to get system summary', error as Error);
      const message = error instanceof Error ? error.message : 'Unknown error';
      return { ok: false, error: message, summary: {} };
    }
  },

  /** Returns whatever `HealthMonitor.getSummary()` reports. */
  async getMonitoringSummary(): Promise<ApiResponse<{ summary: unknown }>> {
    try {
      return { ok: true, summary: await HealthMonitor.getSummary() };
    } catch (error) {
      Logger.error('Failed to get monitoring summary', error as Error);
      const message = error instanceof Error ? error.message : 'Unknown error';
      return { ok: false, error: message, summary: {} };
    }
  },

  /** Returns the current resource usage for the 'system' key. */
  async getResourceCurrent(): Promise<ApiResponse<{ usage: unknown }>> {
    try {
      return { ok: true, usage: ResourceMonitor.getCurrentUsage('system') };
    } catch (error) {
      Logger.error('Failed to get resource usage', error as Error);
      const message = error instanceof Error ? error.message : 'Unknown error';
      return { ok: false, error: message, usage: null };
    }
  },

  /** Returns all resource trends for the 'system' key over the 'day' period. */
  async getResourceTrends(): Promise<ApiResponse<{ trends: unknown }>> {
    try {
      return { ok: true, trends: ResourceMonitor.getAllTrends('system', 'day') };
    } catch (error) {
      Logger.error('Failed to get resource trends', error as Error);
      const message = error instanceof Error ? error.message : 'Unknown error';
      return { ok: false, error: message, trends: null };
    }
  },
});
|
|
257
|
+
|
|
258
|
+
/**
 * Creates the function that assembles one dashboard snapshot.
 *
 * The returned builder fetches the system summary and the current resource
 * usage concurrently. A rejected call is logged and replaced by `{ ok: false }`,
 * so the builder itself resolves with fallbacks: `{}` for the summary and
 * `null` for resources. `ok` mirrors the summary call; `cost` is always `null`.
 */
export function createSnapshotBuilder() {
  return async (): Promise<{
    ok: boolean;
    summary: unknown;
    resources: unknown;
    cost: unknown;
  }> => {
    const [summaryOutcome, usageOutcome] = await Promise.allSettled([
      TelemetryAPI.getSystemSummary(),
      TelemetryAPI.getResourceCurrent(),
    ]);

    let systemSummary: SystemSummaryResponse = { ok: false };
    if (summaryOutcome.status === 'fulfilled') {
      systemSummary = summaryOutcome.value;
    } else {
      Logger.error('Telemetry dashboard summary failed', summaryOutcome.reason);
    }

    let resourceCurrent: ResourceCurrentResponse = { ok: false };
    if (usageOutcome.status === 'fulfilled') {
      resourceCurrent = usageOutcome.value;
    } else {
      Logger.error('Telemetry resource summary failed', usageOutcome.reason);
    }

    const summary = isOkWithSummary(systemSummary) ? systemSummary.summary : {};
    const resources = isOkWithUsage(resourceCurrent) ? resourceCurrent.usage : null;

    return {
      ok: systemSummary.ok ?? false,
      summary,
      resources,
      cost: null,
    };
  };
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import type { IResponse } from '@zintrust/core';
|
|
2
|
+
import { Logger, NodeSingletons } from '@zintrust/core';
|
|
3
|
+
import type { createSnapshotBuilder, TelemetrySettings } from './TelemetryAPI';
|
|
4
|
+
|
|
5
|
+
/** Payload delivered to snapshot subscribers. */
export type TelemetrySnapshotData = {
  /** Event discriminator; snapshot broadcasts use 'snapshot'. */
  type: string;
  /** ISO time at which the payload was assembled. */
  ts: string;
  /** Success flag copied from the snapshot builder. */
  ok: boolean;
  /** System summary as returned by the snapshot builder. */
  summary: unknown;
  /** Resource usage as returned by the snapshot builder. */
  resources: unknown;
  /** Cost data as returned by the snapshot builder. */
  cost: unknown;
};
|
|
13
|
+
|
|
14
|
+
// Module-level state shared by every subscriber of the singleton service.

// Carries 'snapshot' events (and 'error' events when a broadcast fails).
const emitter = new NodeSingletons.EventEmitter();
// One listener is added per subscriber, so the listener cap is lifted.
emitter.setMaxListeners(Infinity);
// Handle of the running poll timer; null while polling is stopped.
let interval: NodeJS.Timeout | null = null;
// Number of active subscribe() registrations.
let subscribers = 0;
// Settings and snapshot builder in effect; null while monitoring is inactive.
let currentSettings: TelemetrySettings | null = null;
let currentBuildSnapshot: ReturnType<typeof createSnapshotBuilder> | null = null;
|
|
21
|
+
|
|
22
|
+
/**
 * Builds one snapshot and emits it to all 'snapshot' listeners.
 *
 * Does nothing when there are no subscribers or monitoring has not been
 * configured. Failures are logged and never rejected to the caller, because
 * the poll timer discards this promise with `void`.
 */
const broadcastTelemetrySnapshot = async (): Promise<void> => {
  try {
    if (subscribers <= 0 || !currentBuildSnapshot || !currentSettings) return;

    const snapshot = await currentBuildSnapshot();

    const payload: TelemetrySnapshotData = {
      type: 'snapshot',
      ts: new Date().toISOString(),
      ...snapshot,
    };

    emitter.emit('snapshot', payload);
  } catch (err) {
    Logger.error('TelemetryMonitoringService.broadcastSnapshot failed', err);
    // A Node EventEmitter throws when 'error' is emitted with no 'error'
    // listener. Throwing here would reject this promise and surface as an
    // unhandled rejection, so only forward the error when someone listens.
    // NOTE(review): assumes NodeSingletons.EventEmitter is Node's EventEmitter.
    if (emitter.listenerCount('error') > 0) {
      emitter.emit('error', err);
    }
  }
};
|
|
40
|
+
|
|
41
|
+
/**
 * Starts the snapshot poll timer.
 *
 * No-op when a timer is already running or no settings are configured.
 * One broadcast is fired immediately, then one every `refreshIntervalMs`.
 */
const startPolling = (): void => {
  if (interval) return;
  if (!currentSettings) return;

  const tick = (): void => {
    void broadcastTelemetrySnapshot();
  };

  Logger.debug('Starting TelemetryMonitoringService polling');
  // Subscribers get data right away instead of waiting a full period.
  tick();

  interval = setInterval(tick, currentSettings.refreshIntervalMs);
};
|
|
52
|
+
|
|
53
|
+
/** Stops the snapshot poll timer if one is running; otherwise does nothing. */
const stopPolling = (): void => {
  if (!interval) return;

  Logger.debug('Stopping TelemetryMonitoringService polling');
  clearInterval(interval);
  interval = null;
};
|
|
60
|
+
|
|
61
|
+
/**
 * Singleton fan-out of telemetry snapshots.
 *
 * Polling runs while at least one subscriber is registered. Call
 * `startMonitoring` before the first `subscribe`, because polling only starts
 * when settings are already configured.
 */
export const TelemetryMonitoringService = Object.freeze({
  /** Registers a snapshot listener; the first subscriber starts polling. */
  subscribe(callback: (data: TelemetrySnapshotData) => void): void {
    emitter.on('snapshot', callback);
    subscribers++;
    if (subscribers === 1) {
      startPolling();
    }
  },

  /**
   * Removes a snapshot listener; removing the last one stops polling and
   * clears the configured settings and builder.
   *
   * Callbacks that are not currently registered are ignored. Without this
   * guard a double or stray unsubscribe would push the counter below zero,
   * and a later first subscriber would not restart polling.
   */
  unsubscribe(callback: (data: TelemetrySnapshotData) => void): void {
    if (!emitter.listeners('snapshot').includes(callback)) return;

    emitter.off('snapshot', callback);
    subscribers = Math.max(0, subscribers - 1);
    if (subscribers === 0) {
      stopPolling();
      currentSettings = null;
      currentBuildSnapshot = null;
    }
  },

  /**
   * Configures settings and the snapshot builder.
   *
   * Only takes effect while there are no subscribers; with active subscribers
   * the existing configuration is kept.
   */
  startMonitoring(
    settings: TelemetrySettings,
    buildSnapshot: ReturnType<typeof createSnapshotBuilder>
  ): void {
    if (subscribers === 0) {
      currentSettings = settings;
      currentBuildSnapshot = buildSnapshot;
    }
  },

  /** Stops polling and clears the configuration, but only when no subscribers remain. */
  stopMonitoring(): void {
    if (subscribers <= 0) {
      stopPolling();
      currentSettings = null;
      currentBuildSnapshot = null;
    }
  },
});
|
|
98
|
+
|
|
99
|
+
/**
 * Serves a Server-Sent Events stream of telemetry snapshots on `res`.
 *
 * Writes the SSE headers, sends a 'hello' event, subscribes the connection to
 * the singleton monitoring service and emits a comment-line heartbeat every
 * 15 seconds. On the raw response's 'close' event the heartbeat is cleared and
 * the subscription removed. The returned promise resolves once setup is done,
 * not when the stream ends.
 *
 * @param res framework response; its raw response object is written to directly.
 * @param settings telemetry settings handed to the monitoring service.
 * @param buildSnapshot snapshot builder handed to the monitoring service.
 */
export const teleStream = async (
  res: IResponse,
  settings: TelemetrySettings,
  buildSnapshot: ReturnType<typeof createSnapshotBuilder>
): Promise<void> => {
  const raw = res.getRaw();

  raw.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache, no-transform',
    Connection: 'keep-alive',
    'X-Accel-Buffering': 'no',
  });

  let closed = false;

  const send = async (payload: unknown): Promise<void> => {
    // Never write to a connection that has already closed.
    if (closed) return;
    try {
      raw.write(`data: ${JSON.stringify(payload)}\n\n`);
    } catch (err) {
      Logger.error('Telemetry SSE send failed', err);
    }
  };

  // Send hello immediately
  await send({ type: 'hello', ts: new Date().toISOString() });

  // Configure the singleton service before subscribing so polling can start.
  TelemetryMonitoringService.startMonitoring(settings, buildSnapshot);

  const onSnapshot = (data: TelemetrySnapshotData): void => {
    if (!closed) {
      void send(data);
    }
  };

  TelemetryMonitoringService.subscribe(onSnapshot);

  // Heartbeat to keep the connection alive. It is guarded like send(): an
  // exception thrown inside a timer callback would otherwise be uncaught.
  const hb = setInterval(() => {
    if (closed) return;
    try {
      raw.write(': ping\n\n');
    } catch (err) {
      Logger.error('Telemetry SSE heartbeat failed', err);
    }
  }, 15000);

  raw.on('close', () => {
    closed = true;
    clearInterval(hb);
    TelemetryMonitoringService.unsubscribe(onSnapshot);
    TelemetryMonitoringService.stopMonitoring();
  });
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/** Returns the static markup for the alert-history card; the list `#alertList` starts with a placeholder item. */
export const renderAlertPanel = (): string => `
    <div class="zt-card">
      <div class="zt-card-header">
        <h3 class="zt-card-title">Alert History</h3>
        <span class="zt-card-meta">Latest events</span>
      </div>
      <ul id="alertList" class="zt-alert-list">
        <li class="zt-alert-item">No alerts yet.</li>
      </ul>
    </div>
  `;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** Returns the static markup for the cost-tracking card; `#costTotal` starts at "$0.00". */
export const renderCostTracking = (): string => `
    <div class="zt-card">
      <div class="zt-card-header">
        <h3 class="zt-card-title">Cost Tracking</h3>
        <span class="zt-card-meta">Daily estimate</span>
      </div>
      <div class="zt-card-body">
        <p class="zt-cost-value" id="costTotal">$0.00</p>
        <p class="zt-card-meta">Auto-updated from resource metrics</p>
      </div>
    </div>
  `;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Returns the static markup for the resource-usage card, including the `#resourceUsageChart` canvas. */
export const renderResourceUsageChart = (): string => `
    <div class="zt-card">
      <div class="zt-card-header">
        <h3 class="zt-card-title">Resource Usage</h3>
        <span class="zt-card-meta">Current snapshot</span>
      </div>
      <canvas id="resourceUsageChart" class="zt-chart"></canvas>
    </div>
  `;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Returns the static markup for the worker-health card, including the `#workerHealthChart` canvas. */
export const renderWorkerHealthChart = (): string => `
    <div class="zt-card">
      <div class="zt-card-header">
        <h3 class="zt-card-title">Worker Health</h3>
        <span class="zt-card-meta">Last 24h</span>
      </div>
      <canvas id="workerHealthChart" class="zt-chart"></canvas>
    </div>
  `;
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import type { IRequest, IResponse, IRouter, RouteOptions } from '@zintrust/core';
|
|
2
|
+
import { Router } from '@zintrust/core';
|
|
3
|
+
import type { TelemetrySettings } from './api/TelemetryAPI';
|
|
4
|
+
import { createSnapshotBuilder } from './api/TelemetryAPI';
|
|
5
|
+
import { teleStream } from './api/TelemetryMonitoringService';
|
|
6
|
+
import { getDashboardHtml } from './routes/dashboard';
|
|
7
|
+
|
|
8
|
+
/** Caller-supplied dashboard options; any omitted field falls back to a default. */
export type TelemetryDashboardConfig = {
  enabled?: boolean;
  basePath?: string;
  middleware?: readonly string[];
  autoRefresh?: boolean;
  refreshIntervalMs?: number;
};

/** Fallback values used when a config field is not provided. */
const DEFAULTS = {
  enabled: true,
  basePath: '/telemetry',
  autoRefresh: true,
  refreshIntervalMs: 10000,
};

/** Handle returned by `TelemetryDashboard.create`. */
export type TelemetryDashboardApi = {
  /** Registers the dashboard routes on the given router. */
  registerRoutes: (router: IRouter) => void;
};
|
|
26
|
+
|
|
27
|
+
/** Factory for the telemetry dashboard. */
export const TelemetryDashboard = Object.freeze({
  /**
   * Resolves the config against the defaults, creates a snapshot builder and
   * returns a frozen API whose `registerRoutes` mounts the dashboard routes.
   */
  create(config: TelemetryDashboardConfig): TelemetryDashboardApi {
    return Object.freeze({
      registerRoutes: createRouteRegistrar(buildSettings(config), createSnapshotBuilder()),
    });
  },
});
|
|
36
|
+
|
|
37
|
+
/**
 * Resolves a partial config into complete settings.
 *
 * Missing fields take the DEFAULTS value (`middleware` defaults to an empty
 * list). A finite numeric `refreshIntervalMs` is floored and raised to at
 * least 1000 ms; anything else falls back to the default interval.
 */
function buildSettings(config: TelemetryDashboardConfig): TelemetrySettings {
  const resolveInterval = (candidate: number | undefined): number => {
    if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
      return DEFAULTS.refreshIntervalMs;
    }
    return Math.max(1000, Math.floor(candidate));
  };

  return {
    enabled: config.enabled ?? DEFAULTS.enabled,
    basePath: config.basePath ?? DEFAULTS.basePath,
    middleware: config.middleware ?? [],
    autoRefresh: config.autoRefresh ?? DEFAULTS.autoRefresh,
    refreshIntervalMs: resolveInterval(config.refreshIntervalMs),
  };
}
|
|
49
|
+
|
|
50
|
+
/**
 * Creates the `registerRoutes` function for a dashboard instance.
 *
 * The returned function registers nothing when the dashboard is disabled.
 * Otherwise it mounts the dashboard page, the summary API and the events API,
 * in that order.
 */
function createRouteRegistrar(
  settings: TelemetrySettings,
  buildSnapshot: ReturnType<typeof createSnapshotBuilder>
) {
  return (router: IRouter): void => {
    if (settings.enabled) {
      const hasMiddleware = settings.middleware.length > 0;
      // NOTE(review): with no middleware this is `undefined` at runtime despite
      // the asserted type — confirm Router.get accepts undefined options.
      const routeOptions: RouteOptions = (
        hasMiddleware ? { middleware: settings.middleware } : undefined
      ) as RouteOptions;

      registerDashboardRoute(router, settings, routeOptions);
      registerSummaryApi(router, settings, routeOptions, buildSnapshot);
      registerEventsApi(router, settings, routeOptions, buildSnapshot);
    }
  };
}
|
|
66
|
+
|
|
67
|
+
/** Mounts the dashboard HTML page at `settings.basePath`. */
function registerDashboardRoute(
  router: IRouter,
  settings: TelemetrySettings,
  routeOptions: RouteOptions
): void {
  Router.get(
    router,
    settings.basePath,
    (_req, res) => {
      // Settings are read per request and passed to the page renderer.
      const { basePath, autoRefresh, refreshIntervalMs } = settings;
      const page = getDashboardHtml({ basePath, autoRefresh, refreshIntervalMs });
      res.html(page);
    },
    routeOptions
  );
}
|
|
87
|
+
|
|
88
|
+
/** Mounts `GET {basePath}/api/summary`, which responds with one freshly built snapshot as JSON. */
function registerSummaryApi(
  router: IRouter,
  settings: { basePath: string },
  routeOptions: RouteOptions,
  buildSnapshot: ReturnType<typeof createSnapshotBuilder>
): void {
  const summaryPath = `${settings.basePath}/api/summary`;

  Router.get(
    router,
    summaryPath,
    async (_req, res) => {
      res.json(await buildSnapshot());
    },
    routeOptions
  );
}
|
|
104
|
+
|
|
105
|
+
/**
 * Mounts `GET {basePath}/api/events`, the Server-Sent Events stream.
 *
 * The handler awaits `teleStream`, which resolves once the stream is set up.
 * Awaiting lets a setup failure propagate to the router through the handler's
 * promise instead of being lost as an unhandled rejection.
 */
function registerEventsApi(
  router: IRouter,
  settings: TelemetrySettings,
  routeOptions: RouteOptions,
  buildSnapshot: ReturnType<typeof createSnapshotBuilder>
): void {
  Router.get(
    router,
    `${settings.basePath}/api/events`,
    async (_req: IRequest, res: IResponse) => {
      await teleStream(res, settings, buildSnapshot);
    },
    routeOptions
  );
}
|
|
120
|
+
|
|
121
|
+
export default TelemetryDashboard;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
<svg width="96" height="96" viewBox="0 0 100 100" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
2
|
+
<defs>
|
|
3
|
+
<linearGradient id="zt-logo" x1="10" y1="50" x2="90" y2="50" gradientUnits="userSpaceOnUse">
|
|
4
|
+
<stop stop-color="#22c55e" />
|
|
5
|
+
<stop offset="1" stop-color="#38bdf8" />
|
|
6
|
+
</linearGradient>
|
|
7
|
+
</defs>
|
|
8
|
+
<circle cx="50" cy="50" r="34" stroke="rgba(255,255,255,0.16)" stroke-width="4" />
|
|
9
|
+
<ellipse cx="50" cy="50" rx="40" ry="18" stroke="url(#zt-logo)" stroke-width="4" />
|
|
10
|
+
<ellipse cx="50" cy="50" rx="18" ry="40" stroke="url(#zt-logo)" stroke-width="4" opacity="0.75" />
|
|
11
|
+
<circle cx="50" cy="50" r="6" fill="url(#zt-logo)" />
|
|
12
|
+
<path d="M40 52C35 52 32 49 32 44C32 39 35 36 40 36H48" stroke="white" stroke-width="6" stroke-linecap="round" />
|
|
13
|
+
<path d="M60 48C65 48 68 51 68 56C68 61 65 64 60 64H52" stroke="white" stroke-width="6" stroke-linecap="round" />
|
|
14
|
+
<path d="M44 50H56" stroke="rgba(255,255,255,0.22)" stroke-width="6" stroke-linecap="round" />
|
|
15
|
+
</svg>
|