claude-flow-novice 1.6.2 → 1.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.json +16 -5
- package/.claude/settings.local.json +3 -2
- package/.claude-flow-novice/dist/src/api/auth-service.js +84 -38
- package/.claude-flow-novice/dist/src/api/auth-service.js.map +1 -1
- package/.claude-flow-novice/dist/src/coordination/index.js +3 -0
- package/.claude-flow-novice/dist/src/coordination/index.js.map +1 -1
- package/.claude-flow-novice/dist/src/coordination/v1-transparency/interfaces/v1-transparency-system.js +12 -0
- package/.claude-flow-novice/dist/src/coordination/v1-transparency/interfaces/v1-transparency-system.js.map +1 -0
- package/.claude-flow-novice/dist/src/coordination/v1-transparency/v1-to-v2-bridge.js +433 -0
- package/.claude-flow-novice/dist/src/coordination/v1-transparency/v1-to-v2-bridge.js.map +1 -0
- package/.claude-flow-novice/dist/src/coordination/v1-transparency/v1-transparency-adapter.js +1468 -0
- package/.claude-flow-novice/dist/src/coordination/v1-transparency/v1-transparency-adapter.js.map +1 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/apm-integration.js +724 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/apm-integration.js.map +1 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/datadog-collector.js +363 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/datadog-collector.js.map +1 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/index.js +97 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/index.js.map +1 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/newrelic-collector.js +384 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/newrelic-collector.js.map +1 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/performance-optimizer.js +612 -0
- package/.claude-flow-novice/dist/src/monitoring/apm/performance-optimizer.js.map +1 -0
- package/.claude-flow-novice/dist/src/monitoring/metrics-collector.js +282 -0
- package/.claude-flow-novice/dist/src/monitoring/metrics-collector.js.map +1 -0
- package/.claude-flow-novice/dist/src/providers/provider-manager.js +5 -3
- package/.claude-flow-novice/dist/src/providers/provider-manager.js.map +1 -1
- package/.claude-flow-novice/dist/src/providers/tiered-router.js +9 -17
- package/.claude-flow-novice/dist/src/providers/tiered-router.js.map +1 -1
- package/.claude-flow-novice/dist/src/web/api/apm-routes.js +355 -0
- package/.claude-flow-novice/dist/src/web/api/apm-routes.js.map +1 -0
- package/.claude-flow-novice/dist/src/web/frontend/src/utils/security.js +425 -0
- package/.claude-flow-novice/dist/src/web/frontend/src/utils/security.js.map +1 -0
- package/.claude-flow-novice/dist/src/web/security/security-middleware.js +379 -0
- package/.claude-flow-novice/dist/src/web/security/security-middleware.js.map +1 -0
- package/.claude-flow-novice/dist/src/web/websocket/apm-websocket-handler.js +441 -0
- package/.claude-flow-novice/dist/src/web/websocket/apm-websocket-handler.js.map +1 -0
- package/.claude-flow-novice/dist/src/web/websocket/websocket-manager.js +255 -1
- package/.claude-flow-novice/dist/src/web/websocket/websocket-manager.js.map +1 -1
- package/.claude-flow-novice/metrics.db +0 -0
- package/AGENT_PERFORMANCE_GUIDELINES.md +88 -0
- package/CLAUDE.md +103 -3
- package/config/hooks/post-edit-pipeline.js +68 -118
- package/config/hooks/pre-tool-memory-safety.js +209 -0
- package/package.json +9 -4
- package/scripts/cleanup-idle-sessions.sh +59 -0
- package/scripts/monitor-loop.sh +65 -0
- package/scripts/monitor-memory.sh +47 -0
- package/scripts/monitor.py +43 -0
- package/scripts/test-provider-routing.cjs +7 -9
- package/wiki/Provider-Routing.md +57 -69
- package/.claude-flow-novice/metrics.db-shm +0 -0
- package/.claude-flow-novice/metrics.db-wal +0 -0
|
@@ -0,0 +1,724 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advanced APM Integration Manager for Claude Flow Novice
|
|
3
|
+
* Coordinates DataDog, New Relic, and custom monitoring systems
|
|
4
|
+
*/ import { Logger } from '../../utils/logger.js';
|
|
5
|
+
import { DataDogCollector } from './datadog-collector.js';
|
|
6
|
+
import { NewRelicCollector } from './newrelic-collector.js';
|
|
7
|
+
import { DistributedTracer } from './distributed-tracing.js';
|
|
8
|
+
import { PerformanceOptimizer } from './performance-optimizer.js';
|
|
9
|
+
/**
 * Coordinates the optional DataDog and New Relic collectors, the distributed
 * tracer and the performance optimizer behind a single object.
 *
 * Every collaborator is optional: a component that is not configured (or whose
 * construction throws) is left `undefined` and all methods skip it.
 *
 * Constructing an instance has side effects: it starts two interval timers
 * (custom metrics, health checks) and registers `SIGTERM`, `SIGINT` and
 * `beforeExit` listeners that call {@link APMIntegration#shutdown}.
 */
export class APMIntegration {
    logger;
    config;
    dataDogCollector;
    newRelicCollector;
    distributedTracer;
    performanceOptimizer;
    customMetricsInterval;
    healthCheckInterval;
    // Listener registered on the process events; kept so shutdown() can remove it.
    shutdownHandler;
    // Promise of the in-flight / finished shutdown; makes shutdown() idempotent.
    shutdownPromise;
    /**
     * @param {object} [config] Optional overrides. The sections
     *   `distributedTracing`, `performanceOptimization`, `customMetrics` and
     *   `alerting` are merged key-by-key with their defaults; any other key
     *   (e.g. `dataDog`, `newRelic`) is copied as given.
     */
    constructor(config = {}){
        this.logger = new Logger('APMIntegration');
        const defaults = {
            distributedTracing: {
                enabled: true,
                samplingRate: 1.0
            },
            // NOTE(review): monitoringInterval is stored here but is not read anywhere in this class.
            performanceOptimization: {
                enabled: true,
                monitoringInterval: 5000
            },
            customMetrics: {
                enabled: true,
                interval: 10000
            },
            alerting: {
                enabled: true
            }
        };
        const merged = {
            ...defaults,
            ...config
        };
        // Merge each known section individually so that a partial override such as
        // { customMetrics: { enabled: true } } keeps the default `interval` instead
        // of replacing the whole section (which left the timer delay undefined).
        for (const [section, sectionDefaults] of Object.entries(defaults)){
            const override = config?.[section];
            if (override === undefined) {
                merged[section] = {
                    ...sectionDefaults
                };
            } else if (override !== null && typeof override === 'object') {
                merged[section] = {
                    ...sectionDefaults,
                    ...override
                };
            }
        }
        this.config = merged;
        this.initializeCollectors();
        this.startCustomMetrics();
        this.startHealthChecks();
        // Register automatic cleanup on process termination. One shared handler is
        // stored so that shutdown() can unregister it again, and a rejected
        // shutdown is logged instead of becoming an unhandled rejection.
        this.shutdownHandler = ()=>{
            this.shutdown().catch((error)=>{
                this.logger.error('Error during APM shutdown', {
                    error: error.message
                });
            });
        };
        process.on('SIGTERM', this.shutdownHandler);
        process.on('SIGINT', this.shutdownHandler);
        process.on('beforeExit', this.shutdownHandler);
        this.logger.info('APM auto-shutdown hooks registered');
    }
    /**
     * Builds whichever collaborators the config enables. A constructor failure is
     * logged and leaves that collaborator undefined; it never propagates.
     */
    initializeCollectors() {
        // Initialize DataDog if configured
        if (this.config.dataDog?.enabled !== false && this.config.dataDog?.apiKey) {
            try {
                this.dataDogCollector = new DataDogCollector(this.config.dataDog);
                this.logger.info('DataDog collector initialized');
            } catch (error) {
                this.logger.error('Failed to initialize DataDog collector', {
                    error: error.message
                });
            }
        }
        // Initialize New Relic if configured
        if (this.config.newRelic?.enabled !== false && this.config.newRelic?.licenseKey) {
            try {
                this.newRelicCollector = new NewRelicCollector(this.config.newRelic);
                this.logger.info('New Relic collector initialized');
            } catch (error) {
                this.logger.error('Failed to initialize New Relic collector', {
                    error: error.message
                });
            }
        }
        // Initialize distributed tracing (receives the collectors, which may be undefined)
        if (this.config.distributedTracing?.enabled !== false) {
            try {
                this.distributedTracer = new DistributedTracer(this.dataDogCollector, this.newRelicCollector, {
                    samplingRate: this.config.distributedTracing.samplingRate
                });
                this.logger.info('Distributed tracing initialized');
            } catch (error) {
                this.logger.error('Failed to initialize distributed tracing', {
                    error: error.message
                });
            }
        }
        // Initialize performance optimizer
        if (this.config.performanceOptimization?.enabled !== false) {
            try {
                this.performanceOptimizer = new PerformanceOptimizer(this.dataDogCollector, this.newRelicCollector);
                this.logger.info('Performance optimizer initialized');
            } catch (error) {
                this.logger.error('Failed to initialize performance optimizer', {
                    error: error.message
                });
            }
        }
    }
    // Agent Lifecycle Tracing
    /**
     * Opens a span for an agent lifecycle event, records the matching counter
     * metrics and finishes the span on the next timer tick.
     * Does nothing at all (no metrics either) when no tracer is available.
     *
     * @param {string} agentType
     * @param {string} lifecycleEvent
     * @param {string} [agentId] Tagged as 'unknown' when falsy.
     * @param {object} [metadata] Extra tags spread into the span and metric tags.
     */
    traceAgentLifecycle(agentType, lifecycleEvent, agentId, metadata) {
        if (!this.distributedTracer) return;
        const traceContext = this.distributedTracer.traceAgentOperation(agentType, lifecycleEvent, undefined, {
            'agent.id': agentId || 'unknown',
            'lifecycle.event': lifecycleEvent,
            ...metadata
        });
        // Record metrics
        this.recordAgentLifecycleMetric(agentType, lifecycleEvent, metadata);
        // Finish span
        setTimeout(()=>{
            this.distributedTracer.finishSpan(traceContext, {
                'agent.type': agentType,
                'lifecycle.event': lifecycleEvent
            });
        }, 0);
    }
    /**
     * Sends one count for an agent lifecycle event to every available collector.
     *
     * @param {string} agentType
     * @param {string} lifecycleEvent
     * @param {object} [metadata] Extra tags; keys here override the built-in tags.
     */
    recordAgentLifecycleMetric(agentType, lifecycleEvent, metadata) {
        const tags = {
            'agent.type': agentType,
            'lifecycle.event': lifecycleEvent,
            ...metadata
        };
        if (this.dataDogCollector) {
            this.dataDogCollector.count('agent.lifecycle.events', 1, tags);
        }
        if (this.newRelicCollector) {
            this.newRelicCollector.recordMetric('AgentLifecycleEvent', 1, 'count', tags);
        }
    }
    // Swarm Activity Monitoring
    /**
     * Opens a span for a swarm activity, records the matching metrics and
     * finishes the span on the next timer tick.
     * Does nothing at all (no metrics either) when no tracer is available.
     *
     * @param {string} swarmId
     * @param {string} activity
     * @param {string} topology
     * @param {number} agentCount Tagged as '0' when null/undefined.
     * @param {object} [metadata] Extra tags spread into the span and metric tags.
     */
    traceSwarmActivity(swarmId, activity, topology, agentCount, metadata) {
        if (!this.distributedTracer) return;
        const traceContext = this.distributedTracer.traceSwarmOperation(swarmId, activity, topology, undefined, {
            // String(...) instead of .toString() so a missing count cannot throw.
            'swarm.agent_count': String(agentCount ?? 0),
            'swarm.activity': activity,
            ...metadata
        });
        // Record metrics
        this.recordSwarmActivityMetric(swarmId, activity, topology, agentCount, metadata);
        // Finish span
        setTimeout(()=>{
            this.distributedTracer.finishSpan(traceContext, {
                'swarm.id': swarmId,
                'swarm.activity': activity
            });
        }, 0);
    }
    /**
     * Sends swarm activity metrics to every available collector.
     *
     * @param {string} swarmId
     * @param {string} activity
     * @param {string} topology
     * @param {number} agentCount Tagged as '0' when null/undefined.
     * @param {object} [metadata] Extra tags; keys here override the built-in tags.
     */
    recordSwarmActivityMetric(swarmId, activity, topology, agentCount, metadata) {
        const tags = {
            'swarm.id': swarmId,
            'swarm.activity': activity,
            'swarm.topology': topology,
            'swarm.agent_count': String(agentCount ?? 0),
            ...metadata
        };
        // NOTE(review): the 1000 / true arguments passed to recordSwarmActivity are
        // fixed literals here, not measured values — confirm they are placeholders.
        if (this.dataDogCollector) {
            this.dataDogCollector.recordSwarmActivity(agentCount, topology, 1000, true);
            this.dataDogCollector.count('swarm.activity', 1, tags);
        }
        if (this.newRelicCollector) {
            this.newRelicCollector.recordSwarmActivity(agentCount, topology, 1000, true);
            this.newRelicCollector.recordMetric('SwarmActivity', 1, 'count', tags);
        }
    }
    // WebSocket Performance Optimization
    /**
     * Traces a WebSocket operation and, when a duration is supplied, records it
     * with every available collector.
     *
     * @param {string} operation
     * @param {string} socketId
     * @param {number} [duration] A value of 0 is a valid measurement and is recorded.
     * @param {boolean} [success=true]
     * @param {object} [metadata] Passed to the tracer as span tags.
     */
    optimizeWebSocketPerformance(operation, socketId, duration, success = true, metadata) {
        // A 0 ms duration is a real measurement; only a missing value is skipped.
        const hasDuration = duration === 0 || Boolean(duration);
        if (this.distributedTracer) {
            const traceContext = this.distributedTracer.traceWebSocketOperation(operation, socketId, undefined, metadata);
            // NOTE(review): when no duration is given the span opened above is never
            // finished by this method — confirm the tracer cleans it up elsewhere.
            if (hasDuration) {
                setTimeout(()=>{
                    this.distributedTracer.finishSpan(traceContext, {
                        'websocket.operation': operation,
                        'websocket.success': success.toString()
                    });
                }, 0);
            }
        }
        // Record metrics
        if (hasDuration && this.dataDogCollector) {
            this.dataDogCollector.recordWebSocketEvent(operation, duration, success);
        }
        if (hasDuration && this.newRelicCollector) {
            this.newRelicCollector.recordWebSocketEvent(operation, duration, success);
        }
        // Optimize with performance optimizer
        if (this.performanceOptimizer && operation === 'connection') {
            // This would track active connections
            this.performanceOptimizer.optimizeWebSocketConnections(100); // Placeholder count
        }
    }
    // Database Performance Monitoring
    /**
     * Records a database operation; queries slower than 1000 are additionally
     * reported to the performance optimizer as slow queries.
     *
     * @param {string} operation
     * @param {string} [query]
     * @param {number} [duration] A value of 0 is a valid measurement and is recorded.
     * @param {boolean} [success=true]
     * @param {object} [metadata] `metadata.table` is used as the table tag ('unknown' if absent).
     */
    monitorDatabasePerformance(operation, query, duration, success = true, metadata) {
        // A 0 ms duration is a real measurement; only a missing value is skipped.
        const hasDuration = duration === 0 || Boolean(duration);
        const table = metadata?.table || 'unknown';
        // Record slow queries
        if (query && duration > 1000) {
            this.performanceOptimizer?.recordSlowQuery(query, duration);
        }
        // Record metrics
        if (this.dataDogCollector && hasDuration) {
            this.dataDogCollector.recordDatabaseQuery(operation, table, duration, success);
        }
        if (this.newRelicCollector && hasDuration) {
            this.newRelicCollector.recordMetric('DatabaseOperation', duration, 'histogram', {
                'operation.type': operation,
                'operation.table': table,
                'operation.status': success ? 'success' : 'failure'
            });
        }
    }
    // Custom Business Metrics
    /**
     * Forwards one metric to every available collector.
     *
     * @param {string} metricName
     * @param {number} value
     * @param {object} [tags]
     * @param {'count'|'gauge'|'histogram'} [type='gauge'] Any other type is ignored
     *   for DataDog and passed through unchanged to New Relic.
     */
    recordBusinessMetric(metricName, value, tags, type = 'gauge') {
        if (this.dataDogCollector) {
            switch(type){
                case 'count':
                    this.dataDogCollector.count(metricName, value, tags);
                    break;
                case 'gauge':
                    this.dataDogCollector.gauge(metricName, value, tags);
                    break;
                case 'histogram':
                    this.dataDogCollector.histogram(metricName, value, tags);
                    break;
            }
        }
        if (this.newRelicCollector) {
            this.newRelicCollector.recordMetric(metricName, value, type, tags);
        }
    }
    // Custom Metrics Collection
    /**
     * Starts the periodic custom-metrics timer unless `customMetrics.enabled` is falsy.
     */
    startCustomMetrics() {
        if (!this.config.customMetrics?.enabled) return;
        const configured = this.config.customMetrics.interval;
        // setInterval treats a missing/invalid delay as ~1ms; fall back to the default instead.
        const delay = Number.isFinite(configured) && configured > 0 ? configured : 10000;
        this.customMetricsInterval = setInterval(()=>{
            // collectCustomMetrics handles its own errors, so the promise is safe to drop.
            void this.collectCustomMetrics();
        }, delay);
        // Monitoring must not be the only thing keeping the process alive; a ref'd
        // timer would also prevent the 'beforeExit' shutdown hook from ever firing.
        this.customMetricsInterval.unref?.();
    }
    /**
     * Emits one round of custom metrics. Errors are logged, never thrown.
     *
     * @returns {Promise<void>}
     */
    async collectCustomMetrics() {
        try {
            // NOTE(review): the agent and swarm values below are hard-coded literals,
            // not measurements — replace with real sources before trusting dashboards.
            // Agent-related metrics
            this.recordBusinessMetric('agents.active', 5, {
                type: 'total'
            });
            this.recordBusinessMetric('agents.productivity', 87.5, {
                type: 'percentage'
            });
            // Swarm-related metrics
            this.recordBusinessMetric('swarms.active', 2, {
                type: 'total'
            });
            this.recordBusinessMetric('swarms.consensus_rate', 92.3, {
                type: 'percentage'
            });
            // Performance metrics
            if (this.performanceOptimizer) {
                const metrics = this.performanceOptimizer.getCurrentMetrics();
                this.recordBusinessMetric('performance.memory_usage_percent', metrics.memory.heapUsed / metrics.memory.heapTotal * 100);
                this.recordBusinessMetric('performance.cpu_usage', metrics.cpu.usage);
                this.recordBusinessMetric('performance.event_loop_lag', metrics.eventLoop.lag);
            }
            // System health metrics. getHealthStatus() is async: without the await
            // `health` is a Promise, `health.overall` is undefined and the score is always 0.
            const health = await this.getHealthStatus();
            this.recordBusinessMetric('system.health_score', health.overall === 'healthy' ? 100 : health.overall === 'degraded' ? 50 : 0);
        } catch (error) {
            this.logger.error('Error collecting custom metrics', {
                error: error.message
            });
        }
    }
    // Health Monitoring
    /**
     * Starts the health-check timer (every 30 seconds).
     */
    startHealthChecks() {
        this.healthCheckInterval = setInterval(async ()=>{
            await this.performHealthCheck();
        }, 30000); // Check health every 30 seconds
        // See startCustomMetrics: do not keep the process alive just for monitoring.
        this.healthCheckInterval.unref?.();
    }
    /**
     * Runs one health check: logs and (if alerting is enabled) alerts on a
     * non-healthy result, then records the `apm.health_status` metric.
     * Errors are logged, never thrown.
     *
     * @returns {Promise<void>}
     */
    async performHealthCheck() {
        try {
            const health = await this.getHealthStatus();
            // Log health status
            if (health.overall !== 'healthy') {
                this.logger.warn('APM integration health issue detected', health);
                // Send alert if configured
                if (this.config.alerting?.enabled) {
                    await this.sendHealthAlert(health);
                }
            }
            // Record health metrics
            this.recordBusinessMetric('apm.health_status', health.overall === 'healthy' ? 1 : 0, {
                status: health.overall
            });
        } catch (error) {
            this.logger.error('Error during health check', {
                error: error.message
            });
        }
    }
    /**
     * Collects the health of every available component.
     *
     * A component that is not initialized reports 'disabled'; one whose health
     * check throws reports 'unhealthy'. `overall` is 'unhealthy' if any component
     * is unhealthy, else 'degraded' if any is degraded, else 'healthy'.
     *
     * @returns {Promise<{overall: string, components: object, metrics: object}>}
     */
    async getHealthStatus() {
        const health = {
            overall: 'healthy',
            components: {
                dataDog: 'disabled',
                newRelic: 'disabled',
                distributedTracing: 'disabled',
                performanceOptimizer: 'disabled'
            },
            metrics: {
                activeTraces: 0,
                activeSpans: 0,
                queuedMetrics: 0,
                recommendations: 0,
                errorRate: 0
            }
        };
        // Check DataDog health
        if (this.dataDogCollector) {
            try {
                const ddHealth = await this.dataDogCollector.healthCheck();
                health.components.dataDog = ddHealth.status;
                // `?? 0` keeps the running total numeric if the collector omits the field.
                health.metrics.queuedMetrics += ddHealth.details.queuedMetrics ?? 0;
            } catch (error) {
                // A failing health probe is itself the signal; it is reported via the status.
                health.components.dataDog = 'unhealthy';
            }
        }
        // Check New Relic health
        if (this.newRelicCollector) {
            try {
                const nrHealth = await this.newRelicCollector.healthCheck();
                health.components.newRelic = nrHealth.status;
                health.metrics.queuedMetrics += nrHealth.details.queuedMetrics ?? 0;
            } catch (error) {
                health.components.newRelic = 'unhealthy';
            }
        }
        // Check distributed tracing health
        if (this.distributedTracer) {
            try {
                const dtHealth = this.distributedTracer.healthCheck();
                health.components.distributedTracing = dtHealth.status;
                health.metrics.activeTraces = dtHealth.details.activeTraces;
                health.metrics.activeSpans = dtHealth.details.activeSpans;
                health.metrics.errorRate = dtHealth.details.errorRate;
            } catch (error) {
                health.components.distributedTracing = 'unhealthy';
            }
        }
        // Check performance optimizer health
        if (this.performanceOptimizer) {
            try {
                const poHealth = this.performanceOptimizer.healthCheck();
                health.components.performanceOptimizer = poHealth.status;
                health.metrics.recommendations = poHealth.details.recommendations;
            } catch (error) {
                health.components.performanceOptimizer = 'unhealthy';
            }
        }
        // Determine overall health
        const componentStatuses = Object.values(health.components);
        if (componentStatuses.includes('unhealthy')) {
            health.overall = 'unhealthy';
        } else if (componentStatuses.includes('degraded')) {
            health.overall = 'degraded';
        }
        return health;
    }
    /**
     * Reports a health problem to every configured alert channel.
     * Currently each channel only writes a "Would send ..." log entry; nothing is
     * actually delivered.
     *
     * @param {{overall: string, components: object, metrics: object}} health
     * @returns {Promise<void>}
     */
    async sendHealthAlert(health) {
        if (!this.config.alerting?.enabled) return;
        const message = `APM Integration Health Alert: ${health.overall.toUpperCase()}`;
        const details = {
            components: health.components,
            metrics: health.metrics,
            timestamp: new Date().toISOString()
        };
        // Send to webhook
        if (this.config.alerting.webhookUrl) {
            try {
                // Send webhook notification
                this.logger.info('Would send health alert to webhook', {
                    url: this.config.alerting.webhookUrl,
                    message,
                    details
                });
            } catch (error) {
                this.logger.error('Failed to send webhook alert', {
                    error: error.message
                });
            }
        }
        // Send to Slack
        if (this.config.alerting.slackChannel) {
            try {
                // Send Slack notification
                this.logger.info('Would send health alert to Slack', {
                    channel: this.config.alerting.slackChannel,
                    message,
                    details
                });
            } catch (error) {
                this.logger.error('Failed to send Slack alert', {
                    error: error.message
                });
            }
        }
        // Send email
        if (this.config.alerting.emailRecipients?.length > 0) {
            try {
                // Send email notification
                this.logger.info('Would send health alert via email', {
                    recipients: this.config.alerting.emailRecipients,
                    message,
                    details
                });
            } catch (error) {
                this.logger.error('Failed to send email alert', {
                    error: error.message
                });
            }
        }
    }
    // Performance Analytics
    /**
     * @returns {{metrics: object, recommendations: Array, trends: object}} Current
     *   optimizer metrics/recommendations and tracer statistics; empty defaults
     *   for whichever component is unavailable.
     */
    getPerformanceAnalytics() {
        const analytics = {
            metrics: {},
            recommendations: [],
            trends: {}
        };
        // Get current metrics
        if (this.performanceOptimizer) {
            analytics.metrics = this.performanceOptimizer.getCurrentMetrics();
            analytics.recommendations = this.performanceOptimizer.getRecommendations();
        }
        // Get trace statistics
        if (this.distributedTracer) {
            analytics.trends = this.distributedTracer.getTraceStatistics();
        }
        return analytics;
    }
    // Integration Testing Support
    /**
     * Runs the health-based self test of every available component.
     *
     * @returns {Promise<{status: 'passed'|'failed', results: object, duration: number}>}
     *   `status` is 'passed' only if every executed test passed.
     */
    async runIntegrationTest() {
        const startTime = Date.now();
        const results = {};
        try {
            // Test DataDog integration
            if (this.dataDogCollector) {
                results.dataDog = await this.testDataDogIntegration();
            }
            // Test New Relic integration
            if (this.newRelicCollector) {
                results.newRelic = await this.testNewRelicIntegration();
            }
            // Test distributed tracing
            if (this.distributedTracer) {
                results.distributedTracing = await this.testDistributedTracing();
            }
            // Test performance optimization
            if (this.performanceOptimizer) {
                results.performanceOptimizer = await this.testPerformanceOptimizer();
            }
            const duration = Date.now() - startTime;
            const status = Object.values(results).every((r)=>r.status === 'passed') ? 'passed' : 'failed';
            return {
                status,
                results,
                duration
            };
        } catch (error) {
            const duration = Date.now() - startTime;
            return {
                status: 'failed',
                results: {
                    error: error.message
                },
                duration
            };
        }
    }
    /** @returns {Promise<{status: string, details: object}>} 'passed' iff the DataDog health check reports 'healthy'. */
    async testDataDogIntegration() {
        try {
            const health = await this.dataDogCollector.healthCheck();
            return {
                status: health.status === 'healthy' ? 'passed' : 'failed',
                details: health
            };
        } catch (error) {
            return {
                status: 'failed',
                details: {
                    error: error.message
                }
            };
        }
    }
    /** @returns {Promise<{status: string, details: object}>} 'passed' iff the New Relic health check reports 'healthy'. */
    async testNewRelicIntegration() {
        try {
            const health = await this.newRelicCollector.healthCheck();
            return {
                status: health.status === 'healthy' ? 'passed' : 'failed',
                details: health
            };
        } catch (error) {
            return {
                status: 'failed',
                details: {
                    error: error.message
                }
            };
        }
    }
    /** @returns {Promise<{status: string, details: object}>} 'passed' iff the tracer health check reports 'healthy'. */
    async testDistributedTracing() {
        try {
            const health = this.distributedTracer.healthCheck();
            return {
                status: health.status === 'healthy' ? 'passed' : 'failed',
                details: health
            };
        } catch (error) {
            return {
                status: 'failed',
                details: {
                    error: error.message
                }
            };
        }
    }
    /** @returns {Promise<{status: string, details: object}>} 'passed' iff the optimizer health check reports 'healthy'. */
    async testPerformanceOptimizer() {
        try {
            const health = this.performanceOptimizer.healthCheck();
            return {
                status: health.status === 'healthy' ? 'passed' : 'failed',
                details: health
            };
        } catch (error) {
            return {
                status: 'failed',
                details: {
                    error: error.message
                }
            };
        }
    }
    // Disaster Recovery Testing
    /**
     * Runs the outage / load / memory scenarios.
     *
     * @returns {Promise<{status: 'passed'|'failed', scenarios: object, duration: number}>}
     *   `status` is 'failed' only if at least one scenario failed; scenarios that
     *   report 'skipped' (component not available) do not fail the run.
     */
    async runDisasterRecoveryTest() {
        const startTime = Date.now();
        const scenarios = {};
        try {
            // Test DataDog outage
            if (this.dataDogCollector) {
                scenarios.dataDogOutage = await this.testDataDogOutage();
            }
            // Test New Relic outage
            if (this.newRelicCollector) {
                scenarios.newRelicOutage = await this.testNewRelicOutage();
            }
            // Test high load scenario
            scenarios.highLoad = await this.testHighLoadScenario();
            // Test memory stress
            scenarios.memoryStress = await this.testMemoryStressScenario();
            const duration = Date.now() - startTime;
            // A skipped scenario could not run; it must not be reported as a failure.
            const anyFailed = Object.values(scenarios).some((s)=>s.status === 'failed');
            const status = anyFailed ? 'failed' : 'passed';
            return {
                status,
                scenarios,
                duration
            };
        } catch (error) {
            const duration = Date.now() - startTime;
            return {
                status: 'failed',
                scenarios: {
                    error: error.message
                },
                duration
            };
        }
    }
    /**
     * Temporarily clears `config.dataDog.apiKey`, records a test metric and
     * restores the key.
     *
     * @returns {Promise<{status: 'passed'|'failed'|'skipped', details: object}>}
     */
    async testDataDogOutage() {
        try {
            // Simulate DataDog outage by temporarily disabling
            const originalApiKey = this.config.dataDog?.apiKey;
            if (originalApiKey) {
                this.config.dataDog.apiKey = undefined;
                try {
                    // Try to send metrics (should gracefully handle outage)
                    this.recordBusinessMetric('test.metric', 1, {
                        test: 'outage'
                    });
                } finally{
                    // Restore API key even if the probe throws, so a failed test
                    // cannot leave the live configuration without its key.
                    this.config.dataDog.apiKey = originalApiKey;
                }
                return {
                    status: 'passed',
                    details: {
                        message: 'DataDog outage handled gracefully'
                    }
                };
            }
            return {
                status: 'skipped',
                details: {
                    message: 'DataDog not configured'
                }
            };
        } catch (error) {
            return {
                status: 'failed',
                details: {
                    error: error.message
                }
            };
        }
    }
    /**
     * Placeholder scenario: performs no work and always reports 'passed'.
     *
     * @returns {Promise<{status: string, details: object}>}
     */
    async testNewRelicOutage() {
        try {
            // Similar to DataDog outage test
            return {
                status: 'passed',
                details: {
                    message: 'New Relic outage handled gracefully'
                }
            };
        } catch (error) {
            return {
                status: 'failed',
                details: {
                    error: error.message
                }
            };
        }
    }
    /**
     * Records 1000 metrics in a tight loop; passes if that takes under 5000 ms.
     *
     * @returns {Promise<{status: string, details: object}>}
     */
    async testHighLoadScenario() {
        try {
            // Simulate high load by sending many metrics
            const startTime = Date.now();
            const metricCount = 1000;
            for(let i = 0; i < metricCount; i++){
                this.recordBusinessMetric('load.test', i, {
                    iteration: i.toString()
                });
            }
            const duration = Date.now() - startTime;
            return {
                status: duration < 5000 ? 'passed' : 'failed',
                details: {
                    metricCount,
                    duration
                }
            };
        } catch (error) {
            return {
                status: 'failed',
                details: {
                    error: error.message
                }
            };
        }
    }
    /**
     * Creates 100 lifecycle traces, then passes if the optimizer reports heap
     * usage below 80%. Reports 'skipped' when no optimizer is available.
     *
     * @returns {Promise<{status: 'passed'|'failed'|'skipped', details: object}>}
     */
    async testMemoryStressScenario() {
        try {
            // Create many traces to test memory usage
            const traceCount = 100;
            for(let i = 0; i < traceCount; i++){
                this.traceAgentLifecycle('test-agent', 'execute', `agent-${i}`, {
                    test: 'memory-stress'
                });
            }
            // Check memory usage
            if (this.performanceOptimizer) {
                const metrics = this.performanceOptimizer.getCurrentMetrics();
                const memoryUsagePercent = metrics.memory.heapUsed / metrics.memory.heapTotal * 100;
                return {
                    status: memoryUsagePercent < 80 ? 'passed' : 'failed',
                    details: {
                        traceCount,
                        memoryUsagePercent
                    }
                };
            }
            return {
                status: 'skipped',
                details: {
                    message: 'Performance optimizer not available'
                }
            };
        } catch (error) {
            return {
                status: 'failed',
                details: {
                    error: error.message
                }
            };
        }
    }
    // Public API Methods
    /**
     * @returns {{dataDog: object|undefined, newRelic: object|undefined, distributedTracer: object|undefined, performanceOptimizer: object|undefined}}
     *   The live collaborator instances (undefined for any that are not initialized).
     */
    getCollectors() {
        return {
            dataDog: this.dataDogCollector,
            newRelic: this.newRelicCollector,
            distributedTracer: this.distributedTracer,
            performanceOptimizer: this.performanceOptimizer
        };
    }
    // Shutdown
    /**
     * Stops the timers, unregisters the process hooks and shuts down every
     * collaborator. Safe to call repeatedly: every call returns the promise of
     * the first shutdown, so collectors are shut down exactly once.
     *
     * @returns {Promise<void>} Rejects if a collector's shutdown rejects.
     */
    async shutdown() {
        if (!this.shutdownPromise) {
            this.shutdownPromise = this.performShutdown();
        }
        return this.shutdownPromise;
    }
    /**
     * Does the actual shutdown work; call shutdown() instead, which guards
     * against running this more than once.
     *
     * @returns {Promise<void>}
     */
    async performShutdown() {
        this.logger.info('Shutting down APM integration');
        // Unregister the process hooks so instances do not accumulate listeners
        // and 'beforeExit' cannot re-trigger the shutdown.
        if (this.shutdownHandler) {
            process.removeListener('SIGTERM', this.shutdownHandler);
            process.removeListener('SIGINT', this.shutdownHandler);
            process.removeListener('beforeExit', this.shutdownHandler);
            this.shutdownHandler = undefined;
        }
        // Clear intervals
        if (this.customMetricsInterval) {
            clearInterval(this.customMetricsInterval);
        }
        if (this.healthCheckInterval) {
            clearInterval(this.healthCheckInterval);
        }
        // Shutdown collectors
        const shutdownPromises = [];
        if (this.dataDogCollector) {
            shutdownPromises.push(this.dataDogCollector.shutdown());
        }
        if (this.newRelicCollector) {
            shutdownPromises.push(this.newRelicCollector.shutdown());
        }
        if (this.distributedTracer) {
            this.distributedTracer.cleanup();
        }
        if (this.performanceOptimizer) {
            this.performanceOptimizer.shutdown();
        }
        await Promise.all(shutdownPromises);
        this.logger.info('APM integration shutdown complete');
    }
}
|
|
720
|
+
/**
 * Convenience factory: builds an {@link APMIntegration} from the given config.
 *
 * @param {object} [config] Passed unchanged to the APMIntegration constructor.
 * @returns {APMIntegration} The newly constructed integration.
 */
export function createAPMIntegration(config = {}) {
    const integration = new APMIntegration(config);
    return integration;
}
|
|
723
|
+
|
|
724
|
+
//# sourceMappingURL=apm-integration.js.map
|