musubi-sdd 3.0.1 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/musubi-change.js +623 -10
- package/bin/musubi-orchestrate.js +456 -0
- package/bin/musubi-trace.js +393 -0
- package/package.json +3 -2
- package/src/analyzers/impact-analyzer.js +682 -0
- package/src/integrations/cicd.js +782 -0
- package/src/integrations/documentation.js +740 -0
- package/src/integrations/examples.js +789 -0
- package/src/integrations/index.js +23 -0
- package/src/integrations/platforms.js +929 -0
- package/src/managers/delta-spec.js +484 -0
- package/src/monitoring/incident-manager.js +890 -0
- package/src/monitoring/index.js +633 -0
- package/src/monitoring/observability.js +938 -0
- package/src/monitoring/release-manager.js +622 -0
- package/src/orchestration/index.js +168 -0
- package/src/orchestration/orchestration-engine.js +409 -0
- package/src/orchestration/pattern-registry.js +319 -0
- package/src/orchestration/patterns/auto.js +386 -0
- package/src/orchestration/patterns/group-chat.js +395 -0
- package/src/orchestration/patterns/human-in-loop.js +506 -0
- package/src/orchestration/patterns/nested.js +322 -0
- package/src/orchestration/patterns/sequential.js +278 -0
- package/src/orchestration/patterns/swarm.js +395 -0
- package/src/orchestration/workflow-orchestrator.js +738 -0
- package/src/reporters/coverage-report.js +452 -0
- package/src/reporters/traceability-matrix-report.js +684 -0
- package/src/steering/advanced-validation.js +812 -0
- package/src/steering/auto-updater.js +670 -0
- package/src/steering/index.js +119 -0
- package/src/steering/quality-metrics.js +650 -0
- package/src/steering/template-constraints.js +789 -0
- package/src/templates/agents/claude-code/skills/agent-assistant/SKILL.md +22 -0
- package/src/templates/agents/claude-code/skills/issue-resolver/SKILL.md +21 -0
- package/src/templates/agents/claude-code/skills/orchestrator/SKILL.md +90 -28
- package/src/templates/agents/claude-code/skills/project-manager/SKILL.md +32 -0
- package/src/templates/agents/claude-code/skills/site-reliability-engineer/SKILL.md +27 -0
- package/src/templates/agents/claude-code/skills/steering/SKILL.md +30 -0
- package/src/templates/agents/claude-code/skills/test-engineer/SKILL.md +21 -0
- package/src/templates/agents/claude-code/skills/ui-ux-designer/SKILL.md +27 -0
- package/src/templates/agents/codex/AGENTS.md +36 -1
- package/src/templates/agents/cursor/AGENTS.md +36 -1
- package/src/templates/agents/gemini-cli/GEMINI.md +36 -1
- package/src/templates/agents/github-copilot/AGENTS.md +65 -1
- package/src/templates/agents/qwen-code/QWEN.md +36 -1
- package/src/templates/agents/windsurf/AGENTS.md +36 -1
- package/src/templates/shared/delta-spec-template.md +246 -0
- package/src/validators/delta-format.js +474 -0
- package/src/validators/traceability-validator.js +561 -0
|
@@ -0,0 +1,890 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Incident Manager - Incident response and management
|
|
3
|
+
*
|
|
4
|
+
* Provides incident management capabilities:
|
|
5
|
+
* - Incident lifecycle management
|
|
6
|
+
* - Runbook execution
|
|
7
|
+
* - Post-mortem generation
|
|
8
|
+
* - On-call management
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const { EventEmitter } = require('events');
|
|
12
|
+
|
|
13
|
+
/**
 * Incident Severity Levels
 *
 * Ordered from most severe (SEV1) to least severe (SEV5).
 * The object is frozen so that the shared, exported enum cannot be
 * modified by accident from another module.
 */
const IncidentSeverity = Object.freeze({
  SEV1: 'sev1', // Critical - major customer impact
  SEV2: 'sev2', // High - significant impact
  SEV3: 'sev3', // Medium - limited impact
  SEV4: 'sev4', // Low - minimal impact
  SEV5: 'sev5'  // Informational
});
|
|
23
|
+
|
|
24
|
+
/**
 * Incident Status
 *
 * The states an incident can be in, listed in the order they are
 * declared here (detected first, closed last). The object is frozen so
 * that the shared, exported enum cannot be modified by accident.
 */
const IncidentStatus = Object.freeze({
  DETECTED: 'detected',
  TRIAGING: 'triaging',
  INVESTIGATING: 'investigating',
  IDENTIFIED: 'identified',
  MITIGATING: 'mitigating',
  MONITORING: 'monitoring',
  RESOLVED: 'resolved',
  CLOSED: 'closed'
});
|
|
37
|
+
|
|
38
|
+
/**
 * Runbook Step Status
 *
 * Status values recorded for each step result of a runbook execution.
 * The object is frozen so that the shared, exported enum cannot be
 * modified by accident.
 */
const StepStatus = Object.freeze({
  PENDING: 'pending',
  IN_PROGRESS: 'in-progress',
  COMPLETED: 'completed',
  FAILED: 'failed',
  SKIPPED: 'skipped'
});
|
|
48
|
+
|
|
49
|
+
/**
 * Incident definition
 *
 * Holds the state of a single incident: identity, severity, status,
 * lifecycle timestamps, responders and a chronological timeline.
 * Every mutating method appends a timeline entry and returns `this`
 * so calls can be chained.
 */
class Incident {
  /**
   * @param {Object} [options] - Initial values; every field is optional.
   * @param {string} [options.id] - Defaults to a generated `INC-YYYYMMDD-XXXXXX` id.
   * @param {string} [options.title]
   * @param {string} [options.description='']
   * @param {string} [options.severity] - Defaults to IncidentSeverity.SEV3.
   * @param {string} [options.status] - Defaults to IncidentStatus.DETECTED.
   * @param {Date} [options.detectedAt] - Defaults to the construction time.
   * @param {string[]} [options.affectedServices]
   * @param {string} [options.impactSummary='']
   * @param {{affected: number, percentage: number}} [options.customerImpact]
   * @param {*} [options.assignee]
   * @param {Array} [options.responders] - Copied, so the caller's array is never mutated.
   * @param {*} [options.commander]
   * @param {Array} [options.relatedIncidents]
   * @param {Array} [options.tags]
   * @param {Object} [options.metadata]
   */
  constructor(options = {}) {
    this.id = options.id || this._generateId();
    this.title = options.title;
    this.description = options.description || '';
    this.severity = options.severity || IncidentSeverity.SEV3;
    this.status = options.status || IncidentStatus.DETECTED;

    this.detectedAt = options.detectedAt || new Date();
    this.acknowledgedAt = null;
    this.mitigatedAt = null;
    this.resolvedAt = null;
    this.closedAt = null;

    this.affectedServices = options.affectedServices || [];
    this.impactSummary = options.impactSummary || '';
    this.customerImpact = options.customerImpact || {
      affected: 0,
      percentage: 0
    };

    this.assignee = options.assignee || null;
    // acknowledge()/addResponder() push into this list, so take a copy
    // instead of aliasing (and later mutating) the caller's array.
    this.responders = [...(options.responders || [])];
    this.commander = options.commander || null;

    // The timeline always starts with the detection event.
    this.timeline = [{
      timestamp: this.detectedAt,
      action: 'detected',
      description: 'Incident detected',
      actor: 'system'
    }];

    this.rootCause = null;
    this.resolution = null;
    this.postMortem = null;

    this.relatedIncidents = options.relatedIncidents || [];
    this.tags = options.tags || [];
    this.metadata = options.metadata || {};
  }

  /**
   * Acknowledge the incident: records the acknowledgement time, moves the
   * status to TRIAGING and makes the responder the assignee.
   *
   * @param {*} responder - Who acknowledged the incident.
   * @returns {Incident} this
   * @throws {Error} If the incident was already acknowledged.
   */
  acknowledge(responder) {
    if (this.acknowledgedAt) {
      throw new Error('Incident already acknowledged');
    }

    this.acknowledgedAt = new Date();
    this.status = IncidentStatus.TRIAGING;
    this.assignee = responder;

    if (!this.responders.includes(responder)) {
      this.responders.push(responder);
    }

    this._addTimelineEntry('acknowledged', `Acknowledged by ${responder}`, responder);
    return this;
  }

  /**
   * Add a responder. Does nothing (and logs nothing) when the responder
   * is already on the list.
   *
   * @param {*} responder
   * @param {string} [role='responder'] - Only used in the timeline text.
   * @returns {Incident} this
   */
  addResponder(responder, role = 'responder') {
    if (!this.responders.includes(responder)) {
      this.responders.push(responder);
      this._addTimelineEntry('responder_added', `${responder} joined as ${role}`, responder);
    }
    return this;
  }

  /**
   * Set the incident commander and make sure they are listed as a responder.
   *
   * @param {*} commander
   * @returns {Incident} this
   */
  setCommander(commander) {
    this.commander = commander;
    this.addResponder(commander, 'commander');
    this._addTimelineEntry('commander_assigned', `${commander} assigned as incident commander`, commander);
    return this;
  }

  /**
   * Transition to a new status and update the matching lifecycle timestamp.
   *
   * @param {string} newStatus - One of the IncidentStatus values (not validated here).
   * @param {string} [note=''] - Free text appended to the timeline entry.
   * @param {string} [actor='system']
   * @returns {Incident} this
   */
  updateStatus(newStatus, note = '', actor = 'system') {
    const previousStatus = this.status;
    this.status = newStatus;

    this._addTimelineEntry('status_change', `Status changed from ${previousStatus} to ${newStatus}. ${note}`, actor);

    // Update timestamps
    if (newStatus === IncidentStatus.MITIGATING && !this.mitigatedAt) {
      // Record when mitigation started. Only the first transition counts,
      // so re-entering MITIGATING later does not move the timestamp.
      this.mitigatedAt = new Date();
    }
    if (newStatus === IncidentStatus.RESOLVED) {
      this.resolvedAt = new Date();
    }
    if (newStatus === IncidentStatus.CLOSED) {
      this.closedAt = new Date();
    }

    return this;
  }

  /**
   * Update severity and log the change on the timeline.
   *
   * @param {string} newSeverity - One of the IncidentSeverity values (not validated here).
   * @param {string} [reason='']
   * @param {string} [actor='system']
   * @returns {Incident} this
   */
  updateSeverity(newSeverity, reason = '', actor = 'system') {
    const previousSeverity = this.severity;
    this.severity = newSeverity;
    this._addTimelineEntry('severity_change', `Severity changed from ${previousSeverity} to ${newSeverity}. ${reason}`, actor);
    return this;
  }

  /**
   * Add a free-form update to the timeline.
   *
   * @param {string} description
   * @param {string} [actor='system']
   * @returns {Incident} this
   */
  addUpdate(description, actor = 'system') {
    this._addTimelineEntry('update', description, actor);
    return this;
  }

  /**
   * Set the root cause and move the status to IDENTIFIED.
   *
   * @param {string} rootCause
   * @param {string} [actor='system']
   * @returns {Incident} this
   */
  setRootCause(rootCause, actor = 'system') {
    this.rootCause = rootCause;
    this.status = IncidentStatus.IDENTIFIED;
    this._addTimelineEntry('root_cause_identified', `Root cause identified: ${rootCause}`, actor);
    return this;
  }

  /**
   * Set the resolution, move the status to RESOLVED and stamp `resolvedAt`.
   *
   * @param {string} resolution
   * @param {string} [actor='system']
   * @returns {Incident} this
   */
  setResolution(resolution, actor = 'system') {
    this.resolution = resolution;
    this.status = IncidentStatus.RESOLVED;
    this.resolvedAt = new Date();
    this._addTimelineEntry('resolved', `Incident resolved: ${resolution}`, actor);
    return this;
  }

  /**
   * Calculate time metrics, all in seconds measured from `detectedAt`.
   * A metric is null while its timestamp has not been recorded;
   * `totalDuration` keeps growing until the incident is closed.
   *
   * @returns {{timeToAcknowledge: ?number, timeToMitigate: ?number,
   *   timeToResolve: ?number, totalDuration: number, isOpen: boolean}}
   */
  getMetrics() {
    const now = new Date();

    return {
      timeToAcknowledge: this.acknowledgedAt
        ? (this.acknowledgedAt - this.detectedAt) / 1000
        : null,
      timeToMitigate: this.mitigatedAt
        ? (this.mitigatedAt - this.detectedAt) / 1000
        : null,
      timeToResolve: this.resolvedAt
        ? (this.resolvedAt - this.detectedAt) / 1000
        : null,
      totalDuration: this.closedAt
        ? (this.closedAt - this.detectedAt) / 1000
        : (now - this.detectedAt) / 1000,
      isOpen: !this.closedAt
    };
  }

  /**
   * Add timeline entry stamped with the current time.
   * @private
   */
  _addTimelineEntry(action, description, actor) {
    this.timeline.push({
      timestamp: new Date(),
      action,
      description,
      actor
    });
  }

  /**
   * Generate an id of the form `INC-YYYYMMDD-XXXXXX` (UTC date plus a
   * random base-36 suffix).
   * @private
   */
  _generateId() {
    const date = new Date();
    const dateStr = date.toISOString().slice(0, 10).replace(/-/g, '');
    // slice(2, 8) takes the same six characters the deprecated substr(2, 6) did.
    return `INC-${dateStr}-${Math.random().toString(36).slice(2, 8).toUpperCase()}`;
  }

  /**
   * Plain-object snapshot of the incident, including computed metrics.
   *
   * @returns {Object}
   */
  toJSON() {
    return {
      id: this.id,
      title: this.title,
      description: this.description,
      severity: this.severity,
      status: this.status,
      detectedAt: this.detectedAt,
      acknowledgedAt: this.acknowledgedAt,
      mitigatedAt: this.mitigatedAt,
      resolvedAt: this.resolvedAt,
      closedAt: this.closedAt,
      affectedServices: this.affectedServices,
      impactSummary: this.impactSummary,
      customerImpact: this.customerImpact,
      assignee: this.assignee,
      responders: this.responders,
      commander: this.commander,
      timeline: this.timeline,
      rootCause: this.rootCause,
      resolution: this.resolution,
      tags: this.tags,
      metrics: this.getMetrics()
    };
  }
}
|
|
267
|
+
|
|
268
|
+
/**
 * Runbook definition
 *
 * A named, versioned list of steps. Each step passed in is normalised
 * so that every field has a value (see the constructor defaults).
 */
class Runbook {
  /**
   * @param {Object} [options]
   * @param {string} [options.id] - Defaults to `rb-<timestamp>-<random>`.
   * @param {string} [options.name]
   * @param {string} [options.description='']
   * @param {string} [options.version='1.0.0']
   * @param {string} [options.category='general']
   * @param {string[]} [options.tags]
   * @param {string} [options.estimatedDuration='15 minutes']
   * @param {Object[]} [options.steps] - Raw step definitions.
   * @param {Object} [options.metadata]
   */
  constructor(options = {}) {
    // A timestamp alone collides for runbooks created within the same
    // millisecond, and a colliding id silently replaces the earlier
    // runbook when both are stored in a Map keyed by id. The random
    // suffix mirrors how RunbookExecution builds its ids.
    this.id = options.id || `rb-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    this.name = options.name;
    this.description = options.description || '';
    this.version = options.version || '1.0.0';
    this.category = options.category || 'general';
    this.tags = options.tags || [];
    this.estimatedDuration = options.estimatedDuration || '15 minutes';

    this.steps = (options.steps || []).map((step, index) => ({
      id: step.id || `step-${index + 1}`,
      order: step.order || index + 1,
      title: step.title,
      description: step.description || '',
      command: step.command || null,
      expectedOutput: step.expectedOutput || null,
      onFailure: step.onFailure || 'abort', // abort, continue, retry
      timeout: step.timeout || 300, // seconds
      requiresConfirmation: step.requiresConfirmation || false
    }));

    this.metadata = options.metadata || {};
  }

  /**
   * Plain-object snapshot of the runbook.
   *
   * @returns {Object}
   */
  toJSON() {
    return {
      id: this.id,
      name: this.name,
      description: this.description,
      version: this.version,
      category: this.category,
      tags: this.tags,
      estimatedDuration: this.estimatedDuration,
      steps: this.steps,
      metadata: this.metadata
    };
  }
}
|
|
310
|
+
|
|
311
|
+
/**
 * Runbook execution context
 *
 * Tracks one run of a runbook: a result record per step, a cursor
 * (`currentStepIndex`) pointing at the next step, and an overall status
 * of 'running', 'completed' or 'failed'.
 */
class RunbookExecution {
  /**
   * @param {Object} runbook - Must expose `id`, `name` and a `steps` array
   *   whose items have `id` and `onFailure`.
   * @param {?Object} [incident=null] - Optional incident this run belongs to;
   *   only its `id` is read (in toJSON).
   */
  constructor(runbook, incident = null) {
    // slice(2, 8) takes the same six characters the deprecated substr(2, 6) did.
    this.id = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    this.runbook = runbook;
    this.incident = incident;
    this.startedAt = new Date();
    this.completedAt = null;
    this.status = 'running';

    this.stepResults = runbook.steps.map(step => ({
      stepId: step.id,
      status: StepStatus.PENDING,
      startedAt: null,
      completedAt: null,
      output: null,
      error: null
    }));

    this.currentStepIndex = 0;
  }

  /**
   * Start executing a step. Unknown step ids are ignored.
   *
   * @param {string} stepId
   * @returns {RunbookExecution} this
   */
  startStep(stepId) {
    const result = this.stepResults.find(r => r.stepId === stepId);
    if (result) {
      result.status = StepStatus.IN_PROGRESS;
      result.startedAt = new Date();
    }
    return this;
  }

  /**
   * Complete a step and advance the cursor.
   *
   * @param {string} stepId
   * @param {*} [output=null] - Stored on the step result.
   * @returns {RunbookExecution} this
   */
  completeStep(stepId, output = null) {
    const result = this.stepResults.find(r => r.stepId === stepId);
    if (result) {
      result.status = StepStatus.COMPLETED;
      result.completedAt = new Date();
      result.output = output;
      this.currentStepIndex++;
    }

    this._markCompletedIfDone();
    return this;
  }

  /**
   * Fail a step. What happens next depends on the step's `onFailure`:
   * 'abort' fails the whole execution, 'continue' advances the cursor,
   * anything else (e.g. 'retry') leaves the cursor on the step.
   *
   * @param {string} stepId
   * @param {*} error - Stored on the step result.
   * @returns {RunbookExecution} this
   */
  failStep(stepId, error) {
    const result = this.stepResults.find(r => r.stepId === stepId);
    if (result) {
      result.status = StepStatus.FAILED;
      result.completedAt = new Date();
      result.error = error;
    }

    // Get step config to determine action
    const step = this.runbook.steps.find(s => s.id === stepId);
    if (step && step.onFailure === 'abort') {
      this.status = 'failed';
      this.completedAt = new Date();
    } else if (step && step.onFailure === 'continue') {
      this.currentStepIndex++;
      // A tolerated failure on the last step still ends the run.
      this._markCompletedIfDone();
    }

    return this;
  }

  /**
   * Skip a step and advance the cursor.
   *
   * @param {string} stepId
   * @param {string} [reason=''] - Stored as the step's output.
   * @returns {RunbookExecution} this
   */
  skipStep(stepId, reason = '') {
    const result = this.stepResults.find(r => r.stepId === stepId);
    if (result) {
      result.status = StepStatus.SKIPPED;
      result.completedAt = new Date();
      result.output = reason;
      this.currentStepIndex++;
      // Skipping the last step must finish the run as well.
      this._markCompletedIfDone();
    }
    return this;
  }

  /**
   * Get current step.
   *
   * @returns {?Object} The step at the cursor, or null once past the last step.
   */
  getCurrentStep() {
    if (this.currentStepIndex < this.runbook.steps.length) {
      return this.runbook.steps[this.currentStepIndex];
    }
    return null;
  }

  /**
   * Get execution progress. Completed and skipped steps both count as done.
   *
   * @returns {{total: number, completed: number, percentage: number, currentStep: ?Object}}
   */
  getProgress() {
    const completed = this.stepResults.filter(r =>
      r.status === StepStatus.COMPLETED ||
      r.status === StepStatus.SKIPPED
    ).length;
    const total = this.runbook.steps.length;

    return {
      total,
      completed,
      // Guard the division: a runbook without steps would yield NaN.
      percentage: total === 0 ? 0 : Math.round((completed / total) * 100),
      currentStep: this.getCurrentStep()
    };
  }

  /**
   * Mark the execution completed once the cursor has moved past the last step.
   * @private
   */
  _markCompletedIfDone() {
    if (this.currentStepIndex >= this.runbook.steps.length) {
      this.status = 'completed';
      this.completedAt = new Date();
    }
  }

  /**
   * Plain-object snapshot of the execution, including progress.
   *
   * @returns {Object}
   */
  toJSON() {
    return {
      id: this.id,
      runbookId: this.runbook.id,
      runbookName: this.runbook.name,
      incidentId: this.incident ? this.incident.id : null,
      startedAt: this.startedAt,
      completedAt: this.completedAt,
      status: this.status,
      stepResults: this.stepResults,
      progress: this.getProgress()
    };
  }
}
|
|
446
|
+
|
|
447
|
+
/**
 * Post-mortem document
 *
 * Created from an incident and pre-filled with its severity, duration,
 * timeline, root cause, resolution and response details. Review items
 * (action items, lessons learned, what went well/poorly) are added
 * afterwards through the `add*` methods, which return `this`.
 */
class PostMortem {
  /**
   * @param {Object} incident - Must expose `id`, `title`, `severity`,
   *   `affectedServices`, `customerImpact`, `timeline`, `rootCause`,
   *   `resolution`, `responders`, `commander` and `getMetrics()`.
   */
  constructor(incident) {
    this.id = `pm-${incident.id}`;
    this.incidentId = incident.id;
    this.title = `Post-Mortem: ${incident.title}`;
    this.createdAt = new Date();
    this.status = 'draft';

    // Take one metrics snapshot so every figure below refers to the same instant.
    const metrics = incident.getMetrics();

    // Auto-populate from incident
    this.summary = {
      severity: incident.severity,
      duration: metrics.totalDuration,
      affectedServices: incident.affectedServices,
      customerImpact: incident.customerImpact
    };

    // Same array object as the incident's timeline (not a copy).
    this.timeline = incident.timeline;
    this.rootCause = incident.rootCause || 'TBD';
    this.resolution = incident.resolution || 'TBD';

    this.detection = {
      method: 'TBD',
      // Filled from the incident's time-to-acknowledge metric.
      timeToDetect: metrics.timeToAcknowledge
    };

    this.response = {
      responders: incident.responders,
      commander: incident.commander,
      timeToMitigate: metrics.timeToMitigate
    };

    this.actionItems = [];
    this.lessonsLearned = [];
    this.whatWentWell = [];
    this.whatWentPoorly = [];
  }

  /**
   * Add an action item. Its id is `ai-<n>` where n is its 1-based position.
   *
   * @param {Object} item
   * @param {string} item.title
   * @param {string} [item.description='']
   * @param {*} [item.owner=null]
   * @param {string} [item.priority='medium']
   * @param {*} [item.dueDate=null]
   * @returns {PostMortem} this
   */
  addActionItem(item) {
    this.actionItems.push({
      id: `ai-${this.actionItems.length + 1}`,
      title: item.title,
      description: item.description || '',
      owner: item.owner || null,
      priority: item.priority || 'medium',
      dueDate: item.dueDate || null,
      status: 'open',
      createdAt: new Date()
    });
    return this;
  }

  /**
   * Add a lesson learned.
   *
   * @param {string} lesson
   * @returns {PostMortem} this
   */
  addLessonLearned(lesson) {
    this.lessonsLearned.push(lesson);
    return this;
  }

  /**
   * Add what went well.
   *
   * @param {string} item
   * @returns {PostMortem} this
   */
  addWhatWentWell(item) {
    this.whatWentWell.push(item);
    return this;
  }

  /**
   * Add what went poorly.
   *
   * @param {string} item
   * @returns {PostMortem} this
   */
  addWhatWentPoorly(item) {
    this.whatWentPoorly.push(item);
    return this;
  }

  /**
   * Generate markdown document. The review sections (what went well/poorly,
   * lessons learned, action items) are only emitted when they have entries.
   *
   * @returns {string}
   */
  toMarkdown() {
    // 0 seconds is a real measurement; only a missing value prints as N/A.
    const formatSeconds = (seconds) => (
      seconds === null || seconds === undefined ? 'N/A' : `${Math.round(seconds)}s`
    );
    // A literal pipe inside a cell would otherwise split the table column.
    const cell = (value) => String(value).replace(/\|/g, '\\|');

    // Metadata lines end with two spaces: a Markdown hard line break.
    let md = `# ${this.title}\n\n`;
    md += `**Incident ID:** ${this.incidentId}  \n`;
    md += `**Severity:** ${this.summary.severity}  \n`;
    md += `**Duration:** ${Math.round(this.summary.duration / 60)} minutes  \n`;
    md += `**Status:** ${this.status}  \n\n`;

    md += `## Summary\n\n`;
    md += `**Affected Services:** ${this.summary.affectedServices.join(', ') || 'N/A'}  \n`;
    md += `**Customer Impact:** ${this.summary.customerImpact.percentage}% of users affected  \n\n`;

    md += `## Timeline\n\n`;
    for (const entry of this.timeline) {
      const time = new Date(entry.timestamp).toISOString();
      md += `- **${time}** - ${entry.description} (${entry.actor})\n`;
    }
    md += '\n';

    md += `## Root Cause\n\n${this.rootCause}\n\n`;
    md += `## Resolution\n\n${this.resolution}\n\n`;

    md += `## Detection\n\n`;
    md += `**Method:** ${this.detection.method}  \n`;
    md += `**Time to Detect:** ${formatSeconds(this.detection.timeToDetect)}  \n\n`;

    md += `## Response\n\n`;
    md += `**Commander:** ${this.response.commander || 'N/A'}  \n`;
    md += `**Responders:** ${this.response.responders.join(', ') || 'N/A'}  \n`;
    md += `**Time to Mitigate:** ${formatSeconds(this.response.timeToMitigate)}  \n\n`;

    if (this.whatWentWell.length > 0) {
      md += `## What Went Well\n\n`;
      for (const item of this.whatWentWell) {
        md += `- ${item}\n`;
      }
      md += '\n';
    }

    if (this.whatWentPoorly.length > 0) {
      md += `## What Went Poorly\n\n`;
      for (const item of this.whatWentPoorly) {
        md += `- ${item}\n`;
      }
      md += '\n';
    }

    if (this.lessonsLearned.length > 0) {
      md += `## Lessons Learned\n\n`;
      for (const lesson of this.lessonsLearned) {
        md += `- ${lesson}\n`;
      }
      md += '\n';
    }

    if (this.actionItems.length > 0) {
      md += `## Action Items\n\n`;
      md += `| Priority | Title | Owner | Due Date | Status |\n`;
      md += `|----------|-------|-------|----------|--------|\n`;
      for (const item of this.actionItems) {
        md += `| ${cell(item.priority)} | ${cell(item.title)} | ${cell(item.owner || 'TBD')} | ${cell(item.dueDate || 'TBD')} | ${cell(item.status)} |\n`;
      }
      md += '\n';
    }

    return md;
  }

  /**
   * Plain-object snapshot of the post-mortem.
   *
   * @returns {Object}
   */
  toJSON() {
    return {
      id: this.id,
      incidentId: this.incidentId,
      title: this.title,
      createdAt: this.createdAt,
      status: this.status,
      summary: this.summary,
      timeline: this.timeline,
      rootCause: this.rootCause,
      resolution: this.resolution,
      detection: this.detection,
      response: this.response,
      whatWentWell: this.whatWentWell,
      whatWentPoorly: this.whatWentPoorly,
      lessonsLearned: this.lessonsLearned,
      actionItems: this.actionItems
    };
  }
}
|
|
618
|
+
|
|
619
|
+
/**
 * Incident Manager
 *
 * In-memory registry of incidents, runbooks, runbook executions and
 * post-mortems, plus the current on-call assignment. Emits an event for
 * every state change:
 * `incidentCreated`, `notify`, `incidentAcknowledged`,
 * `incidentStatusChanged`, `incidentResolved`, `runbookRegistered`,
 * `runbookExecutionStarted`, `postMortemCreated`, `oncallUpdated`.
 */
class IncidentManager extends EventEmitter {
  /**
   * @param {Object} [options]
   * @param {*} [options.primaryOncall=null]
   * @param {*} [options.secondaryOncall=null]
   * @param {Array} [options.escalation]
   * @param {number} [options.autoAcknowledgeTimeout=300] - Seconds (5 minutes by default).
   */
  constructor(options = {}) {
    super();
    this.incidents = new Map();
    this.runbooks = new Map();
    this.executions = new Map();
    this.postMortems = new Map();

    this.oncall = {
      primary: options.primaryOncall || null,
      secondary: options.secondaryOncall || null,
      escalation: options.escalation || []
    };

    this.options = {
      ...options,
      // Applied after the spread so that an explicit undefined/null in
      // `options` cannot wipe out the default; any other value is kept.
      autoAcknowledgeTimeout: options.autoAcknowledgeTimeout != null
        ? options.autoAcknowledgeTimeout
        : 300 // 5 minutes
    };
  }

  /**
   * Create a new incident, store it and notify the primary on-call (if set).
   *
   * @param {Incident|Object} options - An Incident, or options for a new one.
   * @returns {Incident}
   */
  createIncident(options) {
    const incident = options instanceof Incident ? options : new Incident(options);
    this.incidents.set(incident.id, incident);
    this.emit('incidentCreated', incident);

    // Auto-notify on-call
    if (this.oncall.primary) {
      this.emit('notify', {
        type: 'incident',
        incident,
        recipient: this.oncall.primary
      });
    }

    return incident;
  }

  /**
   * Get an incident by ID.
   *
   * @param {string} id
   * @returns {Incident|undefined}
   */
  getIncident(id) {
    return this.incidents.get(id);
  }

  /**
   * List incidents as plain objects.
   *
   * @param {Object} [filter]
   * @param {string|string[]} [filter.status] - Keep only these statuses.
   * @param {string|string[]} [filter.severity] - Keep only these severities.
   * @param {boolean} [filter.open] - When truthy, drop resolved and closed incidents.
   * @returns {Object[]}
   */
  listIncidents(filter = {}) {
    let incidents = [...this.incidents.values()];

    if (filter.status) {
      const statuses = Array.isArray(filter.status) ? filter.status : [filter.status];
      incidents = incidents.filter(i => statuses.includes(i.status));
    }
    if (filter.severity) {
      const severities = Array.isArray(filter.severity) ? filter.severity : [filter.severity];
      incidents = incidents.filter(i => severities.includes(i.severity));
    }
    if (filter.open) {
      incidents = incidents.filter(i =>
        i.status !== IncidentStatus.RESOLVED &&
        i.status !== IncidentStatus.CLOSED
      );
    }

    return incidents.map(i => i.toJSON());
  }

  /**
   * Acknowledge an incident.
   *
   * @param {string} incidentId
   * @param {*} responder
   * @returns {Incident}
   * @throws {Error} If the incident does not exist.
   */
  acknowledgeIncident(incidentId, responder) {
    const incident = this._getIncidentOrThrow(incidentId);

    incident.acknowledge(responder);
    this.emit('incidentAcknowledged', incident);
    return incident;
  }

  /**
   * Update incident status.
   *
   * @param {string} incidentId
   * @param {string} newStatus
   * @param {string} [note='']
   * @param {string} [actor='system']
   * @returns {Incident}
   * @throws {Error} If the incident does not exist.
   */
  updateIncidentStatus(incidentId, newStatus, note = '', actor = 'system') {
    const incident = this._getIncidentOrThrow(incidentId);

    incident.updateStatus(newStatus, note, actor);
    this.emit('incidentStatusChanged', { incident, newStatus });
    return incident;
  }

  /**
   * Resolve an incident.
   *
   * @param {string} incidentId
   * @param {string} resolution
   * @param {string} [actor='system']
   * @returns {Incident}
   * @throws {Error} If the incident does not exist.
   */
  resolveIncident(incidentId, resolution, actor = 'system') {
    const incident = this._getIncidentOrThrow(incidentId);

    incident.setResolution(resolution, actor);
    this.emit('incidentResolved', incident);
    return incident;
  }

  /**
   * Register a runbook. A runbook with the same id is replaced.
   *
   * @param {Runbook|Object} options - A Runbook, or options for a new one.
   * @returns {Runbook}
   */
  registerRunbook(options) {
    const runbook = options instanceof Runbook ? options : new Runbook(options);
    this.runbooks.set(runbook.id, runbook);
    this.emit('runbookRegistered', runbook);
    return runbook;
  }

  /**
   * Get a runbook.
   *
   * @param {string} id
   * @returns {Runbook|undefined}
   */
  getRunbook(id) {
    return this.runbooks.get(id);
  }

  /**
   * List runbooks as plain objects.
   *
   * @param {Object} [filter]
   * @param {string} [filter.category]
   * @param {string} [filter.tag]
   * @returns {Object[]}
   */
  listRunbooks(filter = {}) {
    let runbooks = [...this.runbooks.values()];

    if (filter.category) {
      runbooks = runbooks.filter(r => r.category === filter.category);
    }
    if (filter.tag) {
      runbooks = runbooks.filter(r => r.tags.includes(filter.tag));
    }

    return runbooks.map(r => r.toJSON());
  }

  /**
   * Execute a runbook: creates and stores a RunbookExecution. No step is
   * run here; the caller drives the steps on the returned execution.
   *
   * @param {string} runbookId
   * @param {?(Incident|string)} [incident=null] - Incident to link, given
   *   either as the incident object or as the id of a stored incident.
   * @returns {RunbookExecution}
   * @throws {Error} If the runbook, or an incident given by id, does not exist.
   */
  executeRunbook(runbookId, incident = null) {
    const runbook = this.runbooks.get(runbookId);
    if (!runbook) throw new Error(`Runbook not found: ${runbookId}`);

    // Accept an id as well, like the other incident-related methods do.
    const linkedIncident = typeof incident === 'string'
      ? this._getIncidentOrThrow(incident)
      : incident;

    const execution = new RunbookExecution(runbook, linkedIncident);
    this.executions.set(execution.id, execution);
    this.emit('runbookExecutionStarted', execution);

    // Link to incident if provided
    if (linkedIncident) {
      linkedIncident.addUpdate(`Runbook "${runbook.name}" execution started`, 'system');
    }

    return execution;
  }

  /**
   * Get execution.
   *
   * @param {string} id
   * @returns {RunbookExecution|undefined}
   */
  getExecution(id) {
    return this.executions.get(id);
  }

  /**
   * Create post-mortem for an incident and record its id on the incident.
   *
   * @param {string} incidentId
   * @returns {PostMortem}
   * @throws {Error} If the incident does not exist.
   */
  createPostMortem(incidentId) {
    const incident = this._getIncidentOrThrow(incidentId);

    const postMortem = new PostMortem(incident);
    this.postMortems.set(postMortem.id, postMortem);
    incident.postMortem = postMortem.id;

    this.emit('postMortemCreated', postMortem);
    return postMortem;
  }

  /**
   * Get post-mortem.
   *
   * @param {string} id
   * @returns {PostMortem|undefined}
   */
  getPostMortem(id) {
    return this.postMortems.get(id);
  }

  /**
   * Set on-call schedule. Omitting `secondary` clears it.
   *
   * @param {*} primary
   * @param {*} [secondary=null]
   * @returns {IncidentManager} this
   */
  setOncall(primary, secondary = null) {
    this.oncall.primary = primary;
    this.oncall.secondary = secondary;
    this.emit('oncallUpdated', this.oncall);
    return this;
  }

  /**
   * Get current on-call.
   *
   * @returns {{primary: *, secondary: *, escalation: Array}} A shallow copy.
   */
  getOncall() {
    return { ...this.oncall };
  }

  /**
   * Get incident statistics. `mttr` and `mtta` are mean seconds to
   * resolve/acknowledge, or null when no incident has that metric.
   *
   * @param {Object} [options]
   * @param {Date|string|number} [options.since] - Only count incidents
   *   detected at or after this moment.
   * @returns {{total: number, open: number, bySeverity: Object,
   *   byStatus: Object, mttr: ?number, mtta: ?number}}
   */
  getStatistics(options = {}) {
    const incidents = [...this.incidents.values()];
    const { since } = options;

    // Compare epoch milliseconds so that `since` may be a Date, an ISO
    // string or a number; comparing a Date with a string directly is
    // always false.
    const sinceTime = since ? new Date(since).getTime() : null;
    const filtered = sinceTime !== null
      ? incidents.filter(i => new Date(i.detectedAt).getTime() >= sinceTime)
      : incidents;

    const metrics = filtered.map(i => i.getMetrics());
    const acknowledged = metrics.filter(m => m.timeToAcknowledge !== null);
    const resolved = metrics.filter(m => m.timeToResolve !== null);

    return {
      total: filtered.length,
      open: filtered.filter(i => i.status !== IncidentStatus.CLOSED && i.status !== IncidentStatus.RESOLVED).length,
      bySeverity: this._countBy(filtered, 'severity'),
      byStatus: this._countBy(filtered, 'status'),
      mttr: resolved.length > 0
        ? resolved.reduce((sum, m) => sum + m.timeToResolve, 0) / resolved.length
        : null,
      mtta: acknowledged.length > 0
        ? acknowledged.reduce((sum, m) => sum + m.timeToAcknowledge, 0) / acknowledged.length
        : null
    };
  }

  /**
   * Look up a stored incident, throwing when it is missing.
   * @private
   */
  _getIncidentOrThrow(incidentId) {
    const incident = this.incidents.get(incidentId);
    if (!incident) throw new Error(`Incident not found: ${incidentId}`);
    return incident;
  }

  /**
   * Count by property: maps each distinct value of `prop` to its number of items.
   * @private
   */
  _countBy(items, prop) {
    return items.reduce((acc, item) => {
      acc[item[prop]] = (acc[item[prop]] || 0) + 1;
      return acc;
    }, {});
  }
}
|
|
867
|
+
|
|
868
|
+
/**
 * Factory for IncidentManager.
 *
 * @param {Object} [options={}] - Passed through unchanged to the
 *   IncidentManager constructor.
 * @returns {IncidentManager} A new manager instance.
 */
function createIncidentManager(options = {}) {
  const manager = new IncidentManager(options);
  return manager;
}
|
|
874
|
+
|
|
875
|
+
// Public API of this module.
module.exports = {
  // Classes
  Incident,
  Runbook,
  RunbookExecution,
  PostMortem,
  IncidentManager,

  // Constants (enum-like lookup objects of string values)
  IncidentSeverity,
  IncidentStatus,
  StepStatus,

  // Factory (returns a new IncidentManager)
  createIncidentManager
};
|