musubi-sdd 3.0.1 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/bin/musubi-change.js +623 -10
  2. package/bin/musubi-orchestrate.js +456 -0
  3. package/bin/musubi-trace.js +393 -0
  4. package/package.json +3 -2
  5. package/src/analyzers/impact-analyzer.js +682 -0
  6. package/src/integrations/cicd.js +782 -0
  7. package/src/integrations/documentation.js +740 -0
  8. package/src/integrations/examples.js +789 -0
  9. package/src/integrations/index.js +23 -0
  10. package/src/integrations/platforms.js +929 -0
  11. package/src/managers/delta-spec.js +484 -0
  12. package/src/monitoring/incident-manager.js +890 -0
  13. package/src/monitoring/index.js +633 -0
  14. package/src/monitoring/observability.js +938 -0
  15. package/src/monitoring/release-manager.js +622 -0
  16. package/src/orchestration/index.js +168 -0
  17. package/src/orchestration/orchestration-engine.js +409 -0
  18. package/src/orchestration/pattern-registry.js +319 -0
  19. package/src/orchestration/patterns/auto.js +386 -0
  20. package/src/orchestration/patterns/group-chat.js +395 -0
  21. package/src/orchestration/patterns/human-in-loop.js +506 -0
  22. package/src/orchestration/patterns/nested.js +322 -0
  23. package/src/orchestration/patterns/sequential.js +278 -0
  24. package/src/orchestration/patterns/swarm.js +395 -0
  25. package/src/orchestration/workflow-orchestrator.js +738 -0
  26. package/src/reporters/coverage-report.js +452 -0
  27. package/src/reporters/traceability-matrix-report.js +684 -0
  28. package/src/steering/advanced-validation.js +812 -0
  29. package/src/steering/auto-updater.js +670 -0
  30. package/src/steering/index.js +119 -0
  31. package/src/steering/quality-metrics.js +650 -0
  32. package/src/steering/template-constraints.js +789 -0
  33. package/src/templates/agents/claude-code/skills/agent-assistant/SKILL.md +22 -0
  34. package/src/templates/agents/claude-code/skills/issue-resolver/SKILL.md +21 -0
  35. package/src/templates/agents/claude-code/skills/orchestrator/SKILL.md +90 -28
  36. package/src/templates/agents/claude-code/skills/project-manager/SKILL.md +32 -0
  37. package/src/templates/agents/claude-code/skills/site-reliability-engineer/SKILL.md +27 -0
  38. package/src/templates/agents/claude-code/skills/steering/SKILL.md +30 -0
  39. package/src/templates/agents/claude-code/skills/test-engineer/SKILL.md +21 -0
  40. package/src/templates/agents/claude-code/skills/ui-ux-designer/SKILL.md +27 -0
  41. package/src/templates/agents/codex/AGENTS.md +36 -1
  42. package/src/templates/agents/cursor/AGENTS.md +36 -1
  43. package/src/templates/agents/gemini-cli/GEMINI.md +36 -1
  44. package/src/templates/agents/github-copilot/AGENTS.md +65 -1
  45. package/src/templates/agents/qwen-code/QWEN.md +36 -1
  46. package/src/templates/agents/windsurf/AGENTS.md +36 -1
  47. package/src/templates/shared/delta-spec-template.md +246 -0
  48. package/src/validators/delta-format.js +474 -0
  49. package/src/validators/traceability-validator.js +561 -0
@@ -0,0 +1,633 @@
1
+ /**
2
+ * Monitoring Module - SRE, Observability, and Release Management
3
+ *
4
+ * Provides monitoring capabilities for MUSUBI-powered applications:
5
+ * - SLI/SLO definition and tracking
6
+ * - Alerting rules generation
7
+ * - Dashboard templates
8
+ * - Health check patterns
9
+ */
10
+
11
+ const { EventEmitter } = require('events');
12
+
13
/**
 * Kinds of objective an SLI can measure.
 *
 * The string values are used as the `type` of an SLI and select which
 * Prometheus query shape is generated for it.
 *
 * @enum {string}
 */
const SLOType = {
  AVAILABILITY: 'availability',
  LATENCY: 'latency',
  THROUGHPUT: 'throughput',
  ERROR_RATE: 'error-rate',
  CORRECTNESS: 'correctness',
};
23
+
24
/**
 * Severity levels that can be attached to an alert rule.
 *
 * @enum {string}
 */
const AlertSeverity = {
  CRITICAL: 'critical',
  WARNING: 'warning',
  INFO: 'info',
};
32
+
33
/**
 * Metric kinds accepted when registering a metric definition.
 *
 * @enum {string}
 */
const MetricType = {
  COUNTER: 'counter',
  GAUGE: 'gauge',
  HISTOGRAM: 'histogram',
  SUMMARY: 'summary',
};
42
+
43
/**
 * SLI (Service Level Indicator) definition.
 *
 * Holds the metric name and indicator type and renders the matching
 * Prometheus query.
 */
class SLI {
  /**
   * @param {Object} options
   * @param {string} options.name - Indicator name.
   * @param {string} [options.description=''] - Human readable description.
   * @param {string} [options.type=SLOType.AVAILABILITY] - One of the SLOType values.
   * @param {string} [options.metric] - Base metric name used to build queries.
   * @param {string} [options.unit=''] - Unit label for the indicator.
   * @param {string|null} [options.goodEventsQuery=null] - Custom "good events" query.
   * @param {string|null} [options.totalEventsQuery=null] - Custom "total events" query.
   * @param {number|null} [options.threshold=null] - Threshold value; 0 is preserved.
   * @throws {TypeError} When `options` is not an object.
   */
  constructor(options) {
    if (options === null || typeof options !== 'object') {
      throw new TypeError('SLI options must be an object');
    }

    this.name = options.name;
    this.description = options.description || '';
    this.type = options.type || SLOType.AVAILABILITY;
    this.metric = options.metric;
    this.unit = options.unit || '';
    this.goodEventsQuery = options.goodEventsQuery || null;
    this.totalEventsQuery = options.totalEventsQuery || null;
    // Only null/undefined fall back to null so an explicit threshold of 0 is kept.
    this.threshold = options.threshold != null ? options.threshold : null;
  }

  /**
   * Generate the Prometheus query for this SLI.
   *
   * For types without a built-in query shape, the custom queries are used:
   * when both `goodEventsQuery` and `totalEventsQuery` are set the result is
   * their ratio, otherwise `goodEventsQuery` (or the raw metric name) is
   * returned as-is.
   *
   * @param {string} [window='5m'] - Range-vector window used inside `rate()`.
   * @returns {string} PromQL expression.
   */
  toPrometheusQuery(window = '5m') {
    const metric = this.metric;

    switch (this.type) {
      case SLOType.AVAILABILITY:
        return `sum(rate(${metric}_success_total[${window}])) / sum(rate(${metric}_total[${window}]))`;

      case SLOType.LATENCY:
        return `histogram_quantile(0.95, sum(rate(${metric}_bucket[${window}])) by (le))`;

      case SLOType.ERROR_RATE:
        return `sum(rate(${metric}_errors_total[${window}])) / sum(rate(${metric}_total[${window}]))`;

      case SLOType.THROUGHPUT:
        return `sum(rate(${metric}_total[${window}]))`;

      default:
        if (this.goodEventsQuery && this.totalEventsQuery) {
          return `(${this.goodEventsQuery}) / (${this.totalEventsQuery})`;
        }
        return this.goodEventsQuery || metric;
    }
  }

  /**
   * @returns {Object} Plain-object summary including the rendered query.
   */
  toJSON() {
    return {
      name: this.name,
      description: this.description,
      type: this.type,
      metric: this.metric,
      unit: this.unit,
      prometheusQuery: this.toPrometheusQuery()
    };
  }
}
91
+
92
/**
 * SLO (Service Level Objective) definition.
 *
 * Couples an SLI with a target ratio and measurement window, and derives the
 * error budget and a burn-rate alert rule from them.
 */
class SLO {
  /**
   * @param {Object} options
   * @param {string} options.name - Objective name.
   * @param {string} [options.description=''] - Human readable description.
   * @param {SLI|Object} options.sli - SLI instance, or options used to build one.
   * @param {number} options.target - Target ratio, e.g. 0.999 for 99.9%.
   * @param {string} [options.window='30d'] - Measurement window.
   * @param {{critical?: number, warning?: number}} [options.burnRateThresholds]
   *   Burn-rate multipliers; omitted keys keep their defaults (14.4 / 6).
   */
  constructor(options) {
    this.name = options.name;
    this.description = options.description || '';
    this.sli = options.sli instanceof SLI ? options.sli : new SLI(options.sli);
    this.target = options.target;
    this.window = options.window || '30d';
    // Merge over the defaults so a partial override cannot leave `critical`
    // undefined (which would render NaN into the alert expression).
    this.burnRateThresholds = {
      critical: 14.4,
      warning: 6,
      ...options.burnRateThresholds
    };
  }

  /**
   * Calculate the error budget.
   *
   * @returns {{total: number, remaining: null, consumptionRate: null}}
   *   `total` is `1 - target`; the other fields are placeholders that this
   *   class never fills in.
   */
  calculateErrorBudget() {
    return {
      total: 1 - this.target,
      remaining: null, // Calculated at runtime
      consumptionRate: null
    };
  }

  /**
   * Generate the critical burn-rate alert rule for this SLO.
   *
   * The budget burned at the critical multiplier is
   * `(1 - target) * burnRateThresholds.critical`. The comparison depends on
   * what the SLI query measures:
   * - error-rate SLIs (bad/total ratio): fire when the ratio is ABOVE it;
   * - latency SLIs with an explicit `threshold`: fire when the quantile is
   *   above that threshold (a success ratio is not comparable to a duration);
   * - everything else is treated as a good/total ratio and fires when the
   *   ratio drops BELOW `1 - burned budget`.
   *
   * @returns {{name: string, expr: string, for: string, labels: Object, annotations: Object}}
   */
  toBurnRateAlert() {
    const shortWindow = '5m';
    // Strip binary floating point noise (e.g. 0.9855999999999999 -> 0.9856).
    const tidy = (value) => Number(value.toPrecision(12));

    const query = this.sli.toPrometheusQuery();
    const burnedBudget = (1 - this.target) * this.burnRateThresholds.critical;

    let comparison = `< ${tidy(1 - burnedBudget)}`;
    let description = 'Error budget will be exhausted within 1 hour at current rate';

    if (this.sli.type === SLOType.ERROR_RATE) {
      comparison = `> ${tidy(burnedBudget)}`;
    } else if (this.sli.type === SLOType.LATENCY && this.sli.threshold != null) {
      comparison = `> ${this.sli.threshold}`;
      description = `Latency is above the configured threshold of ${this.sli.threshold}${this.sli.unit || ''}`;
    }

    return {
      name: `${this.name}_high_burn_rate`,
      expr: `(${query}) ${comparison}`,
      for: shortWindow,
      labels: {
        severity: AlertSeverity.CRITICAL,
        slo: this.name
      },
      annotations: {
        summary: `High burn rate on SLO: ${this.name}`,
        description
      }
    };
  }

  /**
   * @returns {Object} Plain-object summary including the SLI, error budget
   *   and generated burn-rate alert.
   */
  toJSON() {
    return {
      name: this.name,
      description: this.description,
      sli: this.sli.toJSON(),
      target: this.target,
      targetPercentage: `${(this.target * 100).toFixed(2)}%`,
      window: this.window,
      errorBudget: this.calculateErrorBudget(),
      burnRateAlert: this.toBurnRateAlert()
    };
  }
}
156
+
157
/**
 * Render a value as a YAML scalar.
 *
 * Simple identifier-like strings are emitted plain; anything else (or a word
 * YAML would read as a boolean/null) is emitted as a JSON string, which is
 * also a valid YAML double-quoted scalar.
 *
 * @param {*} value
 * @returns {string}
 */
function toYamlScalar(value) {
  const text = String(value);
  const isPlain = /^[A-Za-z_][A-Za-z0-9_.-]*$/.test(text);
  const isReserved = ['true', 'false', 'null', 'yes', 'no', 'on', 'off', 'y', 'n']
    .includes(text.toLowerCase());
  return isPlain && !isReserved ? text : JSON.stringify(text);
}

/**
 * Alert Rule definition.
 */
class AlertRule {
  /**
   * @param {Object} options
   * @param {string} options.name - Alert name.
   * @param {string} options.expr - PromQL expression that triggers the alert.
   * @param {string} [options.for='5m'] - Duration the condition must hold.
   * @param {string} [options.severity=AlertSeverity.WARNING] - Severity label.
   * @param {Object} [options.labels={}] - Additional labels.
   * @param {Object} [options.annotations={}] - Annotations (summary, description, ...).
   */
  constructor(options) {
    this.name = options.name;
    this.expr = options.expr;
    this.for = options.for || '5m';
    this.severity = options.severity || AlertSeverity.WARNING;
    this.labels = options.labels || {};
    this.annotations = options.annotations || {};
  }

  /**
   * Generate the Prometheus alert rule as a YAML list item.
   *
   * The expression and annotation values are always double-quoted and
   * escaped, so quotes, colons or newlines in them cannot break the document.
   * `severity` is emitted once: a `severity` key in `labels` overrides the
   * rule's own severity instead of producing a duplicate key.
   *
   * @returns {string} YAML text without a trailing newline.
   */
  toPrometheusYAML() {
    const labels = { severity: this.severity, ...this.labels };
    const { summary, description, ...extraAnnotations } = this.annotations;
    const annotations = {
      summary: summary || this.name,
      description: description || '',
      ...extraAnnotations
    };

    const lines = [
      `- alert: ${this.name}`,
      `  expr: ${JSON.stringify(String(this.expr))}`,
      `  for: ${this.for}`,
      '  labels:',
      ...Object.entries(labels).map(([key, value]) => `    ${key}: ${toYamlScalar(value)}`),
      '  annotations:',
      ...Object.entries(annotations).map(
        ([key, value]) => `    ${key}: ${JSON.stringify(String(value))}`
      )
    ];

    return lines.join('\n');
  }

  /**
   * @returns {Object} Plain-object form of the rule.
   */
  toJSON() {
    return {
      name: this.name,
      expr: this.expr,
      for: this.for,
      severity: this.severity,
      labels: this.labels,
      annotations: this.annotations
    };
  }
}
196
+
197
/**
 * Health Check definition.
 *
 * Holds a list of named checks and runs them one after another, each bounded
 * by a shared timeout.
 */
class HealthCheck {
  /**
   * @param {Object} options
   * @param {string} options.name - Health check name.
   * @param {string} [options.endpoint='/health'] - Base path for generated routes.
   * @param {number} [options.interval=30000] - Interval in milliseconds.
   * @param {number} [options.timeout=5000] - Per-check timeout in milliseconds.
   * @param {Array<Object>} [options.checks=[]] - Initial checks; each is
   *   registered through `addCheck`, so the same defaults apply and the
   *   caller's array is never mutated.
   */
  constructor(options) {
    this.name = options.name;
    this.endpoint = options.endpoint || '/health';
    this.interval = options.interval || 30000; // 30 seconds
    this.timeout = options.timeout || 5000;
    this.checks = [];

    for (const check of options.checks || []) {
      this.addCheck(check);
    }
  }

  /**
   * Add a dependency check.
   *
   * @param {Object} check
   * @param {string} check.name - Check name reported in results.
   * @param {string} [check.type='dependency'] - Check category.
   * @param {boolean} [check.critical=true] - Whether a failure marks the
   *   overall result unhealthy; only an explicit `false` disables it.
   * @param {Function} check.check - Function run by `execute`; may return a promise.
   * @returns {HealthCheck} This instance, for chaining.
   */
  addCheck(check) {
    this.checks.push({
      name: check.name,
      type: check.type || 'dependency',
      critical: check.critical !== false,
      check: check.check
    });
    return this;
  }

  /**
   * Run every check sequentially and build the health response.
   *
   * A check that throws, rejects, or does not settle within `timeout`
   * milliseconds is reported as unhealthy; it fails the overall status only
   * when it is critical.
   *
   * @returns {Promise<{status: string, timestamp: string, checks: Array<Object>}>}
   */
  async execute() {
    const results = [];
    let healthy = true;

    for (const check of this.checks) {
      let timer;

      try {
        const startTime = Date.now();
        const checkResult = await Promise.race([
          check.check(),
          new Promise((_, reject) => {
            timer = setTimeout(() => reject(new Error('Timeout')), this.timeout);
          })
        ]);

        results.push({
          name: check.name,
          status: 'healthy',
          latency: Date.now() - startTime,
          details: checkResult
        });
      } catch (error) {
        results.push({
          name: check.name,
          status: 'unhealthy',
          // Rejections are not guaranteed to be Error instances.
          error: error && error.message ? error.message : String(error)
        });
        if (check.critical) healthy = false;
      } finally {
        // Without this the pending timer keeps the event loop alive for the
        // full timeout after every check that settled in time.
        clearTimeout(timer);
      }
    }

    return {
      status: healthy ? 'healthy' : 'unhealthy',
      timestamp: new Date().toISOString(),
      checks: results
    };
  }

  /**
   * Generate Express.js route source for the health endpoints.
   *
   * The returned text is source code, not a function: it expects `app` and a
   * `healthCheck` variable to be in scope wherever it is pasted.
   *
   * @returns {string} Source for `<endpoint>`, `<endpoint>/live` and `<endpoint>/ready`.
   */
  toExpressHandler() {
    return [
      '',
      `app.get('${this.endpoint}', async (req, res) => {`,
      '  const health = await healthCheck.execute();',
      "  res.status(health.status === 'healthy' ? 200 : 503).json(health);",
      '});',
      '',
      `app.get('${this.endpoint}/live', (req, res) => {`,
      "  res.status(200).json({ status: 'alive', timestamp: new Date().toISOString() });",
      '});',
      '',
      `app.get('${this.endpoint}/ready', async (req, res) => {`,
      '  const health = await healthCheck.execute();',
      "  res.status(health.status === 'healthy' ? 200 : 503).json(health);",
      '});'
    ].join('\n');
  }

  /**
   * @returns {Object} Plain-object summary; check functions are omitted.
   */
  toJSON() {
    return {
      name: this.name,
      endpoint: this.endpoint,
      interval: this.interval,
      timeout: this.timeout,
      checks: this.checks.map(c => ({
        name: c.name,
        type: c.type,
        critical: c.critical
      }))
    };
  }
}
297
+
298
/**
 * Monitoring Configuration.
 *
 * Registry of SLOs, alert rules, health checks and metric definitions for a
 * single service, with exporters for Prometheus-style rule groups and a
 * Grafana dashboard. Emits `sloAdded`, `alertAdded` and `healthCheckAdded`
 * when the corresponding item is registered.
 */
class MonitoringConfig extends EventEmitter {
  /**
   * @param {Object} [options]
   * @param {string} [options.serviceName='musubi-service']
   * @param {string} [options.environment='production']
   */
  constructor(options = {}) {
    super();
    const { serviceName, environment } = options;
    this.serviceName = serviceName || 'musubi-service';
    this.environment = environment || 'production';
    this.slos = new Map();
    this.alerts = new Map();
    this.healthChecks = new Map();
    this.metrics = new Map();
  }

  /**
   * Register an SLO, keyed by its name (a later one with the same name replaces it).
   * @param {SLO|Object} slo - SLO instance or options for one.
   * @returns {MonitoringConfig} This instance, for chaining.
   */
  defineSLO(slo) {
    let entry = slo;
    if (!(entry instanceof SLO)) {
      entry = new SLO(slo);
    }
    this.slos.set(entry.name, entry);
    this.emit('sloAdded', entry);
    return this;
  }

  /**
   * Look up an SLO by name.
   * @param {string} name
   * @returns {SLO|undefined}
   */
  getSLO(name) {
    return this.slos.get(name);
  }

  /**
   * List every registered SLO in its JSON form.
   * @returns {Array<Object>}
   */
  listSLOs() {
    return Array.from(this.slos.values(), (entry) => entry.toJSON());
  }

  /**
   * Register an alert rule, keyed by its name.
   * @param {AlertRule|Object} alert - AlertRule instance or options for one.
   * @returns {MonitoringConfig} This instance, for chaining.
   */
  defineAlert(alert) {
    let entry = alert;
    if (!(entry instanceof AlertRule)) {
      entry = new AlertRule(alert);
    }
    this.alerts.set(entry.name, entry);
    this.emit('alertAdded', entry);
    return this;
  }

  /**
   * Look up an alert rule by name.
   * @param {string} name
   * @returns {AlertRule|undefined}
   */
  getAlert(name) {
    return this.alerts.get(name);
  }

  /**
   * List every registered alert rule in its JSON form.
   * @returns {Array<Object>}
   */
  listAlerts() {
    return Array.from(this.alerts.values(), (entry) => entry.toJSON());
  }

  /**
   * Register a health check, keyed by its name.
   * @param {HealthCheck|Object} healthCheck - HealthCheck instance or options for one.
   * @returns {MonitoringConfig} This instance, for chaining.
   */
  defineHealthCheck(healthCheck) {
    let entry = healthCheck;
    if (!(entry instanceof HealthCheck)) {
      entry = new HealthCheck(healthCheck);
    }
    this.healthChecks.set(entry.name, entry);
    this.emit('healthCheckAdded', entry);
    return this;
  }

  /**
   * Look up a health check by name.
   * @param {string} name
   * @returns {HealthCheck|undefined}
   */
  getHealthCheck(name) {
    return this.healthChecks.get(name);
  }

  /**
   * Register a metric definition, keyed by its name. No event is emitted.
   * @param {Object} metric
   * @param {string} metric.name
   * @param {string} [metric.type=MetricType.COUNTER]
   * @param {string} [metric.help='']
   * @param {Array<string>} [metric.labels=[]]
   * @returns {MonitoringConfig} This instance, for chaining.
   */
  defineMetric(metric) {
    const { name } = metric;
    const definition = {
      name,
      type: metric.type || MetricType.COUNTER,
      help: metric.help || '',
      labels: metric.labels || []
    };
    this.metrics.set(name, definition);
    return this;
  }

  /**
   * Build a single rule group: SLO burn-rate alerts first, then custom alerts.
   *
   * NOTE(review): rule objects carry `name` (and, for custom alerts, a
   * top-level `severity`) rather than the `alert` key used in Prometheus rule
   * files — confirm consumers translate this before loading it into Prometheus.
   *
   * @returns {{groups: Array<{name: string, rules: Array<Object>}>}}
   */
  toPrometheusConfig() {
    const burnRateRules = Array.from(this.slos.values(), (slo) => slo.toBurnRateAlert());
    const customRules = Array.from(this.alerts.values(), (alert) => alert.toJSON());

    const group = {
      name: `${this.serviceName}-alerts`,
      rules: [...burnRateRules, ...customRules]
    };
    return { groups: [group] };
  }

  /**
   * Build a Grafana dashboard with one gauge panel per SLO, stacked vertically.
   * @returns {Object} Dashboard definition.
   */
  toGrafanaDashboard() {
    const panelHeight = 6;

    const panels = Array.from(this.slos.values(), (slo, index) => {
      const steps = [
        { color: 'red', value: null },
        { color: 'yellow', value: slo.target - 0.01 },
        { color: 'green', value: slo.target }
      ];

      return {
        id: index + 1,
        type: 'gauge',
        title: slo.name,
        gridPos: { x: 0, y: index * panelHeight, w: 8, h: panelHeight },
        targets: [{
          expr: slo.sli.toPrometheusQuery(),
          legendFormat: slo.name
        }],
        fieldConfig: {
          defaults: {
            thresholds: { mode: 'absolute', steps },
            min: 0,
            max: 1,
            unit: 'percentunit'
          }
        }
      };
    });

    return {
      title: `${this.serviceName} SLO Dashboard`,
      uid: `${this.serviceName}-slo`,
      tags: ['slo', 'sre', this.serviceName],
      timezone: 'browser',
      panels,
      refresh: '30s',
      time: { from: 'now-24h', to: 'now' }
    };
  }

  /**
   * Generate the complete monitoring configuration.
   * @returns {Object} Service identity, registered items and both exports.
   */
  toJSON() {
    const slos = this.listSLOs();
    const alerts = this.listAlerts();
    const healthChecks = Array.from(this.healthChecks.values(), (entry) => entry.toJSON());
    const metrics = Array.from(this.metrics.values());

    return {
      serviceName: this.serviceName,
      environment: this.environment,
      slos,
      alerts,
      healthChecks,
      metrics,
      prometheus: this.toPrometheusConfig(),
      grafana: this.toGrafanaDashboard()
    };
  }
}
480
+
481
/**
 * Pre-defined SLO templates.
 *
 * Each entry is a factory returning a fresh SLO built from fixed names and
 * metrics plus the supplied target.
 */
const SLOTemplates = {
  /**
   * API availability SLO over a 30 day window.
   * @param {number} [target=0.999] - Target ratio.
   * @returns {SLO}
   */
  API_AVAILABILITY(target = 0.999) {
    const sli = {
      name: 'api-success-rate',
      type: SLOType.AVAILABILITY,
      metric: 'http_requests'
    };

    return new SLO({
      name: 'api-availability',
      description: 'API endpoint availability',
      sli,
      target,
      window: '30d'
    });
  },

  /**
   * API latency SLO over a 30 day window.
   * @param {number} [target=0.95] - Target ratio.
   * @param {number} [thresholdMs=200] - Latency threshold in milliseconds;
   *   stored on the SLI divided by 1000.
   * @returns {SLO}
   */
  API_LATENCY(target = 0.95, thresholdMs = 200) {
    const sli = {
      name: 'api-response-time',
      type: SLOType.LATENCY,
      metric: 'http_request_duration_seconds',
      threshold: thresholdMs / 1000
    };

    return new SLO({
      name: 'api-latency',
      description: `95th percentile latency under ${thresholdMs}ms`,
      sli,
      target,
      window: '30d'
    });
  },

  /**
   * Error rate SLO over a 7 day window.
   * @param {number} [target=0.99] - Target ratio.
   * @returns {SLO}
   */
  ERROR_RATE(target = 0.99) {
    const sli = {
      name: 'error-rate-indicator',
      type: SLOType.ERROR_RATE,
      metric: 'http_requests'
    };

    return new SLO({
      name: 'error-rate',
      description: 'Low error rate objective',
      sli,
      target,
      window: '7d'
    });
  }
};
531
+
532
/**
 * Render a ratio as a percentage number without binary floating point noise
 * (e.g. 0.07 -> 7 rather than 7.000000000000001).
 *
 * @param {number} ratio - Ratio such as 0.05.
 * @returns {number} Percentage value such as 5.
 */
function formatPercentage(ratio) {
  return Number((ratio * 100).toPrecision(12));
}

/**
 * Pre-defined Alert templates.
 *
 * Each entry is a factory returning a fresh AlertRule.
 */
const AlertTemplates = {
  /**
   * High Error Rate Alert.
   * @param {number} [threshold=0.05] - Error ratio above which the alert fires.
   * @returns {AlertRule}
   */
  HIGH_ERROR_RATE: (threshold = 0.05) => new AlertRule({
    name: 'HighErrorRate',
    expr: `sum(rate(http_requests_errors_total[5m])) / sum(rate(http_requests_total[5m])) > ${threshold}`,
    for: '5m',
    severity: AlertSeverity.CRITICAL,
    annotations: {
      summary: 'High error rate detected',
      description: `Error rate is above ${formatPercentage(threshold)}%`
    }
  }),

  /**
   * High Latency Alert.
   * @param {number} [thresholdMs=500] - P95 latency threshold in milliseconds;
   *   compared in the expression after dividing by 1000.
   * @returns {AlertRule}
   */
  HIGH_LATENCY: (thresholdMs = 500) => new AlertRule({
    name: 'HighLatency',
    expr: `histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le)) > ${thresholdMs / 1000}`,
    for: '5m',
    severity: AlertSeverity.WARNING,
    annotations: {
      summary: 'High latency detected',
      description: `P95 latency is above ${thresholdMs}ms`
    }
  }),

  /**
   * Service Down Alert.
   * @returns {AlertRule}
   */
  SERVICE_DOWN: () => new AlertRule({
    name: 'ServiceDown',
    expr: 'up == 0',
    for: '1m',
    severity: AlertSeverity.CRITICAL,
    annotations: {
      summary: 'Service is down',
      description: 'Service instance is not responding'
    }
  }),

  /**
   * High Memory Usage Alert.
   * @param {number} [threshold=0.9] - Memory ratio above which the alert fires.
   * @returns {AlertRule}
   */
  HIGH_MEMORY: (threshold = 0.9) => new AlertRule({
    name: 'HighMemoryUsage',
    expr: `process_resident_memory_bytes / node_memory_MemTotal_bytes > ${threshold}`,
    for: '5m',
    severity: AlertSeverity.WARNING,
    annotations: {
      summary: 'High memory usage',
      description: `Memory usage is above ${formatPercentage(threshold)}%`
    }
  })
};
592
+
593
/**
 * Factory for a monitoring configuration.
 *
 * @param {Object} [options={}] - Passed unchanged to the MonitoringConfig constructor.
 * @returns {MonitoringConfig} A new, empty configuration.
 */
function createMonitoringConfig(options = {}) {
  const config = new MonitoringConfig(options);
  return config;
}
599
+
600
+ // Import sub-modules
601
+ const releaseManagerModule = require('./release-manager');
602
+ const incidentManagerModule = require('./incident-manager');
603
+ const observabilityModule = require('./observability');
604
+
605
+ module.exports = {
606
+ // Classes
607
+ SLI,
608
+ SLO,
609
+ AlertRule,
610
+ HealthCheck,
611
+ MonitoringConfig,
612
+
613
+ // Constants
614
+ SLOType,
615
+ AlertSeverity,
616
+ MetricType,
617
+
618
+ // Templates
619
+ SLOTemplates,
620
+ AlertTemplates,
621
+
622
+ // Factory
623
+ createMonitoringConfig,
624
+
625
+ // Release Manager
626
+ ...releaseManagerModule,
627
+
628
+ // Incident Manager
629
+ ...incidentManagerModule,
630
+
631
+ // Observability
632
+ ...observabilityModule
633
+ };