shipwright-cli 1.10.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +221 -55
  2. package/completions/_shipwright +264 -32
  3. package/completions/shipwright.bash +118 -26
  4. package/completions/shipwright.fish +80 -2
  5. package/dashboard/server.ts +208 -0
  6. package/docs/strategy/01-market-research.md +619 -0
  7. package/docs/strategy/02-mission-and-brand.md +587 -0
  8. package/docs/strategy/03-gtm-and-roadmap.md +759 -0
  9. package/docs/strategy/QUICK-START.txt +289 -0
  10. package/docs/strategy/README.md +172 -0
  11. package/docs/tmux-research/TMUX-ARCHITECTURE.md +567 -0
  12. package/docs/tmux-research/TMUX-AUDIT.md +925 -0
  13. package/docs/tmux-research/TMUX-BEST-PRACTICES-2025-2026.md +829 -0
  14. package/docs/tmux-research/TMUX-QUICK-REFERENCE.md +543 -0
  15. package/docs/tmux-research/TMUX-RESEARCH-INDEX.md +438 -0
  16. package/package.json +4 -2
  17. package/scripts/lib/helpers.sh +7 -0
  18. package/scripts/sw +323 -2
  19. package/scripts/sw-activity.sh +500 -0
  20. package/scripts/sw-adaptive.sh +925 -0
  21. package/scripts/sw-adversarial.sh +1 -1
  22. package/scripts/sw-architecture-enforcer.sh +1 -1
  23. package/scripts/sw-auth.sh +613 -0
  24. package/scripts/sw-autonomous.sh +754 -0
  25. package/scripts/sw-changelog.sh +704 -0
  26. package/scripts/sw-checkpoint.sh +1 -1
  27. package/scripts/sw-ci.sh +602 -0
  28. package/scripts/sw-cleanup.sh +1 -1
  29. package/scripts/sw-code-review.sh +698 -0
  30. package/scripts/sw-connect.sh +1 -1
  31. package/scripts/sw-context.sh +605 -0
  32. package/scripts/sw-cost.sh +44 -3
  33. package/scripts/sw-daemon.sh +568 -138
  34. package/scripts/sw-dashboard.sh +1 -1
  35. package/scripts/sw-db.sh +1380 -0
  36. package/scripts/sw-decompose.sh +539 -0
  37. package/scripts/sw-deps.sh +551 -0
  38. package/scripts/sw-developer-simulation.sh +1 -1
  39. package/scripts/sw-discovery.sh +412 -0
  40. package/scripts/sw-docs-agent.sh +539 -0
  41. package/scripts/sw-docs.sh +1 -1
  42. package/scripts/sw-doctor.sh +107 -1
  43. package/scripts/sw-dora.sh +615 -0
  44. package/scripts/sw-durable.sh +710 -0
  45. package/scripts/sw-e2e-orchestrator.sh +535 -0
  46. package/scripts/sw-eventbus.sh +393 -0
  47. package/scripts/sw-feedback.sh +479 -0
  48. package/scripts/sw-fix.sh +1 -1
  49. package/scripts/sw-fleet-discover.sh +567 -0
  50. package/scripts/sw-fleet-viz.sh +404 -0
  51. package/scripts/sw-fleet.sh +8 -1
  52. package/scripts/sw-github-app.sh +596 -0
  53. package/scripts/sw-github-checks.sh +4 -4
  54. package/scripts/sw-github-deploy.sh +1 -1
  55. package/scripts/sw-github-graphql.sh +1 -1
  56. package/scripts/sw-guild.sh +569 -0
  57. package/scripts/sw-heartbeat.sh +1 -1
  58. package/scripts/sw-hygiene.sh +559 -0
  59. package/scripts/sw-incident.sh +656 -0
  60. package/scripts/sw-init.sh +237 -24
  61. package/scripts/sw-instrument.sh +699 -0
  62. package/scripts/sw-intelligence.sh +1 -1
  63. package/scripts/sw-jira.sh +1 -1
  64. package/scripts/sw-launchd.sh +363 -28
  65. package/scripts/sw-linear.sh +1 -1
  66. package/scripts/sw-logs.sh +1 -1
  67. package/scripts/sw-loop.sh +267 -21
  68. package/scripts/sw-memory.sh +18 -1
  69. package/scripts/sw-mission-control.sh +487 -0
  70. package/scripts/sw-model-router.sh +545 -0
  71. package/scripts/sw-otel.sh +596 -0
  72. package/scripts/sw-oversight.sh +764 -0
  73. package/scripts/sw-pipeline-composer.sh +1 -1
  74. package/scripts/sw-pipeline-vitals.sh +1 -1
  75. package/scripts/sw-pipeline.sh +947 -35
  76. package/scripts/sw-pm.sh +758 -0
  77. package/scripts/sw-pr-lifecycle.sh +522 -0
  78. package/scripts/sw-predictive.sh +8 -1
  79. package/scripts/sw-prep.sh +1 -1
  80. package/scripts/sw-ps.sh +1 -1
  81. package/scripts/sw-public-dashboard.sh +798 -0
  82. package/scripts/sw-quality.sh +595 -0
  83. package/scripts/sw-reaper.sh +1 -1
  84. package/scripts/sw-recruit.sh +2248 -0
  85. package/scripts/sw-regression.sh +642 -0
  86. package/scripts/sw-release-manager.sh +736 -0
  87. package/scripts/sw-release.sh +706 -0
  88. package/scripts/sw-remote.sh +1 -1
  89. package/scripts/sw-replay.sh +520 -0
  90. package/scripts/sw-retro.sh +691 -0
  91. package/scripts/sw-scale.sh +444 -0
  92. package/scripts/sw-security-audit.sh +505 -0
  93. package/scripts/sw-self-optimize.sh +1 -1
  94. package/scripts/sw-session.sh +1 -1
  95. package/scripts/sw-setup.sh +263 -127
  96. package/scripts/sw-standup.sh +712 -0
  97. package/scripts/sw-status.sh +44 -2
  98. package/scripts/sw-strategic.sh +806 -0
  99. package/scripts/sw-stream.sh +450 -0
  100. package/scripts/sw-swarm.sh +620 -0
  101. package/scripts/sw-team-stages.sh +511 -0
  102. package/scripts/sw-templates.sh +4 -4
  103. package/scripts/sw-testgen.sh +566 -0
  104. package/scripts/sw-tmux-pipeline.sh +554 -0
  105. package/scripts/sw-tmux-role-color.sh +58 -0
  106. package/scripts/sw-tmux-status.sh +128 -0
  107. package/scripts/sw-tmux.sh +1 -1
  108. package/scripts/sw-trace.sh +485 -0
  109. package/scripts/sw-tracker-github.sh +188 -0
  110. package/scripts/sw-tracker-jira.sh +172 -0
  111. package/scripts/sw-tracker-linear.sh +251 -0
  112. package/scripts/sw-tracker.sh +117 -2
  113. package/scripts/sw-triage.sh +627 -0
  114. package/scripts/sw-upgrade.sh +1 -1
  115. package/scripts/sw-ux.sh +677 -0
  116. package/scripts/sw-webhook.sh +627 -0
  117. package/scripts/sw-widgets.sh +530 -0
  118. package/scripts/sw-worktree.sh +1 -1
  119. package/templates/pipelines/autonomous.json +2 -2
  120. package/tmux/shipwright-overlay.conf +35 -17
  121. package/tmux/tmux.conf +23 -21
@@ -0,0 +1,656 @@
1
+ #!/usr/bin/env bash
2
+ # ╔═══════════════════════════════════════════════════════════════════════════╗
3
+ # ║ shipwright incident — Autonomous Incident Detection & Response ║
4
+ # ║ Detect failures · Triage · Root cause analysis · Auto-remediate ║
5
+ # ╚═══════════════════════════════════════════════════════════════════════════╝
6
# Strict mode: stop on command errors, unset variables and failed pipeline stages.
set -o errexit -o nounset -o pipefail
# Report the failing location on stderr whenever the ERR trap fires.
trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR

VERSION="2.1.0"
# Directory holding this script, and its parent directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# ─── Colors (matches Seth's tmux theme) ─────────────────────────────────────
# Stored as literal backslash sequences; they only become escape codes when
# printed through `echo -e` (or printf %b).
CYAN='\033[38;2;0;212;255m'     # #00d4ff — primary accent
PURPLE='\033[38;2;124;58;237m'  # #7c3aed — secondary
BLUE='\033[38;2;0;102;255m'     # #0066ff — tertiary
GREEN='\033[38;2;74;222;128m'   # success
YELLOW='\033[38;2;250;204;21m'  # warning
RED='\033[38;2;248;113;113m'    # error
DIM='\033[2m'
BOLD='\033[1m'
RESET='\033[0m'

# ─── Cross-platform compatibility ──────────────────────────────────────────
# Optional helper library: sourced only when it is present next to this script.
# shellcheck source=lib/compat.sh
if [[ -f "$SCRIPT_DIR/lib/compat.sh" ]]; then
    source "$SCRIPT_DIR/lib/compat.sh"
fi
27
+
28
+ # ─── Output Helpers ─────────────────────────────────────────────────────────
29
# Status-line printers. Each prefixes the message with a coloured glyph;
# the arguments are joined into one message. Only error() writes to stderr.
info() {
    local msg="$*"
    echo -e "${CYAN}${BOLD}▸${RESET} ${msg}"
}

success() {
    local msg="$*"
    echo -e "${GREEN}${BOLD}✓${RESET} ${msg}"
}

warn() {
    local msg="$*"
    echo -e "${YELLOW}${BOLD}⚠${RESET} ${msg}"
}

error() {
    local msg="$*"
    echo -e "${RED}${BOLD}✗${RESET} ${msg}" >&2
}

# Current UTC time as an ISO-8601 timestamp (e.g. 2024-01-01T00:00:00Z).
now_iso() {
    date -u '+%Y-%m-%dT%H:%M:%SZ'
}

# Current time as seconds since the Unix epoch.
now_epoch() {
    date '+%s'
}
36
+
37
# Render a number of seconds as "Xh Ym Zs", "Ym Zs" or "Zs".
# Arguments: $1 - duration in whole seconds
# Outputs:   the formatted duration on stdout (no trailing newline)
format_duration() {
    local total="$1"
    if [[ "$total" -ge 3600 ]]; then
        local hrs=$((total / 3600))
        local rem=$((total % 3600))
        printf "%dh %dm %ds" "$hrs" "$((rem / 60))" "$((rem % 60))"
    elif [[ "$total" -ge 60 ]]; then
        local mins=$((total / 60))
        printf "%dm %ds" "$mins" "$((total % 60))"
    else
        printf "%ds" "$total"
    fi
}
47
+
48
+ # ─── Structured Event Log ──────────────────────────────────────────────────
49
EVENTS_FILE="${HOME}/.shipwright/events.jsonl"

# Escape a string so it can be placed between double quotes in JSON.
# Handles backslash, double quote, newline, carriage return and tab.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
_emit_event_json_escape() {
    local s="$1"
    local nl=$'\n' cr=$'\r' tab=$'\t'
    s=${s//\\/\\\\}      # backslashes first, so later escapes are not doubled
    s=${s//\"/\\\"}
    s=${s//$nl/\\n}
    s=${s//$cr/\\r}
    s=${s//$tab/\\t}
    printf '%s' "$s"
}

# Append one JSON object (one line) to $EVENTS_FILE.
# Arguments: $1   - event type
#            $2.. - key=value pairs; the value is everything after the first '='
# A value is written as a bare JSON number only when it is a valid JSON number
# (no leading zeros, no trailing dot); anything else is written as an escaped
# JSON string, so every emitted line stays parseable.
# Relies on now_iso and now_epoch for the "ts" / "ts_epoch" fields.
emit_event() {
    local event_type="$1"
    shift
    local json_number_re='^-?(0|[1-9][0-9]*)(\.[0-9]+)?$'
    local json_fields="" kv key val
    for kv in "$@"; do
        key="${kv%%=*}"
        val="${kv#*=}"
        key=$(_emit_event_json_escape "$key")
        if [[ "$val" =~ $json_number_re ]]; then
            json_fields="${json_fields},\"${key}\":${val}"
        else
            val=$(_emit_event_json_escape "$val")
            json_fields="${json_fields},\"${key}\":\"${val}\""
        fi
    done
    mkdir -p "$(dirname "$EVENTS_FILE")"
    printf '%s\n' "{\"ts\":\"$(now_iso)\",\"ts_epoch\":$(now_epoch),\"type\":\"$(_emit_event_json_escape "$event_type")\"${json_fields}}" >> "$EVENTS_FILE"
}
68
+
69
+ # ─── State Directories ──────────────────────────────────────────────────────
70
# Where incident records, the response configuration and the monitor PID live.
INCIDENTS_DIR="${HOME}/.shipwright/incidents"
INCIDENT_CONFIG="${INCIDENTS_DIR}/config.json"
MONITOR_PID_FILE="${INCIDENTS_DIR}/monitor.pid"

# Create the incidents directory and, if no config file exists yet, write the
# default configuration. An existing config file is left untouched.
ensure_incident_dir() {
    mkdir -p "$INCIDENTS_DIR"

    if [[ -f "$INCIDENT_CONFIG" ]]; then
        return 0
    fi

    cat > "$INCIDENT_CONFIG" << 'EOF'
{
  "auto_response_enabled": true,
  "p0_auto_hotfix": true,
  "p1_auto_hotfix": false,
  "auto_rollback_enabled": false,
  "notification_channels": ["stdout"],
  "severity_thresholds": {
    "p0_impact_count": 3,
    "p0_deploy_failure": true,
    "p1_test_regression_count": 5,
    "p1_pipeline_failure_rate": 0.3
  },
  "root_cause_patterns": {
    "timeout_keywords": ["timeout", "deadline", "too slow"],
    "memory_keywords": ["out of memory", "OOM", "heap"],
    "dependency_keywords": ["dependency", "import", "require", "not found"],
    "auth_keywords": ["auth", "permission", "forbidden", "401", "403"]
  }
}
EOF
}
98
+
99
+ # ─── Failure Detection ──────────────────────────────────────────────────────
100
+
101
# Print every event line in $EVENTS_FILE that mentions a pipeline/stage/test/
# deploy failure and whose ts_epoch is newer than (now - $1 seconds).
# Arguments: $1 - look-back window in seconds (default 3600)
# Outputs:   matching raw event lines on stdout
# Returns:   0 when at least one line matched, 1 when none matched.
#            Also 0 (with no output) when the events file does not exist.
detect_pipeline_failures() {
    local since="${1:-3600}"    # Last N seconds
    local cutoff_time=$(($(now_epoch) - since))

    [[ ! -f "$EVENTS_FILE" ]] && return 0

    # The timestamp is pulled out with match() on the "ts_epoch" key itself.
    # The previous quote-split approach read the field two positions after the
    # key (the next key name), so the timestamp was always parsed as 0.
    awk -v cutoff="$cutoff_time" '
        /pipeline\.failed|stage\.failed|test\.failed|deploy\.failed/ {
            if (match($0, /"ts_epoch":[ ]*"?[0-9]+/)) {
                ts = substr($0, RSTART, RLENGTH)
                sub(/^[^0-9]*/, "", ts)      # drop the key, colon and any quote
                if (ts + 0 > cutoff + 0) {
                    print $0
                    found = 1
                }
            }
        }
        END { exit (found ? 0 : 1) }
    ' "$EVENTS_FILE"
}
125
+
126
# Emit a JSON array of recent failure-like events from $EVENTS_FILE.
# An event qualifies when its ts_epoch is newer than (now - $1 seconds) and its
# type contains "failed", "error" or "timeout".
# Arguments: $1 - look-back window in seconds (default 3600)
# Outputs:   JSON array of {ts, ts_epoch, type, issue, stage, reason, error};
#            "[]" when the file is missing or jq fails.
get_recent_failures() {
    local since="${1:-3600}"
    local cutoff_time=$(($(now_epoch) - since))

    [[ ! -f "$EVENTS_FILE" ]] && echo "[]" && return 0

    # Lines are read raw and parsed one by one with `fromjson?`, so a single
    # malformed line is skipped instead of discarding every event. Missing
    # .type / .ts_epoch fall back to "" / 0 rather than raising a jq error.
    jq -n -R --argjson cutoff "$cutoff_time" '
        [
            inputs
            | fromjson?
            | select(type == "object")
            | select(((.ts_epoch // 0) | tonumber? // 0) > $cutoff)
            | select(
                (.type // "") | tostring
                | (contains("failed") or contains("error") or contains("timeout"))
              )
            | {
                ts: .ts,
                ts_epoch: .ts_epoch,
                type: .type,
                issue: .issue,
                stage: .stage,
                reason: .reason,
                error: .error
              }
        ]
    ' "$EVENTS_FILE" 2>/dev/null || echo "[]"
}
150
+
151
+ # ─── Severity Classification ───────────────────────────────────────────────
152
+
153
# Map a failure type and impact scope to a severity label.
# Arguments: $1 - failure type (e.g. deploy.failed, stage.failed)
#            $2 - number of affected resources; optional, defaults to 0.
#                 Anything that is not a non-negative integer is treated as 0.
# Outputs:   P0, P1, P2 or P3 on stdout
classify_severity() {
    local failure_type="${1:-}"
    local impact_scope="${2:-0}"    # Number of affected resources

    # Guard the numeric comparison below against empty / non-integer input,
    # which would otherwise raise arithmetic errors (or abort under `set -u`
    # when the argument is omitted).
    if [[ ! "$impact_scope" =~ ^[0-9]+$ ]]; then
        impact_scope=0
    fi

    case "$failure_type" in
        deploy.failed|pipeline.critical_error)
            echo "P0"
            ;;
        test.regression|stage.failed)
            # Wide-impact regressions are escalated to P0.
            if [[ "$impact_scope" -gt 5 ]]; then
                echo "P0"
            else
                echo "P1"
            fi
            ;;
        stage.timeout|health_check.failed)
            echo "P2"
            ;;
        *)
            echo "P3"
            ;;
    esac
}
176
+
177
+ # ─── Root Cause Analysis ───────────────────────────────────────────────────
178
+
179
# Guess a root cause from failure text by counting keyword matches.
# Categories are checked in order: timeout, memory, dependency; the first one
# with at least one matching line wins.
# Arguments: $1 - failure log text
#            $2 - config path (accepted for interface compatibility; the
#                 keyword lists are currently hard-coded and it is not read)
# Outputs:   a one-line root-cause description on stdout
analyze_root_cause() {
    local failure_log="${1:-}"
    local config="${2:-}"

    local timeout_hits memory_hits dependency_hits error_hits
    # `grep -c` prints the count (including 0) and exits 1 when nothing matched.
    # `|| true` keeps that single number; appending a fallback "0" would turn
    # the value into two lines and break the numeric comparisons below.
    timeout_hits=$(grep -icE 'timeout|deadline|too slow' <<< "$failure_log" || true)
    memory_hits=$(grep -icE 'out of memory|OOM|heap' <<< "$failure_log" || true)
    dependency_hits=$(grep -icE 'dependency|import|require|not found' <<< "$failure_log" || true)
    error_hits=$(grep -c . <<< "$failure_log" || true)

    # Defensive defaults in case grep produced no output at all.
    timeout_hits="${timeout_hits:-0}"
    memory_hits="${memory_hits:-0}"
    dependency_hits="${dependency_hits:-0}"
    error_hits="${error_hits:-0}"

    if [[ "$timeout_hits" -gt 0 ]]; then
        echo "Performance degradation: Timeout detected (${timeout_hits} occurrences)"
    elif [[ "$memory_hits" -gt 0 ]]; then
        echo "Memory pressure: OOM or heap allocation issue (${memory_hits} occurrences)"
    elif [[ "$dependency_hits" -gt 0 ]]; then
        echo "Dependency failure: Missing or incompatible dependency (${dependency_hits} occurrences)"
    else
        echo "Unknown cause: Check logs (${error_hits} error lines)"
    fi
}
199
+
200
+ # ─── Incident Record Management ─────────────────────────────────────────────
201
+
202
# Write a new incident record to ${INCIDENTS_DIR}/<incident_id>.json and emit
# an "incident.created" event.
# Arguments: $1 - incident id
#            $2 - severity label
#            $3 - root cause description
#            $4 - failure events as JSON text (empty or invalid JSON becomes [])
# Returns:   non-zero when the record could not be built or written.
create_incident_record() {
    local incident_id="$1"
    local severity="$2"
    local root_cause="$3"
    local failure_events="${4:-[]}"

    local incident_file="${INCIDENTS_DIR}/${incident_id}.json"
    local created_at
    created_at="$(now_iso)"

    # Never embed unparseable text in the record.
    if ! jq empty <<< "$failure_events" >/dev/null 2>&1; then
        failure_events="[]"
    fi

    # Build the document with jq so quotes, backslashes and newlines in any
    # field are escaped correctly instead of being pasted into a template.
    local record
    record=$(jq -n \
        --arg id "$incident_id" \
        --arg created_at "$created_at" \
        --arg severity "$severity" \
        --arg root_cause "$root_cause" \
        --argjson failure_events "$failure_events" \
        '{
            id: $id,
            created_at: $created_at,
            severity: $severity,
            status: "open",
            root_cause: $root_cause,
            failure_events: $failure_events,
            timeline: [],
            remediation: null,
            resolved_at: null,
            mttr_seconds: null,
            post_mortem_url: null
        }') || return 1

    printf '%s\n' "$record" > "$incident_file" || return 1

    emit_event "incident.created" "incident_id=$incident_id" "severity=$severity"
}
230
+
231
+ # ─── Hotfix Creation ───────────────────────────────────────────────────────
232
+
233
# Open a GitHub issue for an incident via the gh CLI.
# Arguments: $1 - incident id
#            $2 - severity label
#            $3 - root cause description
# Outputs:   stdout carries ONLY the new issue number (when it can be parsed
#            from the URL gh prints), so callers can capture it with $(...).
#            Status messages are redirected to stderr.
# Returns:   0 when gh reported an issue URL, 1 when gh is missing or failed.
create_hotfix_issue() {
    local incident_id="$1"
    local severity="$2"
    local root_cause="$3"

    if ! command -v gh &>/dev/null; then
        warn "gh CLI not found, skipping GitHub issue creation" >&2
        return 1
    fi

    local title="[HOTFIX] $severity: $root_cause"
    local body="**Incident ID:** $incident_id
**Severity:** $severity
**Root Cause:** $root_cause

## Timeline
See incident details: \`shipwright incident show $incident_id\`

## Automated Detection
This issue was automatically created by the incident commander.
"

    # shipwright label so daemon picks up; hotfix for routing
    local issue_url
    issue_url=$(gh issue create --title "$title" --body "$body" --label "hotfix,shipwright" 2>/dev/null || echo "")

    if [[ -z "$issue_url" ]]; then
        warn "Failed to create GitHub issue" >&2
        return 1
    fi

    # Keep the human-readable confirmation off stdout; it used to be captured
    # together with the number and made the caller's numeric check fail.
    success "Created hotfix issue: $issue_url" >&2
    if [[ "$issue_url" =~ /issues/([0-9]+) ]]; then
        echo "${BASH_REMATCH[1]}"
    fi
    return 0
}
270
+
271
+ # Trigger pipeline for P0/P1 hotfix issue (auto-remediation)
272
# Start the hotfix pipeline for an issue in the background.
# Arguments: $1 - GitHub issue number
#            $2 - incident id (used for logging and the emitted event)
# Quietly returns 0 when the issue number is not purely numeric or when
# sw-pipeline.sh is not an executable file in $SCRIPT_DIR.
trigger_pipeline_for_incident() {
    local issue_num="$1"
    local incident_id="$2"
    local pipeline_script="$SCRIPT_DIR/sw-pipeline.sh"

    # Guard clauses: nothing to do without a numeric issue or the script.
    [[ -n "$issue_num" && "$issue_num" =~ ^[0-9]+$ ]] || return 0
    [[ -x "$pipeline_script" ]] || return 0

    info "Auto-triggering pipeline for P0/P1 hotfix issue #${issue_num} (incident: $incident_id)"
    (
        cd "$REPO_DIR" &&
            export REPO_DIR SCRIPT_DIR &&
            bash "$pipeline_script" start --issue "$issue_num" --template hotfix 2>/dev/null
    ) &
    emit_event "incident.pipeline_triggered" "incident_id=$incident_id" "issue=$issue_num"
}
285
+
286
+ # Execute rollback when auto_rollback_enabled (wire to sw-feedback / sw-github-deploy)
287
# Ask sw-feedback.sh to roll back production for an incident.
# Arguments: $1 - incident id
#            $2 - reason text (default: "P0/P1 incident")
# Returns 0 without doing anything when sw-feedback.sh is not executable.
# A failing rollback command is tolerated; the event is emitted either way.
trigger_rollback_for_incident() {
    local incident_id="$1"
    local reason="${2:-P0/P1 incident}"
    local feedback_script="$SCRIPT_DIR/sw-feedback.sh"

    [[ -x "$feedback_script" ]] || return 0

    info "Auto-rollback triggered for incident $incident_id: $reason"
    if ! (cd "$REPO_DIR" && bash "$feedback_script" rollback production "$reason" 2>/dev/null); then
        true    # best effort: rollback failures are deliberately ignored
    fi
    emit_event "incident.rollback_triggered" "incident_id=$incident_id" "reason=$reason"
}
297
+
298
+ # ─── Watch Command ─────────────────────────────────────────────────────────
299
+
300
# Start the background incident monitor.
# Arguments: $1 - polling interval in whole seconds (default 60)
# Every interval the monitor reads recent failures, records an incident, and
# for P0/P1 incidents optionally triggers a rollback and a hotfix issue plus
# pipeline, according to $INCIDENT_CONFIG.
# Returns:   1 when the interval is invalid or a monitor is already running.
cmd_watch() {
    local interval="${1:-60}"

    # The interval feeds both `sleep` and integer arithmetic in the look-back
    # window, so only positive integers are accepted.
    if [[ ! "$interval" =~ ^[1-9][0-9]*$ ]]; then
        error "Invalid interval: $interval (expected a positive number of seconds)"
        return 1
    fi

    if [[ -f "$MONITOR_PID_FILE" ]]; then
        local old_pid
        old_pid=$(cat "$MONITOR_PID_FILE" 2>/dev/null || echo "")
        if [[ -n "$old_pid" ]] && kill -0 "$old_pid" 2>/dev/null; then
            warn "Monitor already running with PID $old_pid"
            return 1
        fi
    fi

    info "Starting incident monitoring (interval: ${interval}s)"

    # Background process
    (
        local sleeper_pid=""
        local nl=$'\n'
        trap 'rm -f "$MONITOR_PID_FILE"' EXIT
        # On TERM: stop the pending sleep and exit, which runs the EXIT trap.
        trap 'if [[ -n "$sleeper_pid" ]]; then kill "$sleeper_pid" 2>/dev/null; fi; exit 0' TERM

        while true; do
            # Sleep in the background and wait on it so a TERM is handled
            # immediately instead of after the sleep finishes.
            sleep "$interval" &
            sleeper_pid=$!
            wait "$sleeper_pid" || true
            sleeper_pid=""

            # Check for recent failures; a failed check must not end the monitor.
            local failures_json failure_count
            failures_json=$(get_recent_failures "$interval") || failures_json="[]"
            failure_count=$(jq 'length' <<< "$failures_json" 2>/dev/null) || failure_count=0
            [[ "$failure_count" =~ ^[0-9]+$ ]] || failure_count=0
            [[ "$failure_count" -gt 0 ]] || continue

            info "Detected $failure_count failure(s)"

            # Generate incident
            local incident_id severity root_cause
            incident_id="inc-$(date +%s)"
            severity=$(classify_severity "$(jq -r '.[0].type' <<< "$failures_json")" "$failure_count") || severity="P3"
            root_cause=$(analyze_root_cause "$(jq -r '.[0] | tostring' <<< "$failures_json")" "$INCIDENT_CONFIG") || root_cause="Unknown cause"

            if ! create_incident_record "$incident_id" "$severity" "$root_cause" "$failures_json"; then
                warn "Failed to record incident $incident_id"
                continue
            fi

            info "Incident $incident_id created (severity: $severity)"
            emit_event "incident.detected" "incident_id=$incident_id" "severity=$severity" || true

            # Auto-response applies to P0/P1 only.
            [[ "$severity" == "P0" || "$severity" == "P1" ]] || continue

            local auto_rollback
            auto_rollback=$(jq -r '.auto_rollback_enabled // false' "$INCIDENT_CONFIG" 2>/dev/null || echo "false")
            if [[ "$auto_rollback" == "true" ]]; then
                trigger_rollback_for_incident "$incident_id" "P0/P1 incident: $root_cause" || true
            fi

            # Read the hotfix switch that belongs to this severity. Combining
            # both switches with `//` let the P0 setting enable P1 hotfixes.
            local hotfix_key="p1_auto_hotfix"
            if [[ "$severity" == "P0" ]]; then
                hotfix_key="p0_auto_hotfix"
            fi
            local auto_hotfix
            auto_hotfix=$(jq -r --arg key "$hotfix_key" '.[$key] // false' "$INCIDENT_CONFIG" 2>/dev/null || echo "false")
            if [[ "$auto_hotfix" == "true" ]]; then
                local issue_num
                # A failed issue creation must not terminate the monitor.
                issue_num=$(create_hotfix_issue "$incident_id" "$severity" "$root_cause") || issue_num=""
                # Keep only the last output line in case status text precedes it.
                issue_num=${issue_num##*"$nl"}
                if [[ "$issue_num" =~ ^[0-9]+$ ]]; then
                    trigger_pipeline_for_incident "$issue_num" "$incident_id" || true
                fi
            fi
        done
    ) &

    # `$!` is the PID of the background subshell itself. `$$` inside the
    # subshell would still be this (short-lived) parent shell's PID.
    local monitor_pid=$!
    printf '%s\n' "$monitor_pid" > "$MONITOR_PID_FILE"

    success "Monitor started in background (PID: $monitor_pid)"
}
369
+
370
+ # ─── List Command ──────────────────────────────────────────────────────────
371
+
372
# List recorded incidents.
# Arguments: $1 - output format: "json" for a JSON array of the raw records,
#                 anything else (default "table") for a coloured table
# Incident records are the *.json files under $INCIDENTS_DIR, excluding
# post-mortem files and the configuration file config.json.
cmd_list() {
    local format="${1:-table}"

    local incident_files
    # config.json lives in the same directory but is not an incident record.
    incident_files=$(find "$INCIDENTS_DIR" -name '*.json' \
        -not -name '*postmortem*' -not -name 'config.json' \
        -type f 2>/dev/null | sort || true)

    if [[ -z "$incident_files" ]]; then
        info "No incidents recorded"
        return 0
    fi

    local incident_file
    case "$format" in
        json)
            echo "["
            local first=true
            while IFS= read -r incident_file; do
                [[ -z "$incident_file" ]] && continue
                if [[ "$first" == true ]]; then
                    first=false
                else
                    echo ","
                fi
                cat "$incident_file"
            done <<< "$incident_files"
            echo "]"
            ;;
        *)
            echo -e "${BOLD}Recent Incidents${RESET}"
            echo -e "${DIM}────────────────────────────────────────────────────────────────${RESET}"

            while IFS= read -r incident_file; do
                [[ -z "$incident_file" ]] && continue

                local id severity status cause
                id=$(jq -r '.id // "unknown"' "$incident_file" 2>/dev/null || echo "unknown")
                severity=$(jq -r '.severity // "P3"' "$incident_file" 2>/dev/null || echo "P3")
                status=$(jq -r '.status // "open"' "$incident_file" 2>/dev/null || echo "open")
                cause=$(jq -r '.root_cause // "unknown"' "$incident_file" 2>/dev/null || echo "unknown")
                cause="${cause:0:50}"

                case "$severity" in
                    P0) severity="${RED}${BOLD}$severity${RESET}" ;;
                    P1) severity="${YELLOW}${BOLD}$severity${RESET}" ;;
                    P2) severity="${BLUE}$severity${RESET}" ;;
                    *) severity="${DIM}$severity${RESET}" ;;
                esac

                # %b expands the backslash colour sequences held in $severity;
                # %s would print them as literal "\033[..." text.
                printf "%-20s %b %-8s %s\n" "$id" "$severity" "$status" "$cause"
            done <<< "$incident_files"
            ;;
    esac
}
424
+
425
+ # ─── Show Command ──────────────────────────────────────────────────────────
426
+
427
# Pretty-print one incident record.
# Arguments: $1 - incident id (file ${INCIDENTS_DIR}/<id>.json)
# Returns:   1 with a message on stderr when the id is missing or unknown.
cmd_show() {
    # Default to empty so a missing argument reaches the usage message instead
    # of aborting with "unbound variable" under `set -u`.
    local incident_id="${1:-}"
    if [[ -z "$incident_id" ]]; then
        error "Usage: shipwright incident show <incident_id>"
        return 1
    fi

    local incident_file="${INCIDENTS_DIR}/${incident_id}.json"
    if [[ ! -f "$incident_file" ]]; then
        error "Incident not found: $incident_id"
        return 1
    fi

    info "Incident: $incident_id"
    echo ""

    # IFS= keeps jq's leading indentation, so the nesting stays readable.
    local line
    jq . "$incident_file" | while IFS= read -r line; do
        echo " $line"
    done
}
441
+
442
+ # ─── Report Command ────────────────────────────────────────────────────────
443
+
444
# Generate a Markdown post-mortem for an incident.
# Arguments: $1 - incident id (file ${INCIDENTS_DIR}/<id>.json)
# Outputs:   writes ${INCIDENTS_DIR}/<id>-postmortem.md, then prints a success
#            message followed by the report path on stdout
# Returns:   1 with a message on stderr when the id is missing or unknown.
cmd_report() {
    # Default to empty so a missing argument reaches the usage message instead
    # of aborting with "unbound variable" under `set -u`.
    local incident_id="${1:-}"
    if [[ -z "$incident_id" ]]; then
        error "Usage: shipwright incident report <incident_id>"
        return 1
    fi

    local incident_file="${INCIDENTS_DIR}/${incident_id}.json"
    if [[ ! -f "$incident_file" ]]; then
        error "Incident not found: $incident_id"
        return 1
    fi

    local incident
    incident=$(jq . "$incident_file")

    local report_file="${INCIDENTS_DIR}/${incident_id}-postmortem.md"

    cat > "$report_file" << EOF
# Post-Incident Report
**Incident ID:** $incident_id
**Generated:** $(now_iso)

## Summary
$(jq -r '.root_cause' <<< "$incident")

## Timeline
EOF

    # `[]?` tolerates a record whose failure_events is null or missing; a bare
    # `[]` makes jq fail there, which aborts the report under `set -e`.
    jq -r '.failure_events[]? | "- \(.ts): \(.type)"' <<< "$incident" >> "$report_file"

    cat >> "$report_file" << EOF

## Impact
- Severity: $(jq -r '.severity' <<< "$incident")
- Status: $(jq -r '.status' <<< "$incident")

## Resolution
$(jq -r '.remediation // "Pending"' <<< "$incident")

## Prevention
1. Monitor for similar patterns
2. Add alerting thresholds
3. Improve automated detection
EOF

    success "Report generated: $report_file"
    echo "$report_file"
}
487
+
488
+ # ─── Stats Command ──────────────────────────────────────────────────────────
489
+
490
# Summarise recorded incidents: totals per severity, resolved count and the
# average mttr_seconds of resolved incidents.
# Arguments: $1 - output format: "json" or anything else (default "table")
# Incident records are the *.json files under $INCIDENTS_DIR, excluding
# post-mortem files and the configuration file config.json.
cmd_stats() {
    local format="${1:-table}"

    local incident_files
    # One discovery pass drives both the total and the per-severity counts.
    # config.json lives in the same directory but is not an incident record.
    incident_files=$(find "$INCIDENTS_DIR" -name '*.json' \
        -not -name '*postmortem*' -not -name 'config.json' \
        -type f 2>/dev/null || true)

    if [[ -z "$incident_files" ]]; then
        info "No incident data available"
        return 0
    fi

    local total_incidents=0
    local p0_count=0 p1_count=0 p2_count=0 p3_count=0
    local resolved_count=0 mttr_sum=0 mttr_avg=0

    local incident_file
    while IFS= read -r incident_file; do
        [[ -z "$incident_file" ]] && continue
        local sev status mttr
        sev=$(jq -r '.severity // "P3"' "$incident_file" 2>/dev/null || echo "P3")
        status=$(jq -r '.status // "open"' "$incident_file" 2>/dev/null || echo "open")
        mttr=$(jq -r '.mttr_seconds // 0' "$incident_file" 2>/dev/null || echo "0")
        # Only whole seconds can be summed with shell arithmetic.
        [[ "$mttr" =~ ^[0-9]+$ ]] || mttr=0

        total_incidents=$((total_incidents + 1))

        # Plain assignments instead of ((x++)): a post-increment from 0 has
        # exit status 1, which terminates the script under `set -e`.
        case "$sev" in
            P0) p0_count=$((p0_count + 1)) ;;
            P1) p1_count=$((p1_count + 1)) ;;
            P2) p2_count=$((p2_count + 1)) ;;
            *) p3_count=$((p3_count + 1)) ;;
        esac

        if [[ "$status" == "resolved" ]]; then
            resolved_count=$((resolved_count + 1))
            mttr_sum=$((mttr_sum + mttr))
        fi
    done <<< "$incident_files"

    if [[ "$resolved_count" -gt 0 ]]; then
        mttr_avg=$((mttr_sum / resolved_count))
    fi

    case "$format" in
        json)
            jq -n \
                --arg total "$total_incidents" \
                --arg p0 "$p0_count" \
                --arg p1 "$p1_count" \
                --arg p2 "$p2_count" \
                --arg p3 "$p3_count" \
                --arg resolved "$resolved_count" \
                --arg mttr "$mttr_avg" \
                '{
                    total: ($total | tonumber),
                    by_severity: {p0: ($p0 | tonumber), p1: ($p1 | tonumber), p2: ($p2 | tonumber), p3: ($p3 | tonumber)},
                    resolved: ($resolved | tonumber),
                    mttr_seconds: ($mttr | tonumber)
                }'
            ;;
        *)
            echo -e "${BOLD}Incident Statistics${RESET}"
            echo -e "${DIM}────────────────────────────────────────────────────────────────${RESET}"
            echo "Total Incidents: $total_incidents"
            echo " P0 (Critical): $p0_count"
            echo " P1 (High): $p1_count"
            echo " P2 (Medium): $p2_count"
            echo " P3 (Low): $p3_count"
            echo ""
            echo "Resolved: $resolved_count"
            echo "MTTR (avg): $(format_duration "$mttr_avg")"
            ;;
    esac
}
567
+
568
+ # ─── Stop Command ──────────────────────────────────────────────────────────
569
+
570
# Stop the background monitor recorded in $MONITOR_PID_FILE.
# Warns "Monitor not running" when there is no PID file or the recorded
# process is not alive; otherwise signals it and removes the PID file.
cmd_stop() {
    if [[ ! -f "$MONITOR_PID_FILE" ]]; then
        warn "Monitor not running"
        return
    fi

    local pid
    pid=$(cat "$MONITOR_PID_FILE" 2>/dev/null || echo "")

    # kill -0 only probes for the process; no signal is delivered.
    if [[ -z "$pid" ]] || ! kill -0 "$pid" 2>/dev/null; then
        warn "Monitor not running"
        return
    fi

    kill "$pid"
    rm -f "$MONITOR_PID_FILE"
    success "Monitor stopped (PID: $pid)"
}
585
+
586
+ # ─── Help Command ──────────────────────────────────────────────────────────
587
+
588
# Print the usage text for `shipwright incident` to stdout.
show_help() {
    # One entry per output line; each is printed with `echo -e` so the colour
    # variables are rendered.
    local -a help_lines=(
        "${CYAN}${BOLD}shipwright incident${RESET} — Autonomous incident detection & response"
        ""
        "${BOLD}USAGE${RESET}"
        " ${CYAN}shipwright incident${RESET} <command> [options]"
        ""
        "${BOLD}COMMANDS${RESET}"
        " ${CYAN}watch${RESET} [interval] Start monitoring for incidents (default: 60s)"
        " ${CYAN}stop${RESET} Stop incident monitoring"
        " ${CYAN}list${RESET} [format] List recent incidents (table|json)"
        " ${CYAN}show${RESET} <incident-id> Show details for an incident"
        " ${CYAN}report${RESET} <incident-id> Generate post-mortem report"
        " ${CYAN}stats${RESET} [format] Show incident statistics (table|json)"
        " ${CYAN}config${RESET} <cmd> Configure incident response (show|set)"
        " ${CYAN}help${RESET} Show this help"
        ""
        "${BOLD}EXAMPLES${RESET}"
        " ${DIM}shipwright incident watch # Start monitoring${RESET}"
        " ${DIM}shipwright incident list # Show all incidents${RESET}"
        " ${DIM}shipwright incident show inc-1702 # Show incident details${RESET}"
        " ${DIM}shipwright incident report inc-1702 # Generate post-mortem${RESET}"
        " ${DIM}shipwright incident stats # Show MTTR and frequency${RESET}"
    )

    local help_line
    for help_line in "${help_lines[@]}"; do
        echo -e "$help_line"
    done
}
611
+
612
+ # ─── Main Router ───────────────────────────────────────────────────────────
613
+
614
# Entry point: make sure the state directory exists, then dispatch the first
# argument (default "help") to the matching sub-command with the remaining
# arguments. `config` is not implemented and returns 1; an unknown command
# prints the help text and exits with status 1.
main() {
    ensure_incident_dir

    local cmd="${1:-help}"
    shift 2>/dev/null || true

    case "$cmd" in
        watch|stop|list|show|report|stats)
            # Every one of these maps directly onto a cmd_<name> function.
            "cmd_${cmd}" "$@"
            ;;
        config)
            error "config command not yet implemented"
            return 1
            ;;
        help|--help|-h)
            show_help
            ;;
        *)
            error "Unknown command: $cmd"
            show_help
            exit 1
            ;;
    esac
}
653
+
654
# Run main only when this file is executed directly; sourcing it just defines
# the functions.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
    main "$@"
fi