shipwright-cli 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/README.md +21 -7
  2. package/config/defaults.json +25 -2
  3. package/config/policy.json +1 -1
  4. package/dashboard/public/index.html +6 -0
  5. package/dashboard/public/styles.css +76 -0
  6. package/dashboard/server.ts +51 -0
  7. package/dashboard/src/core/api.ts +5 -0
  8. package/dashboard/src/types/api.ts +10 -0
  9. package/dashboard/src/views/metrics.ts +69 -1
  10. package/package.json +1 -1
  11. package/scripts/lib/daemon-adaptive.sh +4 -2
  12. package/scripts/lib/daemon-patrol.sh +2 -2
  13. package/scripts/lib/daemon-state.sh +7 -0
  14. package/scripts/lib/helpers.sh +3 -1
  15. package/scripts/lib/pipeline-detection.sh +1 -1
  16. package/scripts/lib/pipeline-intelligence.sh +5 -3
  17. package/scripts/lib/pipeline-quality-checks.sh +8 -4
  18. package/scripts/lib/pipeline-stages.sh +132 -2
  19. package/scripts/sw +1 -1
  20. package/scripts/sw-activity.sh +1 -7
  21. package/scripts/sw-adaptive.sh +7 -7
  22. package/scripts/sw-adversarial.sh +1 -1
  23. package/scripts/sw-architecture-enforcer.sh +1 -1
  24. package/scripts/sw-auth.sh +1 -1
  25. package/scripts/sw-autonomous.sh +1 -1
  26. package/scripts/sw-changelog.sh +1 -1
  27. package/scripts/sw-checkpoint.sh +1 -1
  28. package/scripts/sw-ci.sh +11 -6
  29. package/scripts/sw-cleanup.sh +1 -1
  30. package/scripts/sw-code-review.sh +36 -17
  31. package/scripts/sw-connect.sh +1 -1
  32. package/scripts/sw-context.sh +1 -1
  33. package/scripts/sw-cost.sh +60 -3
  34. package/scripts/sw-daemon.sh +5 -2
  35. package/scripts/sw-dashboard.sh +1 -1
  36. package/scripts/sw-db.sh +13 -5
  37. package/scripts/sw-decide.sh +1 -1
  38. package/scripts/sw-decompose.sh +1 -1
  39. package/scripts/sw-deps.sh +1 -1
  40. package/scripts/sw-developer-simulation.sh +1 -1
  41. package/scripts/sw-discovery.sh +54 -4
  42. package/scripts/sw-doc-fleet.sh +1 -1
  43. package/scripts/sw-docs-agent.sh +1 -1
  44. package/scripts/sw-docs.sh +1 -1
  45. package/scripts/sw-doctor.sh +1 -1
  46. package/scripts/sw-dora.sh +1 -1
  47. package/scripts/sw-durable.sh +9 -5
  48. package/scripts/sw-e2e-orchestrator.sh +1 -1
  49. package/scripts/sw-eventbus.sh +7 -4
  50. package/scripts/sw-evidence.sh +1 -1
  51. package/scripts/sw-feedback.sh +1 -1
  52. package/scripts/sw-fix.sh +1 -1
  53. package/scripts/sw-fleet-discover.sh +1 -1
  54. package/scripts/sw-fleet-viz.sh +6 -4
  55. package/scripts/sw-fleet.sh +1 -1
  56. package/scripts/sw-github-app.sh +3 -2
  57. package/scripts/sw-github-checks.sh +1 -1
  58. package/scripts/sw-github-deploy.sh +1 -1
  59. package/scripts/sw-github-graphql.sh +1 -1
  60. package/scripts/sw-guild.sh +1 -1
  61. package/scripts/sw-heartbeat.sh +1 -1
  62. package/scripts/sw-hygiene.sh +5 -3
  63. package/scripts/sw-incident.sh +9 -5
  64. package/scripts/sw-init.sh +1 -1
  65. package/scripts/sw-instrument.sh +1 -1
  66. package/scripts/sw-intelligence.sh +3 -2
  67. package/scripts/sw-jira.sh +1 -1
  68. package/scripts/sw-launchd.sh +1 -1
  69. package/scripts/sw-linear.sh +1 -1
  70. package/scripts/sw-logs.sh +1 -1
  71. package/scripts/sw-loop.sh +72 -16
  72. package/scripts/sw-memory.sh +2 -2
  73. package/scripts/sw-mission-control.sh +1 -1
  74. package/scripts/sw-model-router.sh +3 -2
  75. package/scripts/sw-otel.sh +4 -2
  76. package/scripts/sw-oversight.sh +1 -1
  77. package/scripts/sw-pipeline-composer.sh +3 -1
  78. package/scripts/sw-pipeline-vitals.sh +11 -6
  79. package/scripts/sw-pipeline.sh +20 -8
  80. package/scripts/sw-pm.sh +5 -4
  81. package/scripts/sw-pr-lifecycle.sh +1 -1
  82. package/scripts/sw-predictive.sh +11 -5
  83. package/scripts/sw-prep.sh +1 -1
  84. package/scripts/sw-ps.sh +1 -1
  85. package/scripts/sw-public-dashboard.sh +3 -2
  86. package/scripts/sw-quality.sh +13 -6
  87. package/scripts/sw-reaper.sh +1 -1
  88. package/scripts/sw-recruit.sh +1 -1
  89. package/scripts/sw-regression.sh +1 -1
  90. package/scripts/sw-release-manager.sh +1 -1
  91. package/scripts/sw-release.sh +1 -1
  92. package/scripts/sw-remote.sh +1 -1
  93. package/scripts/sw-replay.sh +1 -1
  94. package/scripts/sw-retro.sh +1 -1
  95. package/scripts/sw-review-rerun.sh +1 -1
  96. package/scripts/sw-scale.sh +5 -3
  97. package/scripts/sw-security-audit.sh +1 -1
  98. package/scripts/sw-self-optimize.sh +168 -4
  99. package/scripts/sw-session.sh +1 -1
  100. package/scripts/sw-setup.sh +1 -1
  101. package/scripts/sw-standup.sh +1 -1
  102. package/scripts/sw-status.sh +1 -1
  103. package/scripts/sw-strategic.sh +11 -6
  104. package/scripts/sw-stream.sh +7 -4
  105. package/scripts/sw-swarm.sh +3 -2
  106. package/scripts/sw-team-stages.sh +1 -1
  107. package/scripts/sw-templates.sh +3 -3
  108. package/scripts/sw-testgen.sh +11 -6
  109. package/scripts/sw-tmux-pipeline.sh +1 -1
  110. package/scripts/sw-tmux.sh +35 -1
  111. package/scripts/sw-trace.sh +1 -1
  112. package/scripts/sw-tracker.sh +1 -1
  113. package/scripts/sw-triage.sh +2 -2
  114. package/scripts/sw-upgrade.sh +1 -1
  115. package/scripts/sw-ux.sh +1 -1
  116. package/scripts/sw-webhook.sh +3 -2
  117. package/scripts/sw-widgets.sh +7 -4
  118. package/scripts/sw-worktree.sh +1 -1
package/README.md CHANGED
@@ -13,7 +13,7 @@
13
13
  <a href="https://github.com/sethdford/shipwright/actions/workflows/test.yml"><img src="https://github.com/sethdford/shipwright/actions/workflows/test.yml/badge.svg" alt="Tests"></a>
14
14
  <a href="https://github.com/sethdford/shipwright/actions/workflows/shipwright-pipeline.yml"><img src="https://github.com/sethdford/shipwright/actions/workflows/shipwright-pipeline.yml/badge.svg" alt="Pipeline"></a>
15
15
  <img src="https://img.shields.io/badge/tests-141_suites_passing-4ade80?style=flat-square" alt="141 suites">
16
- <img src="https://img.shields.io/badge/version-3.1.0-00d4ff?style=flat-square" alt="v3.1.0">
16
+ <img src="https://img.shields.io/badge/version-3.2.0-00d4ff?style=flat-square" alt="v3.2.0">
17
17
  <img src="https://img.shields.io/badge/license-MIT-green?style=flat-square" alt="MIT License">
18
18
  <img src="https://img.shields.io/badge/bash-3.2%2B-7c3aed?style=flat-square" alt="Bash 3.2+">
19
19
  </p>
@@ -24,7 +24,7 @@
24
24
 
25
25
  - [Shipwright Builds Itself](#shipwright-builds-itself)
26
26
  - [Code Factory Pattern](#code-factory-pattern)
27
- - [What's New in v3.1.0](#whats-new-in-v310)
27
+ - [What's New in v3.2.0](#whats-new-in-v320)
28
28
  - [How It Works](#how-it-works)
29
29
  - [Install](#install)
30
30
  - [Quick Start](#quick-start)
@@ -77,7 +77,8 @@ Shipwright extends the Code Factory pattern with capabilities most implementatio
77
77
  - **12-stage pipeline** with self-healing builds, adversarial review, and compound quality gates
78
78
  - **Predictive risk scoring** using GitHub signals (security alerts, contributor expertise, file churn)
79
79
  - **Persistent memory** — failure patterns, fix effectiveness, and prediction accuracy compound over time
80
- - **Auto-learning** — self-optimize runs automatically after every pipeline completion
80
+ - **Auto-learning** — self-optimize runs automatically after every pipeline completion, including context efficiency tuning
81
+ - **Decision engine** — tiered autonomous decisions with outcome learning and deduplication
81
82
  - **Unified model routing** — single source of truth for model selection across all components
82
83
  - **Evidence-gated merges** — SHA discipline ensures all evidence validated against current PR head
83
84
  - **Semantic quality audits** — Claude-powered audits with grep fallback when Claude unavailable
@@ -108,7 +109,7 @@ shipwright incident gap sla
108
109
 
109
110
  ---
110
111
 
111
- ## What's New in v3.1.0
112
+ ## What's New in v3.2.0
112
113
 
113
114
  **Code Factory pattern** — deterministic, risk-aware agent delivery with machine-verifiable evidence:
114
115
 
@@ -282,7 +283,7 @@ Each stage is configurable with quality gates that auto-proceed or pause for app
282
283
 
283
284
  ### Intelligence Layer
284
285
 
285
- 7 modules that make the pipeline smarter over time. **Auto mode**: intelligence is enabled when Claude CLI is available; set `intelligence.enabled=false` to disable. All modules degrade gracefully.
286
+ 7 modules that make the pipeline smarter over time. **Enabled by default**: intelligence is on when Claude CLI is available, with optimization and prediction active out of the box. Set `intelligence.enabled=false` to disable. All modules degrade gracefully.
286
287
 
287
288
  | Module | What It Does |
288
289
  | ---------------------------- | --------------------------------------------------------------------------------------------------------------------- |
@@ -290,7 +291,7 @@ Each stage is configurable with quality gates that auto-proceed or pause for app
290
291
  | **Pipeline Composer** | Generates custom pipeline configs from codebase analysis (file churn, test coverage, dependencies) |
291
292
  | **Predictive Risk** | Scores issues for risk using GitHub signals (security alerts, similar past issues, contributor expertise) |
292
293
  | **Adversarial Review** | Red-team code review — finds security flaws, edge cases, failure modes. Cross-checks against CodeQL/Dependabot alerts |
293
- | **Self-Optimization** | Reads DORA metrics and auto-tunes daemon config. Proportional template weighting, adaptive memory timescales |
294
+ | **Self-Optimization** | Reads DORA metrics and auto-tunes daemon config. Includes context efficiency closed loop for token budget tuning |
294
295
  | **Developer Simulation** | 3-persona review (security, performance, maintainability) before PR creation |
295
296
  | **Architecture Enforcement** | Living architectural model with violation detection and dependency direction rules |
296
297
 
@@ -309,6 +310,19 @@ Native GitHub API integration enriches every intelligence module:
309
310
  | **Contributors** | CODEOWNERS-based reviewer routing, top-contributor fallback, auto-approve as last resort |
310
311
  | **Branch Protection** | Checks required reviews and status checks before attempting auto-merge |
311
312
 
313
+ ### Decision Engine
314
+
315
+ The autonomous decision engine (`config/policy.json` → `decision` section) handles routine operational decisions with outcome learning. Decisions are tiered by risk, with low-risk actions auto-approved and higher tiers escalated. The engine learns from outcomes to improve future decisions.
316
+
317
+ ### Context Engineering
318
+
319
+ Intelligent context window management for pipeline agents:
320
+
321
+ - **Budget-aware trimming** — Configurable character budgets for prompt composition (`context_budget_chars`)
322
+ - **Section-level trimming** — Independent limits for memory, git history, hotspot files, and test output
323
+ - **Context efficiency metrics** — Tracks budget utilization and trim ratios per iteration
324
+ - **Self-tuning** — The self-optimization loop analyzes context efficiency events and recommends budget adjustments
325
+
312
326
  ### Autonomous Daemon
313
327
 
314
328
  ```bash
@@ -354,7 +368,7 @@ Per-pipeline cost tracking with model pricing, budget enforcement, and ROI analy
354
368
  shipwright dashboard start
355
369
  ```
356
370
 
357
- Web dashboard with live pipeline progress, GitHub context (security alerts, contributors, deployments), DORA metrics, and cost tracking. WebSocket-powered, updates in real-time.
371
+ Web dashboard with live pipeline progress, GitHub context (security alerts, contributors, deployments), DORA metrics, cost tracking, and context efficiency metrics. WebSocket-powered, updates in real-time.
358
372
 
359
373
  ### Webhook Receiver
360
374
 
package/config/defaults.json CHANGED
@@ -25,6 +25,7 @@
25
25
  "build_test_retries": 3,
26
26
  "claude_timeout": 1800,
27
27
  "heartbeat_interval": 30,
28
+ "composed_cache_ttl": 3600,
28
29
  "branch_pattern": "shipwright/issue-{issue}",
29
30
  "stage_order": [
30
31
  "intake",
@@ -47,7 +48,12 @@
47
48
  "max_restarts": 0,
48
49
  "fast_test_interval": 5,
49
50
  "convergence_threshold": 3,
50
- "multi_agent_sleep": 5
51
+ "multi_agent_sleep": 5,
52
+ "context_budget_chars": 180000,
53
+ "context_trim_memory_chars": 20000,
54
+ "context_trim_git_entries": 10,
55
+ "context_trim_hotspot_files": 5,
56
+ "context_trim_test_lines": 50
51
57
  },
52
58
  "dashboard": {
53
59
  "port": 8767,
@@ -77,12 +83,29 @@
77
83
  "ab_test_ratio": 0.2,
78
84
  "claude_timeout": 60
79
85
  },
86
+ "predictive": {
87
+ "default_risk_score": 50,
88
+ "keyword_risk_score": 70
89
+ },
90
+ "api_optimization": {
91
+ "programmatic_tool_calling": true,
92
+ "tool_search_enabled": true,
93
+ "tool_search_type": "bm25",
94
+ "defer_unused_tools": true,
95
+ "web_search_version": "web_search_20260209",
96
+ "web_fetch_version": "web_fetch_20260209",
97
+ "dynamic_filtering": true,
98
+ "code_execution_sandbox": true,
99
+ "beta_header": "code-execution-web-tools-2026-02-09"
100
+ },
80
101
  "quality": {
81
102
  "gate_score_threshold": 70,
82
103
  "secret_threshold": 3,
83
104
  "min_file_count": 10,
84
105
  "score_weight_per_file": 25,
85
- "pass_rate_threshold": 5.0
106
+ "pass_rate_threshold": 5.0,
107
+ "bundle_growth_legacy_pct": 20,
108
+ "perf_regression_legacy_pct": 30
86
109
  },
87
110
  "cleanup": {
88
111
  "artifact_age_days": 7,
package/config/policy.json CHANGED
@@ -224,7 +224,7 @@
224
224
  "promote_threshold_success_rate": 85
225
225
  },
226
226
  "decision": {
227
- "enabled": false,
227
+ "enabled": true,
228
228
  "cycle_interval_seconds": 1800,
229
229
  "tiers_file": "config/decision-tiers.json",
230
230
  "outcome_learning_enabled": true,
package/dashboard/public/index.html CHANGED
@@ -516,6 +516,12 @@
516
516
  id="cost-trend-container"
517
517
  ></div>
518
518
 
519
+ <!-- Context efficiency -->
520
+ <div
521
+ class="metric-card metric-card-wide"
522
+ id="context-efficiency-container"
523
+ ></div>
524
+
519
525
  <!-- DORA trend -->
520
526
  <div
521
527
  class="metric-card metric-card-wide"
package/dashboard/public/styles.css CHANGED
@@ -3634,6 +3634,82 @@ body::-webkit-scrollbar-thumb {
3634
3634
  text-align: center;
3635
3635
  }
3636
3636
 
3637
+ /* Context efficiency widget */
3638
+ .ctx-eff-grid {
3639
+ display: grid;
3640
+ grid-template-columns: repeat(4, 1fr);
3641
+ gap: 16px;
3642
+ }
3643
+
3644
+ .ctx-eff-card {
3645
+ background: var(--glass-bg);
3646
+ border: 1px solid var(--glass-border);
3647
+ border-radius: 10px;
3648
+ padding: 12px;
3649
+ display: flex;
3650
+ flex-direction: column;
3651
+ gap: 6px;
3652
+ }
3653
+
3654
+ .ctx-eff-card-label {
3655
+ font-family: var(--font-mono);
3656
+ font-size: 0.65rem;
3657
+ font-weight: 600;
3658
+ letter-spacing: 0.08em;
3659
+ color: var(--text-muted);
3660
+ text-transform: uppercase;
3661
+ }
3662
+
3663
+ .ctx-eff-gauge {
3664
+ height: 8px;
3665
+ border-radius: 4px;
3666
+ background: var(--glass-border);
3667
+ overflow: hidden;
3668
+ }
3669
+
3670
+ .ctx-eff-gauge-fill {
3671
+ height: 100%;
3672
+ border-radius: 4px;
3673
+ transition: width 0.5s ease;
3674
+ }
3675
+
3676
+ .ctx-eff-gauge-fill.ctx-eff-high {
3677
+ background: #4ade80;
3678
+ }
3679
+
3680
+ .ctx-eff-gauge-fill.ctx-eff-mid {
3681
+ background: #00d4ff;
3682
+ }
3683
+
3684
+ .ctx-eff-gauge-fill.ctx-eff-low {
3685
+ background: #f43f5e;
3686
+ }
3687
+
3688
+ .ctx-eff-gauge-fill.ctx-eff-trim {
3689
+ background: #7c3aed;
3690
+ }
3691
+
3692
+ .ctx-eff-value {
3693
+ font-family: var(--font-mono);
3694
+ font-size: 0.85rem;
3695
+ font-weight: 700;
3696
+ color: var(--text-primary);
3697
+ }
3698
+
3699
+ .ctx-eff-big {
3700
+ font-family: var(--font-mono);
3701
+ font-size: 1.4rem;
3702
+ font-weight: 700;
3703
+ color: var(--text-primary);
3704
+ line-height: 1;
3705
+ }
3706
+
3707
+ .ctx-eff-sub {
3708
+ font-family: var(--font-mono);
3709
+ font-size: 0.65rem;
3710
+ color: var(--text-muted);
3711
+ }
3712
+
3637
3713
  .dora-trend-grid {
3638
3714
  display: grid;
3639
3715
  grid-template-columns: repeat(4, 1fr);
package/dashboard/server.ts CHANGED
@@ -3690,6 +3690,57 @@ const server = Bun.serve({
3690
3690
  });
3691
3691
  }
3692
3692
 
3693
+ // REST: Context efficiency metrics (from loop.context_efficiency events)
3694
+ if (pathname === "/api/context-efficiency") {
3695
+ const period = parseInt(url.searchParams.get("period") || "7");
3696
+ const events = readEvents();
3697
+ const now = Math.floor(Date.now() / 1000);
3698
+ const cutoff = now - period * 86400;
3699
+
3700
+ let totalUtil = 0;
3701
+ let totalRatio = 0;
3702
+ let totalRaw = 0;
3703
+ let totalTrimmed = 0;
3704
+ let trimEvents = 0;
3705
+ let count = 0;
3706
+
3707
+ for (const e of events) {
3708
+ if ((e.ts_epoch || 0) < cutoff) continue;
3709
+ if (e.type !== "loop.context_efficiency") continue;
3710
+
3711
+ const util = parseFloat(String(e.budget_utilization || 0));
3712
+ const ratio = parseFloat(String(e.trim_ratio || 0));
3713
+ const raw = parseInt(String(e.raw_prompt_chars || 0), 10);
3714
+ const trimmed = parseInt(String(e.trimmed_prompt_chars || 0), 10);
3715
+
3716
+ totalUtil += util;
3717
+ totalRatio += ratio;
3718
+ totalRaw += raw;
3719
+ totalTrimmed += trimmed;
3720
+ if (ratio > 0) trimEvents++;
3721
+ count++;
3722
+ }
3723
+
3724
+ const avgUtilization =
3725
+ count > 0 ? Math.round((totalUtil / count) * 10) / 10 : 0;
3726
+ const avgTrimRatio =
3727
+ count > 0 ? Math.round((totalRatio / count) * 10) / 10 : 0;
3728
+ const totalDiscarded = totalRaw - totalTrimmed;
3729
+
3730
+ return new Response(
3731
+ JSON.stringify({
3732
+ avg_utilization: avgUtilization,
3733
+ avg_trim_ratio: avgTrimRatio,
3734
+ total_raw_chars: totalRaw,
3735
+ total_trimmed_chars: totalTrimmed,
3736
+ total_discarded_chars: totalDiscarded,
3737
+ trim_events: trimEvents,
3738
+ total_iterations: count,
3739
+ }),
3740
+ { headers: { "Content-Type": "application/json", ...CORS_HEADERS } },
3741
+ );
3742
+ }
3743
+
3693
3744
  // REST: DORA trend (weekly sliding windows)
3694
3745
  if (pathname === "/api/metrics/dora-trend") {
3695
3746
  const period = parseInt(url.searchParams.get("period") || "30");
package/dashboard/src/core/api.ts CHANGED
@@ -7,6 +7,7 @@ import type {
7
7
  MachineInfo,
8
8
  JoinToken,
9
9
  CostBreakdown,
10
+ ContextEfficiency,
10
11
  DaemonConfig,
11
12
  AlertInfo,
12
13
  InsightsData,
@@ -113,6 +114,10 @@ export const fetchCostTrend = (period = 30) =>
113
114
  `/api/costs/trend?period=${period}`,
114
115
  );
115
116
 
117
+ // Context efficiency
118
+ export const fetchContextEfficiency = (period = 7) =>
119
+ request<ContextEfficiency>(`/api/context-efficiency?period=${period}`);
120
+
116
121
  // Daemon
117
122
  export const fetchDaemonConfig = () =>
118
123
  request<DaemonConfig>("/api/daemon/config");
package/dashboard/src/types/api.ts CHANGED
@@ -255,6 +255,16 @@ export interface HeatmapData {
255
255
  heatmap: Record<string, Record<string, number>>;
256
256
  }
257
257
 
258
+ export interface ContextEfficiency {
259
+ avg_utilization: number;
260
+ avg_trim_ratio: number;
261
+ total_raw_chars: number;
262
+ total_trimmed_chars: number;
263
+ total_discarded_chars: number;
264
+ trim_events: number;
265
+ total_iterations: number;
266
+ }
267
+
258
268
  export interface DaemonConfig {
259
269
  paused?: boolean;
260
270
  config?: Record<string, unknown>;
package/dashboard/src/views/metrics.ts CHANGED
@@ -82,10 +82,12 @@ function renderMetrics(data: MetricsData): void {
82
82
  doraContainer.style.display = "none";
83
83
  }
84
84
 
85
- // Cost breakdown/trend
85
+ // Cost breakdown/trend/context efficiency
86
86
  if (document.getElementById("cost-breakdown-container"))
87
87
  renderCostBreakdown();
88
88
  if (document.getElementById("cost-trend-container")) renderCostTrend();
89
+ if (document.getElementById("context-efficiency-container"))
90
+ renderContextEfficiency();
89
91
  if (document.getElementById("dora-trend-container")) renderDoraTrend();
90
92
  if (document.getElementById("stage-performance-container"))
91
93
  renderStagePerformance();
@@ -256,6 +258,72 @@ function renderCostTrend(): void {
256
258
  });
257
259
  }
258
260
 
261
+ function renderContextEfficiency(): void {
262
+ const container = document.getElementById("context-efficiency-container");
263
+ if (!container) return;
264
+ api
265
+ .fetchContextEfficiency()
266
+ .then((data) => {
267
+ if (!data.total_iterations) {
268
+ container.innerHTML =
269
+ '<div class="empty-state"><p>No context efficiency data</p></div>';
270
+ return;
271
+ }
272
+ const utilPct = Math.min(data.avg_utilization, 100);
273
+ const utilClass =
274
+ utilPct >= 90
275
+ ? "ctx-eff-high"
276
+ : utilPct >= 60
277
+ ? "ctx-eff-mid"
278
+ : "ctx-eff-low";
279
+ const trimPct = Math.min(data.avg_trim_ratio, 100);
280
+
281
+ let html =
282
+ '<span class="metric-label">CONTEXT EFFICIENCY</span>' +
283
+ '<div class="ctx-eff-grid">';
284
+
285
+ // Budget utilization gauge
286
+ html +=
287
+ '<div class="ctx-eff-card">' +
288
+ '<span class="ctx-eff-card-label">Budget Utilization</span>' +
289
+ `<div class="ctx-eff-gauge"><div class="ctx-eff-gauge-fill ${utilClass}" style="width:${utilPct.toFixed(0)}%"></div></div>` +
290
+ `<span class="ctx-eff-value">${data.avg_utilization.toFixed(1)}%</span>` +
291
+ "</div>";
292
+
293
+ // Trim ratio
294
+ html +=
295
+ '<div class="ctx-eff-card">' +
296
+ '<span class="ctx-eff-card-label">Avg Trim Ratio</span>' +
297
+ `<div class="ctx-eff-gauge"><div class="ctx-eff-gauge-fill ctx-eff-trim" style="width:${trimPct.toFixed(0)}%"></div></div>` +
298
+ `<span class="ctx-eff-value">${data.avg_trim_ratio.toFixed(1)}%</span>` +
299
+ "</div>";
300
+
301
+ // Chars saved
302
+ const savedK = Math.round(data.total_discarded_chars / 1000);
303
+ const totalK = Math.round(data.total_raw_chars / 1000);
304
+ html +=
305
+ '<div class="ctx-eff-card">' +
306
+ '<span class="ctx-eff-card-label">Chars Discarded</span>' +
307
+ `<span class="ctx-eff-big">${fmtNum(savedK)}K</span>` +
308
+ `<span class="ctx-eff-sub">of ${fmtNum(totalK)}K generated</span>` +
309
+ "</div>";
310
+
311
+ // Trim events
312
+ html +=
313
+ '<div class="ctx-eff-card">' +
314
+ '<span class="ctx-eff-card-label">Trim Events</span>' +
315
+ `<span class="ctx-eff-big">${data.trim_events}</span>` +
316
+ `<span class="ctx-eff-sub">of ${data.total_iterations} iterations</span>` +
317
+ "</div>";
318
+
319
+ html += "</div>";
320
+ container.innerHTML = html;
321
+ })
322
+ .catch((err) => {
323
+ container.innerHTML = `<div class="empty-state"><p>Failed to load: ${escapeHtml(String(err))}</p></div>`;
324
+ });
325
+ }
326
+
259
327
  function renderDoraTrend(): void {
260
328
  const container = document.getElementById("dora-trend-container");
261
329
  if (!container) return;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "shipwright-cli",
3
- "version": "3.1.0",
3
+ "version": "3.2.0",
4
4
  "description": "Orchestrate autonomous Claude Code agent teams in tmux",
5
5
  "bin": {
6
6
  "shipwright": "scripts/sw",
package/scripts/lib/daemon-adaptive.sh CHANGED
@@ -223,10 +223,12 @@ daemon_collect_snapshot() {
223
223
  if [[ -d "$worktree/.git" ]] || [[ -f "$worktree/.git" ]]; then
224
224
  diff_lines=$(cd "$worktree" && git diff --stat 2>/dev/null | tail -1 | grep -o '[0-9]* insertion' | grep -o '[0-9]*' || echo "0")
225
225
  [[ -z "$diff_lines" ]] && diff_lines=0
226
- files_changed=$(cd "$worktree" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ' || echo "0")
226
+ files_changed=$(cd "$worktree" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ' || true)
227
+ files_changed="${files_changed:-0}"
227
228
  # Also count untracked files the agent has created
228
229
  local untracked
229
- untracked=$(cd "$worktree" && git ls-files --others --exclude-standard 2>/dev/null | wc -l | tr -d ' ' || echo "0")
230
+ untracked=$(cd "$worktree" && git ls-files --others --exclude-standard 2>/dev/null | wc -l | tr -d ' ' || true)
231
+ untracked="${untracked:-0}"
230
232
  files_changed=$((files_changed + untracked))
231
233
  fi
232
234
 
package/scripts/lib/daemon-patrol.sh CHANGED
@@ -853,8 +853,8 @@ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
853
853
  usage_count=${usage_count:-0}
854
854
 
855
855
  local line_count
856
- line_count=$(wc -l < "$script" 2>/dev/null | tr -d ' ' || echo "0")
857
- line_count=${line_count:-0}
856
+ line_count=$(wc -l < "$script" 2>/dev/null | tr -d ' ' || true)
857
+ line_count="${line_count:-0}"
858
858
 
859
859
  untested_entries="${untested_entries}${usage_count}|${basename}|${line_count}\n"
860
860
  findings=$((findings + 1))
package/scripts/lib/daemon-state.sh CHANGED
@@ -458,6 +458,13 @@ get_active_count() {
458
458
  echo 0
459
459
  return
460
460
  fi
461
+ # Validate state file JSON before parsing (mid-flight corruption check)
462
+ if ! jq empty "$STATE_FILE" 2>/dev/null; then
463
+ daemon_log WARN "State file corrupted mid-flight — backing up and resetting"
464
+ cp "$STATE_FILE" "${STATE_FILE}.corrupted.$(date +%s)" 2>/dev/null || true
465
+ init_state
466
+ return
467
+ fi
461
468
  jq -r '.active_jobs | length' "$STATE_FILE" 2>/dev/null || echo 0
462
469
  }
463
470
 
package/scripts/lib/helpers.sh CHANGED
@@ -89,7 +89,9 @@ emit_event() {
89
89
  local _lock_file="${EVENTS_FILE}.lock"
90
90
  (
91
91
  if command -v flock >/dev/null 2>&1; then
92
- flock -w 2 200 2>/dev/null || true
92
+ if ! flock -w 2 200 2>/dev/null; then
93
+ echo "WARN: emit_event lock timeout — concurrent write possible" >&2
94
+ fi
93
95
  fi
94
96
  echo "$_event_line" >> "$EVENTS_FILE"
95
97
  ) 200>"$_lock_file"
package/scripts/lib/pipeline-detection.sh CHANGED
@@ -194,7 +194,7 @@ branch_prefix_for_type() {
194
194
  fi
195
195
  fi
196
196
 
197
- # Fallback: hardcoded mapping
197
+ # Fallback: default branch prefix mapping
198
198
  case "$task_type" in
199
199
  bug) echo "fix" ;;
200
200
  refactor) echo "refactor" ;;
package/scripts/lib/pipeline-intelligence.sh CHANGED
@@ -305,7 +305,7 @@ $content"
305
305
 
306
306
  # ──────────────────────────────────────────────────────────────────────────────
307
307
  # 3. Adaptive Cycle Limits
308
- # Replaces hardcoded max_cycles with convergence-driven limits.
308
+ # Replaces default max_cycles with convergence-driven limits.
309
309
  # Takes the base limit, returns an adjusted limit based on:
310
310
  # - Learned iteration model
311
311
  # - Convergence/divergence signals
@@ -1148,13 +1148,15 @@ stage_compound_quality() {
1148
1148
  _cq_real_changes=$(git diff --name-only "origin/${BASE_BRANCH:-main}...HEAD" \
1149
1149
  -- . ':!.claude/loop-state.md' ':!.claude/pipeline-state.md' \
1150
1150
  ':!.claude/pipeline-artifacts/*' ':!**/progress.md' \
1151
- ':!**/error-summary.json' 2>/dev/null | wc -l || echo "0")
1151
+ ':!**/error-summary.json' 2>/dev/null | wc -l || true)
1152
+ _cq_real_changes="${_cq_real_changes:-0}"
1152
1153
  _cq_real_changes=$(echo "$_cq_real_changes" | tr -d '[:space:]')
1153
1154
  [[ -z "$_cq_real_changes" ]] && _cq_real_changes=0
1154
1155
  # Fallback: if no remote, compare against first commit
1155
1156
  if [[ "$_cq_real_changes" -eq 0 ]] 2>/dev/null; then
1156
1157
  _cq_real_changes=$(git diff --name-only "$(git rev-list --max-parents=0 HEAD 2>/dev/null)...HEAD" \
1157
- -- . ':!.claude/*' ':!**/progress.md' ':!**/error-summary.json' 2>/dev/null | wc -l || echo "0")
1158
+ -- . ':!.claude/*' ':!**/progress.md' ':!**/error-summary.json' 2>/dev/null | wc -l || true)
1159
+ _cq_real_changes="${_cq_real_changes:-0}"
1158
1160
  _cq_real_changes=$(echo "$_cq_real_changes" | tr -d '[:space:]')
1159
1161
  [[ -z "$_cq_real_changes" ]] && _cq_real_changes=0
1160
1162
  fi
package/scripts/lib/pipeline-quality-checks.sh CHANGED
@@ -141,7 +141,9 @@ quality_check_bundle_size() {
141
141
  return 1
142
142
  fi
143
143
  else
144
- # Fallback: legacy memory baseline with hardcoded 20% (not enough history)
144
+ # Fallback: legacy memory baseline (not enough history for statistical check)
145
+ local bundle_growth_limit
146
+ bundle_growth_limit=$(_config_get_int "quality.bundle_growth_legacy_pct" 20 2>/dev/null || echo 20)
145
147
  local baseline_size=""
146
148
  if [[ -x "$SCRIPT_DIR/sw-memory.sh" ]]; then
147
149
  baseline_size=$(bash "$SCRIPT_DIR/sw-memory.sh" get "bundle_size_kb" 2>/dev/null) || true
@@ -150,7 +152,7 @@ quality_check_bundle_size() {
150
152
  local growth_pct
151
153
  growth_pct=$(awk -v cur="$bundle_size" -v base="$baseline_size" 'BEGIN{printf "%d", ((cur - base) / base) * 100}')
152
154
  echo "Baseline: ${baseline_size}KB | Growth: ${growth_pct}%" >> "$metrics_log"
153
- if [[ "$growth_pct" -gt 20 ]]; then
155
+ if [[ "$growth_pct" -gt "$bundle_growth_limit" ]]; then
154
156
  warn "Bundle size grew ${growth_pct}% (${baseline_size}KB → ${bundle_size}KB)"
155
157
  return 1
156
158
  fi
@@ -299,7 +301,9 @@ $tail_output" < /dev/null 2>/dev/null | grep -oE '^[0-9.]+$' | head -1 || true)
299
301
  return 1
300
302
  fi
301
303
  else
302
- # Fallback: legacy memory baseline with hardcoded 30% (not enough history)
304
+ # Fallback: legacy memory baseline (not enough history for statistical check)
305
+ local perf_regression_limit
306
+ perf_regression_limit=$(_config_get_int "quality.perf_regression_legacy_pct" 30 2>/dev/null || echo 30)
303
307
  local baseline_dur=""
304
308
  if [[ -x "$SCRIPT_DIR/sw-memory.sh" ]]; then
305
309
  baseline_dur=$(bash "$SCRIPT_DIR/sw-memory.sh" get "test_duration_s" 2>/dev/null) || true
@@ -308,7 +312,7 @@ $tail_output" < /dev/null 2>/dev/null | grep -oE '^[0-9.]+$' | head -1 || true)
308
312
  local slowdown_pct
309
313
  slowdown_pct=$(awk -v cur="$duration_ms" -v base="$baseline_dur" 'BEGIN{printf "%d", ((cur - base) / base) * 100}')
310
314
  echo "Baseline: ${baseline_dur}s | Slowdown: ${slowdown_pct}%" >> "$metrics_log"
311
- if [[ "$slowdown_pct" -gt 30 ]]; then
315
+ if [[ "$slowdown_pct" -gt "$perf_regression_limit" ]]; then
312
316
  warn "Tests ${slowdown_pct}% slower (${baseline_dur}s → ${duration_ms}s)"
313
317
  return 1
314
318
  fi