shipwright-cli 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/config/defaults.json +25 -2
- package/config/policy.json +1 -1
- package/dashboard/public/index.html +6 -0
- package/dashboard/public/styles.css +76 -0
- package/dashboard/server.ts +51 -0
- package/dashboard/src/core/api.ts +5 -0
- package/dashboard/src/types/api.ts +10 -0
- package/dashboard/src/views/metrics.ts +69 -1
- package/package.json +1 -1
- package/scripts/lib/daemon-adaptive.sh +4 -2
- package/scripts/lib/daemon-patrol.sh +2 -2
- package/scripts/lib/daemon-state.sh +7 -0
- package/scripts/lib/helpers.sh +3 -1
- package/scripts/lib/pipeline-detection.sh +1 -1
- package/scripts/lib/pipeline-intelligence.sh +5 -3
- package/scripts/lib/pipeline-quality-checks.sh +8 -4
- package/scripts/lib/pipeline-stages.sh +132 -2
- package/scripts/sw +1 -1
- package/scripts/sw-activity.sh +1 -7
- package/scripts/sw-adaptive.sh +7 -7
- package/scripts/sw-adversarial.sh +1 -1
- package/scripts/sw-architecture-enforcer.sh +1 -1
- package/scripts/sw-auth.sh +1 -1
- package/scripts/sw-autonomous.sh +1 -1
- package/scripts/sw-changelog.sh +1 -1
- package/scripts/sw-checkpoint.sh +1 -1
- package/scripts/sw-ci.sh +11 -6
- package/scripts/sw-cleanup.sh +1 -1
- package/scripts/sw-code-review.sh +36 -17
- package/scripts/sw-connect.sh +1 -1
- package/scripts/sw-context.sh +1 -1
- package/scripts/sw-cost.sh +60 -3
- package/scripts/sw-daemon.sh +5 -2
- package/scripts/sw-dashboard.sh +1 -1
- package/scripts/sw-db.sh +13 -5
- package/scripts/sw-decide.sh +1 -1
- package/scripts/sw-decompose.sh +1 -1
- package/scripts/sw-deps.sh +1 -1
- package/scripts/sw-developer-simulation.sh +1 -1
- package/scripts/sw-discovery.sh +54 -4
- package/scripts/sw-doc-fleet.sh +1 -1
- package/scripts/sw-docs-agent.sh +1 -1
- package/scripts/sw-docs.sh +1 -1
- package/scripts/sw-doctor.sh +1 -1
- package/scripts/sw-dora.sh +1 -1
- package/scripts/sw-durable.sh +9 -5
- package/scripts/sw-e2e-orchestrator.sh +1 -1
- package/scripts/sw-eventbus.sh +7 -4
- package/scripts/sw-evidence.sh +1 -1
- package/scripts/sw-feedback.sh +1 -1
- package/scripts/sw-fix.sh +1 -1
- package/scripts/sw-fleet-discover.sh +1 -1
- package/scripts/sw-fleet-viz.sh +6 -4
- package/scripts/sw-fleet.sh +1 -1
- package/scripts/sw-github-app.sh +3 -2
- package/scripts/sw-github-checks.sh +1 -1
- package/scripts/sw-github-deploy.sh +1 -1
- package/scripts/sw-github-graphql.sh +1 -1
- package/scripts/sw-guild.sh +1 -1
- package/scripts/sw-heartbeat.sh +1 -1
- package/scripts/sw-hygiene.sh +5 -3
- package/scripts/sw-incident.sh +9 -5
- package/scripts/sw-init.sh +1 -1
- package/scripts/sw-instrument.sh +1 -1
- package/scripts/sw-intelligence.sh +3 -2
- package/scripts/sw-jira.sh +1 -1
- package/scripts/sw-launchd.sh +1 -1
- package/scripts/sw-linear.sh +1 -1
- package/scripts/sw-logs.sh +1 -1
- package/scripts/sw-loop.sh +72 -16
- package/scripts/sw-memory.sh +2 -2
- package/scripts/sw-mission-control.sh +1 -1
- package/scripts/sw-model-router.sh +3 -2
- package/scripts/sw-otel.sh +4 -2
- package/scripts/sw-oversight.sh +1 -1
- package/scripts/sw-pipeline-composer.sh +3 -1
- package/scripts/sw-pipeline-vitals.sh +11 -6
- package/scripts/sw-pipeline.sh +20 -8
- package/scripts/sw-pm.sh +5 -4
- package/scripts/sw-pr-lifecycle.sh +1 -1
- package/scripts/sw-predictive.sh +11 -5
- package/scripts/sw-prep.sh +1 -1
- package/scripts/sw-ps.sh +1 -1
- package/scripts/sw-public-dashboard.sh +3 -2
- package/scripts/sw-quality.sh +13 -6
- package/scripts/sw-reaper.sh +1 -1
- package/scripts/sw-recruit.sh +1 -1
- package/scripts/sw-regression.sh +1 -1
- package/scripts/sw-release-manager.sh +1 -1
- package/scripts/sw-release.sh +1 -1
- package/scripts/sw-remote.sh +1 -1
- package/scripts/sw-replay.sh +1 -1
- package/scripts/sw-retro.sh +1 -1
- package/scripts/sw-review-rerun.sh +1 -1
- package/scripts/sw-scale.sh +5 -3
- package/scripts/sw-security-audit.sh +1 -1
- package/scripts/sw-self-optimize.sh +168 -4
- package/scripts/sw-session.sh +1 -1
- package/scripts/sw-setup.sh +1 -1
- package/scripts/sw-standup.sh +1 -1
- package/scripts/sw-status.sh +1 -1
- package/scripts/sw-strategic.sh +11 -6
- package/scripts/sw-stream.sh +7 -4
- package/scripts/sw-swarm.sh +3 -2
- package/scripts/sw-team-stages.sh +1 -1
- package/scripts/sw-templates.sh +3 -3
- package/scripts/sw-testgen.sh +11 -6
- package/scripts/sw-tmux-pipeline.sh +1 -1
- package/scripts/sw-tmux.sh +35 -1
- package/scripts/sw-trace.sh +1 -1
- package/scripts/sw-tracker.sh +1 -1
- package/scripts/sw-triage.sh +2 -2
- package/scripts/sw-upgrade.sh +1 -1
- package/scripts/sw-ux.sh +1 -1
- package/scripts/sw-webhook.sh +3 -2
- package/scripts/sw-widgets.sh +7 -4
- package/scripts/sw-worktree.sh +1 -1
package/README.md
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
<a href="https://github.com/sethdford/shipwright/actions/workflows/test.yml"><img src="https://github.com/sethdford/shipwright/actions/workflows/test.yml/badge.svg" alt="Tests"></a>
|
|
14
14
|
<a href="https://github.com/sethdford/shipwright/actions/workflows/shipwright-pipeline.yml"><img src="https://github.com/sethdford/shipwright/actions/workflows/shipwright-pipeline.yml/badge.svg" alt="Pipeline"></a>
|
|
15
15
|
<img src="https://img.shields.io/badge/tests-141_suites_passing-4ade80?style=flat-square" alt="141 suites">
|
|
16
|
-
<img src="https://img.shields.io/badge/version-3.1.0-00d4ff?style=flat-square" alt="v3.1.0">
|
|
16
|
+
<img src="https://img.shields.io/badge/version-3.2.0-00d4ff?style=flat-square" alt="v3.2.0">
|
|
17
17
|
<img src="https://img.shields.io/badge/license-MIT-green?style=flat-square" alt="MIT License">
|
|
18
18
|
<img src="https://img.shields.io/badge/bash-3.2%2B-7c3aed?style=flat-square" alt="Bash 3.2+">
|
|
19
19
|
</p>
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
|
|
25
25
|
- [Shipwright Builds Itself](#shipwright-builds-itself)
|
|
26
26
|
- [Code Factory Pattern](#code-factory-pattern)
|
|
27
|
-
- [What's New in v3.1.0](#whats-new-in-v310)
|
|
27
|
+
- [What's New in v3.2.0](#whats-new-in-v320)
|
|
28
28
|
- [How It Works](#how-it-works)
|
|
29
29
|
- [Install](#install)
|
|
30
30
|
- [Quick Start](#quick-start)
|
|
@@ -77,7 +77,8 @@ Shipwright extends the Code Factory pattern with capabilities most implementatio
|
|
|
77
77
|
- **12-stage pipeline** with self-healing builds, adversarial review, and compound quality gates
|
|
78
78
|
- **Predictive risk scoring** using GitHub signals (security alerts, contributor expertise, file churn)
|
|
79
79
|
- **Persistent memory** — failure patterns, fix effectiveness, and prediction accuracy compound over time
|
|
80
|
-
- **Auto-learning** — self-optimize runs automatically after every pipeline completion
|
|
80
|
+
- **Auto-learning** — self-optimize runs automatically after every pipeline completion, including context efficiency tuning
|
|
81
|
+
- **Decision engine** — tiered autonomous decisions with outcome learning and deduplication
|
|
81
82
|
- **Unified model routing** — single source of truth for model selection across all components
|
|
82
83
|
- **Evidence-gated merges** — SHA discipline ensures all evidence validated against current PR head
|
|
83
84
|
- **Semantic quality audits** — Claude-powered audits with grep fallback when Claude unavailable
|
|
@@ -108,7 +109,7 @@ shipwright incident gap sla
|
|
|
108
109
|
|
|
109
110
|
---
|
|
110
111
|
|
|
111
|
-
## What's New in v3.1.0
|
|
112
|
+
## What's New in v3.2.0
|
|
112
113
|
|
|
113
114
|
**Code Factory pattern** — deterministic, risk-aware agent delivery with machine-verifiable evidence:
|
|
114
115
|
|
|
@@ -282,7 +283,7 @@ Each stage is configurable with quality gates that auto-proceed or pause for app
|
|
|
282
283
|
|
|
283
284
|
### Intelligence Layer
|
|
284
285
|
|
|
285
|
-
7 modules that make the pipeline smarter over time. **
|
|
286
|
+
7 modules that make the pipeline smarter over time. **Enabled by default**: intelligence is on when Claude CLI is available, with optimization and prediction active out of the box. Set `intelligence.enabled=false` to disable. All modules degrade gracefully.
|
|
286
287
|
|
|
287
288
|
| Module | What It Does |
|
|
288
289
|
| ---------------------------- | --------------------------------------------------------------------------------------------------------------------- |
|
|
@@ -290,7 +291,7 @@ Each stage is configurable with quality gates that auto-proceed or pause for app
|
|
|
290
291
|
| **Pipeline Composer** | Generates custom pipeline configs from codebase analysis (file churn, test coverage, dependencies) |
|
|
291
292
|
| **Predictive Risk** | Scores issues for risk using GitHub signals (security alerts, similar past issues, contributor expertise) |
|
|
292
293
|
| **Adversarial Review** | Red-team code review — finds security flaws, edge cases, failure modes. Cross-checks against CodeQL/Dependabot alerts |
|
|
293
|
-
| **Self-Optimization** | Reads DORA metrics and auto-tunes daemon config.
|
|
294
|
+
| **Self-Optimization** | Reads DORA metrics and auto-tunes daemon config. Includes context efficiency closed loop for token budget tuning |
|
|
294
295
|
| **Developer Simulation** | 3-persona review (security, performance, maintainability) before PR creation |
|
|
295
296
|
| **Architecture Enforcement** | Living architectural model with violation detection and dependency direction rules |
|
|
296
297
|
|
|
@@ -309,6 +310,19 @@ Native GitHub API integration enriches every intelligence module:
|
|
|
309
310
|
| **Contributors** | CODEOWNERS-based reviewer routing, top-contributor fallback, auto-approve as last resort |
|
|
310
311
|
| **Branch Protection** | Checks required reviews and status checks before attempting auto-merge |
|
|
311
312
|
|
|
313
|
+
### Decision Engine
|
|
314
|
+
|
|
315
|
+
The autonomous decision engine (`config/policy.json` → `decision` section) handles routine operational decisions with outcome learning. Decisions are tiered by risk, with low-risk actions auto-approved and higher tiers escalated. The engine learns from outcomes to improve future decisions.
|
|
316
|
+
|
|
317
|
+
### Context Engineering
|
|
318
|
+
|
|
319
|
+
Intelligent context window management for pipeline agents:
|
|
320
|
+
|
|
321
|
+
- **Budget-aware trimming** — Configurable character budgets for prompt composition (`context_budget_chars`)
|
|
322
|
+
- **Section-level trimming** — Independent limits for memory, git history, hotspot files, and test output
|
|
323
|
+
- **Context efficiency metrics** — Tracks budget utilization and trim ratios per iteration
|
|
324
|
+
- **Self-tuning** — The self-optimization loop analyzes context efficiency events and recommends budget adjustments
|
|
325
|
+
|
|
312
326
|
### Autonomous Daemon
|
|
313
327
|
|
|
314
328
|
```bash
|
|
@@ -354,7 +368,7 @@ Per-pipeline cost tracking with model pricing, budget enforcement, and ROI analy
|
|
|
354
368
|
shipwright dashboard start
|
|
355
369
|
```
|
|
356
370
|
|
|
357
|
-
Web dashboard with live pipeline progress, GitHub context (security alerts, contributors, deployments), DORA metrics, and
|
|
371
|
+
Web dashboard with live pipeline progress, GitHub context (security alerts, contributors, deployments), DORA metrics, cost tracking, and context efficiency metrics. WebSocket-powered, updates in real-time.
|
|
358
372
|
|
|
359
373
|
### Webhook Receiver
|
|
360
374
|
|
package/config/defaults.json
CHANGED
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
"build_test_retries": 3,
|
|
26
26
|
"claude_timeout": 1800,
|
|
27
27
|
"heartbeat_interval": 30,
|
|
28
|
+
"composed_cache_ttl": 3600,
|
|
28
29
|
"branch_pattern": "shipwright/issue-{issue}",
|
|
29
30
|
"stage_order": [
|
|
30
31
|
"intake",
|
|
@@ -47,7 +48,12 @@
|
|
|
47
48
|
"max_restarts": 0,
|
|
48
49
|
"fast_test_interval": 5,
|
|
49
50
|
"convergence_threshold": 3,
|
|
50
|
-
"multi_agent_sleep": 5
|
|
51
|
+
"multi_agent_sleep": 5,
|
|
52
|
+
"context_budget_chars": 180000,
|
|
53
|
+
"context_trim_memory_chars": 20000,
|
|
54
|
+
"context_trim_git_entries": 10,
|
|
55
|
+
"context_trim_hotspot_files": 5,
|
|
56
|
+
"context_trim_test_lines": 50
|
|
51
57
|
},
|
|
52
58
|
"dashboard": {
|
|
53
59
|
"port": 8767,
|
|
@@ -77,12 +83,29 @@
|
|
|
77
83
|
"ab_test_ratio": 0.2,
|
|
78
84
|
"claude_timeout": 60
|
|
79
85
|
},
|
|
86
|
+
"predictive": {
|
|
87
|
+
"default_risk_score": 50,
|
|
88
|
+
"keyword_risk_score": 70
|
|
89
|
+
},
|
|
90
|
+
"api_optimization": {
|
|
91
|
+
"programmatic_tool_calling": true,
|
|
92
|
+
"tool_search_enabled": true,
|
|
93
|
+
"tool_search_type": "bm25",
|
|
94
|
+
"defer_unused_tools": true,
|
|
95
|
+
"web_search_version": "web_search_20260209",
|
|
96
|
+
"web_fetch_version": "web_fetch_20260209",
|
|
97
|
+
"dynamic_filtering": true,
|
|
98
|
+
"code_execution_sandbox": true,
|
|
99
|
+
"beta_header": "code-execution-web-tools-2026-02-09"
|
|
100
|
+
},
|
|
80
101
|
"quality": {
|
|
81
102
|
"gate_score_threshold": 70,
|
|
82
103
|
"secret_threshold": 3,
|
|
83
104
|
"min_file_count": 10,
|
|
84
105
|
"score_weight_per_file": 25,
|
|
85
|
-
"pass_rate_threshold": 5.0
|
|
106
|
+
"pass_rate_threshold": 5.0,
|
|
107
|
+
"bundle_growth_legacy_pct": 20,
|
|
108
|
+
"perf_regression_legacy_pct": 30
|
|
86
109
|
},
|
|
87
110
|
"cleanup": {
|
|
88
111
|
"artifact_age_days": 7,
|
package/config/policy.json
CHANGED
|
@@ -516,6 +516,12 @@
|
|
|
516
516
|
id="cost-trend-container"
|
|
517
517
|
></div>
|
|
518
518
|
|
|
519
|
+
<!-- Context efficiency -->
|
|
520
|
+
<div
|
|
521
|
+
class="metric-card metric-card-wide"
|
|
522
|
+
id="context-efficiency-container"
|
|
523
|
+
></div>
|
|
524
|
+
|
|
519
525
|
<!-- DORA trend -->
|
|
520
526
|
<div
|
|
521
527
|
class="metric-card metric-card-wide"
|
|
@@ -3634,6 +3634,82 @@ body::-webkit-scrollbar-thumb {
|
|
|
3634
3634
|
text-align: center;
|
|
3635
3635
|
}
|
|
3636
3636
|
|
|
3637
|
+
/* Context efficiency widget */
|
|
3638
|
+
.ctx-eff-grid {
|
|
3639
|
+
display: grid;
|
|
3640
|
+
grid-template-columns: repeat(4, 1fr);
|
|
3641
|
+
gap: 16px;
|
|
3642
|
+
}
|
|
3643
|
+
|
|
3644
|
+
.ctx-eff-card {
|
|
3645
|
+
background: var(--glass-bg);
|
|
3646
|
+
border: 1px solid var(--glass-border);
|
|
3647
|
+
border-radius: 10px;
|
|
3648
|
+
padding: 12px;
|
|
3649
|
+
display: flex;
|
|
3650
|
+
flex-direction: column;
|
|
3651
|
+
gap: 6px;
|
|
3652
|
+
}
|
|
3653
|
+
|
|
3654
|
+
.ctx-eff-card-label {
|
|
3655
|
+
font-family: var(--font-mono);
|
|
3656
|
+
font-size: 0.65rem;
|
|
3657
|
+
font-weight: 600;
|
|
3658
|
+
letter-spacing: 0.08em;
|
|
3659
|
+
color: var(--text-muted);
|
|
3660
|
+
text-transform: uppercase;
|
|
3661
|
+
}
|
|
3662
|
+
|
|
3663
|
+
.ctx-eff-gauge {
|
|
3664
|
+
height: 8px;
|
|
3665
|
+
border-radius: 4px;
|
|
3666
|
+
background: var(--glass-border);
|
|
3667
|
+
overflow: hidden;
|
|
3668
|
+
}
|
|
3669
|
+
|
|
3670
|
+
.ctx-eff-gauge-fill {
|
|
3671
|
+
height: 100%;
|
|
3672
|
+
border-radius: 4px;
|
|
3673
|
+
transition: width 0.5s ease;
|
|
3674
|
+
}
|
|
3675
|
+
|
|
3676
|
+
.ctx-eff-gauge-fill.ctx-eff-high {
|
|
3677
|
+
background: #4ade80;
|
|
3678
|
+
}
|
|
3679
|
+
|
|
3680
|
+
.ctx-eff-gauge-fill.ctx-eff-mid {
|
|
3681
|
+
background: #00d4ff;
|
|
3682
|
+
}
|
|
3683
|
+
|
|
3684
|
+
.ctx-eff-gauge-fill.ctx-eff-low {
|
|
3685
|
+
background: #f43f5e;
|
|
3686
|
+
}
|
|
3687
|
+
|
|
3688
|
+
.ctx-eff-gauge-fill.ctx-eff-trim {
|
|
3689
|
+
background: #7c3aed;
|
|
3690
|
+
}
|
|
3691
|
+
|
|
3692
|
+
.ctx-eff-value {
|
|
3693
|
+
font-family: var(--font-mono);
|
|
3694
|
+
font-size: 0.85rem;
|
|
3695
|
+
font-weight: 700;
|
|
3696
|
+
color: var(--text-primary);
|
|
3697
|
+
}
|
|
3698
|
+
|
|
3699
|
+
.ctx-eff-big {
|
|
3700
|
+
font-family: var(--font-mono);
|
|
3701
|
+
font-size: 1.4rem;
|
|
3702
|
+
font-weight: 700;
|
|
3703
|
+
color: var(--text-primary);
|
|
3704
|
+
line-height: 1;
|
|
3705
|
+
}
|
|
3706
|
+
|
|
3707
|
+
.ctx-eff-sub {
|
|
3708
|
+
font-family: var(--font-mono);
|
|
3709
|
+
font-size: 0.65rem;
|
|
3710
|
+
color: var(--text-muted);
|
|
3711
|
+
}
|
|
3712
|
+
|
|
3637
3713
|
.dora-trend-grid {
|
|
3638
3714
|
display: grid;
|
|
3639
3715
|
grid-template-columns: repeat(4, 1fr);
|
package/dashboard/server.ts
CHANGED
|
@@ -3690,6 +3690,57 @@ const server = Bun.serve({
|
|
|
3690
3690
|
});
|
|
3691
3691
|
}
|
|
3692
3692
|
|
|
3693
|
+
// REST: Context efficiency metrics (from loop.context_efficiency events)
|
|
3694
|
+
if (pathname === "/api/context-efficiency") {
|
|
3695
|
+
const period = parseInt(url.searchParams.get("period") || "7");
|
|
3696
|
+
const events = readEvents();
|
|
3697
|
+
const now = Math.floor(Date.now() / 1000);
|
|
3698
|
+
const cutoff = now - period * 86400;
|
|
3699
|
+
|
|
3700
|
+
let totalUtil = 0;
|
|
3701
|
+
let totalRatio = 0;
|
|
3702
|
+
let totalRaw = 0;
|
|
3703
|
+
let totalTrimmed = 0;
|
|
3704
|
+
let trimEvents = 0;
|
|
3705
|
+
let count = 0;
|
|
3706
|
+
|
|
3707
|
+
for (const e of events) {
|
|
3708
|
+
if ((e.ts_epoch || 0) < cutoff) continue;
|
|
3709
|
+
if (e.type !== "loop.context_efficiency") continue;
|
|
3710
|
+
|
|
3711
|
+
const util = parseFloat(String(e.budget_utilization || 0));
|
|
3712
|
+
const ratio = parseFloat(String(e.trim_ratio || 0));
|
|
3713
|
+
const raw = parseInt(String(e.raw_prompt_chars || 0), 10);
|
|
3714
|
+
const trimmed = parseInt(String(e.trimmed_prompt_chars || 0), 10);
|
|
3715
|
+
|
|
3716
|
+
totalUtil += util;
|
|
3717
|
+
totalRatio += ratio;
|
|
3718
|
+
totalRaw += raw;
|
|
3719
|
+
totalTrimmed += trimmed;
|
|
3720
|
+
if (ratio > 0) trimEvents++;
|
|
3721
|
+
count++;
|
|
3722
|
+
}
|
|
3723
|
+
|
|
3724
|
+
const avgUtilization =
|
|
3725
|
+
count > 0 ? Math.round((totalUtil / count) * 10) / 10 : 0;
|
|
3726
|
+
const avgTrimRatio =
|
|
3727
|
+
count > 0 ? Math.round((totalRatio / count) * 10) / 10 : 0;
|
|
3728
|
+
const totalDiscarded = totalRaw - totalTrimmed;
|
|
3729
|
+
|
|
3730
|
+
return new Response(
|
|
3731
|
+
JSON.stringify({
|
|
3732
|
+
avg_utilization: avgUtilization,
|
|
3733
|
+
avg_trim_ratio: avgTrimRatio,
|
|
3734
|
+
total_raw_chars: totalRaw,
|
|
3735
|
+
total_trimmed_chars: totalTrimmed,
|
|
3736
|
+
total_discarded_chars: totalDiscarded,
|
|
3737
|
+
trim_events: trimEvents,
|
|
3738
|
+
total_iterations: count,
|
|
3739
|
+
}),
|
|
3740
|
+
{ headers: { "Content-Type": "application/json", ...CORS_HEADERS } },
|
|
3741
|
+
);
|
|
3742
|
+
}
|
|
3743
|
+
|
|
3693
3744
|
// REST: DORA trend (weekly sliding windows)
|
|
3694
3745
|
if (pathname === "/api/metrics/dora-trend") {
|
|
3695
3746
|
const period = parseInt(url.searchParams.get("period") || "30");
|
|
@@ -7,6 +7,7 @@ import type {
|
|
|
7
7
|
MachineInfo,
|
|
8
8
|
JoinToken,
|
|
9
9
|
CostBreakdown,
|
|
10
|
+
ContextEfficiency,
|
|
10
11
|
DaemonConfig,
|
|
11
12
|
AlertInfo,
|
|
12
13
|
InsightsData,
|
|
@@ -113,6 +114,10 @@ export const fetchCostTrend = (period = 30) =>
|
|
|
113
114
|
`/api/costs/trend?period=${period}`,
|
|
114
115
|
);
|
|
115
116
|
|
|
117
|
+
// Context efficiency
|
|
118
|
+
export const fetchContextEfficiency = (period = 7) =>
|
|
119
|
+
request<ContextEfficiency>(`/api/context-efficiency?period=${period}`);
|
|
120
|
+
|
|
116
121
|
// Daemon
|
|
117
122
|
export const fetchDaemonConfig = () =>
|
|
118
123
|
request<DaemonConfig>("/api/daemon/config");
|
|
@@ -255,6 +255,16 @@ export interface HeatmapData {
|
|
|
255
255
|
heatmap: Record<string, Record<string, number>>;
|
|
256
256
|
}
|
|
257
257
|
|
|
258
|
+
export interface ContextEfficiency {
|
|
259
|
+
avg_utilization: number;
|
|
260
|
+
avg_trim_ratio: number;
|
|
261
|
+
total_raw_chars: number;
|
|
262
|
+
total_trimmed_chars: number;
|
|
263
|
+
total_discarded_chars: number;
|
|
264
|
+
trim_events: number;
|
|
265
|
+
total_iterations: number;
|
|
266
|
+
}
|
|
267
|
+
|
|
258
268
|
export interface DaemonConfig {
|
|
259
269
|
paused?: boolean;
|
|
260
270
|
config?: Record<string, unknown>;
|
|
@@ -82,10 +82,12 @@ function renderMetrics(data: MetricsData): void {
|
|
|
82
82
|
doraContainer.style.display = "none";
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
// Cost breakdown/trend
|
|
85
|
+
// Cost breakdown/trend/context efficiency
|
|
86
86
|
if (document.getElementById("cost-breakdown-container"))
|
|
87
87
|
renderCostBreakdown();
|
|
88
88
|
if (document.getElementById("cost-trend-container")) renderCostTrend();
|
|
89
|
+
if (document.getElementById("context-efficiency-container"))
|
|
90
|
+
renderContextEfficiency();
|
|
89
91
|
if (document.getElementById("dora-trend-container")) renderDoraTrend();
|
|
90
92
|
if (document.getElementById("stage-performance-container"))
|
|
91
93
|
renderStagePerformance();
|
|
@@ -256,6 +258,72 @@ function renderCostTrend(): void {
|
|
|
256
258
|
});
|
|
257
259
|
}
|
|
258
260
|
|
|
261
|
+
function renderContextEfficiency(): void {
|
|
262
|
+
const container = document.getElementById("context-efficiency-container");
|
|
263
|
+
if (!container) return;
|
|
264
|
+
api
|
|
265
|
+
.fetchContextEfficiency()
|
|
266
|
+
.then((data) => {
|
|
267
|
+
if (!data.total_iterations) {
|
|
268
|
+
container.innerHTML =
|
|
269
|
+
'<div class="empty-state"><p>No context efficiency data</p></div>';
|
|
270
|
+
return;
|
|
271
|
+
}
|
|
272
|
+
const utilPct = Math.min(data.avg_utilization, 100);
|
|
273
|
+
const utilClass =
|
|
274
|
+
utilPct >= 90
|
|
275
|
+
? "ctx-eff-high"
|
|
276
|
+
: utilPct >= 60
|
|
277
|
+
? "ctx-eff-mid"
|
|
278
|
+
: "ctx-eff-low";
|
|
279
|
+
const trimPct = Math.min(data.avg_trim_ratio, 100);
|
|
280
|
+
|
|
281
|
+
let html =
|
|
282
|
+
'<span class="metric-label">CONTEXT EFFICIENCY</span>' +
|
|
283
|
+
'<div class="ctx-eff-grid">';
|
|
284
|
+
|
|
285
|
+
// Budget utilization gauge
|
|
286
|
+
html +=
|
|
287
|
+
'<div class="ctx-eff-card">' +
|
|
288
|
+
'<span class="ctx-eff-card-label">Budget Utilization</span>' +
|
|
289
|
+
`<div class="ctx-eff-gauge"><div class="ctx-eff-gauge-fill ${utilClass}" style="width:${utilPct.toFixed(0)}%"></div></div>` +
|
|
290
|
+
`<span class="ctx-eff-value">${data.avg_utilization.toFixed(1)}%</span>` +
|
|
291
|
+
"</div>";
|
|
292
|
+
|
|
293
|
+
// Trim ratio
|
|
294
|
+
html +=
|
|
295
|
+
'<div class="ctx-eff-card">' +
|
|
296
|
+
'<span class="ctx-eff-card-label">Avg Trim Ratio</span>' +
|
|
297
|
+
`<div class="ctx-eff-gauge"><div class="ctx-eff-gauge-fill ctx-eff-trim" style="width:${trimPct.toFixed(0)}%"></div></div>` +
|
|
298
|
+
`<span class="ctx-eff-value">${data.avg_trim_ratio.toFixed(1)}%</span>` +
|
|
299
|
+
"</div>";
|
|
300
|
+
|
|
301
|
+
// Chars saved
|
|
302
|
+
const savedK = Math.round(data.total_discarded_chars / 1000);
|
|
303
|
+
const totalK = Math.round(data.total_raw_chars / 1000);
|
|
304
|
+
html +=
|
|
305
|
+
'<div class="ctx-eff-card">' +
|
|
306
|
+
'<span class="ctx-eff-card-label">Chars Discarded</span>' +
|
|
307
|
+
`<span class="ctx-eff-big">${fmtNum(savedK)}K</span>` +
|
|
308
|
+
`<span class="ctx-eff-sub">of ${fmtNum(totalK)}K generated</span>` +
|
|
309
|
+
"</div>";
|
|
310
|
+
|
|
311
|
+
// Trim events
|
|
312
|
+
html +=
|
|
313
|
+
'<div class="ctx-eff-card">' +
|
|
314
|
+
'<span class="ctx-eff-card-label">Trim Events</span>' +
|
|
315
|
+
`<span class="ctx-eff-big">${data.trim_events}</span>` +
|
|
316
|
+
`<span class="ctx-eff-sub">of ${data.total_iterations} iterations</span>` +
|
|
317
|
+
"</div>";
|
|
318
|
+
|
|
319
|
+
html += "</div>";
|
|
320
|
+
container.innerHTML = html;
|
|
321
|
+
})
|
|
322
|
+
.catch((err) => {
|
|
323
|
+
container.innerHTML = `<div class="empty-state"><p>Failed to load: ${escapeHtml(String(err))}</p></div>`;
|
|
324
|
+
});
|
|
325
|
+
}
|
|
326
|
+
|
|
259
327
|
function renderDoraTrend(): void {
|
|
260
328
|
const container = document.getElementById("dora-trend-container");
|
|
261
329
|
if (!container) return;
|
package/package.json
CHANGED
|
@@ -223,10 +223,12 @@ daemon_collect_snapshot() {
|
|
|
223
223
|
if [[ -d "$worktree/.git" ]] || [[ -f "$worktree/.git" ]]; then
|
|
224
224
|
diff_lines=$(cd "$worktree" && git diff --stat 2>/dev/null | tail -1 | grep -o '[0-9]* insertion' | grep -o '[0-9]*' || echo "0")
|
|
225
225
|
[[ -z "$diff_lines" ]] && diff_lines=0
|
|
226
|
-
files_changed=$(cd "$worktree" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ' ||
|
|
226
|
+
files_changed=$(cd "$worktree" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ' || true)
|
|
227
|
+
files_changed="${files_changed:-0}"
|
|
227
228
|
# Also count untracked files the agent has created
|
|
228
229
|
local untracked
|
|
229
|
-
untracked=$(cd "$worktree" && git ls-files --others --exclude-standard 2>/dev/null | wc -l | tr -d ' ' ||
|
|
230
|
+
untracked=$(cd "$worktree" && git ls-files --others --exclude-standard 2>/dev/null | wc -l | tr -d ' ' || true)
|
|
231
|
+
untracked="${untracked:-0}"
|
|
230
232
|
files_changed=$((files_changed + untracked))
|
|
231
233
|
fi
|
|
232
234
|
|
|
@@ -853,8 +853,8 @@ Auto-detected by \`shipwright daemon patrol\` on $(now_iso)." \
|
|
|
853
853
|
usage_count=${usage_count:-0}
|
|
854
854
|
|
|
855
855
|
local line_count
|
|
856
|
-
line_count=$(wc -l < "$script" 2>/dev/null | tr -d ' ' ||
|
|
857
|
-
line_count
|
|
856
|
+
line_count=$(wc -l < "$script" 2>/dev/null | tr -d ' ' || true)
|
|
857
|
+
line_count="${line_count:-0}"
|
|
858
858
|
|
|
859
859
|
untested_entries="${untested_entries}${usage_count}|${basename}|${line_count}\n"
|
|
860
860
|
findings=$((findings + 1))
|
|
@@ -458,6 +458,13 @@ get_active_count() {
|
|
|
458
458
|
echo 0
|
|
459
459
|
return
|
|
460
460
|
fi
|
|
461
|
+
# Validate state file JSON before parsing (mid-flight corruption check)
|
|
462
|
+
if ! jq empty "$STATE_FILE" 2>/dev/null; then
|
|
463
|
+
daemon_log WARN "State file corrupted mid-flight — backing up and resetting"
|
|
464
|
+
cp "$STATE_FILE" "${STATE_FILE}.corrupted.$(date +%s)" 2>/dev/null || true
|
|
465
|
+
init_state
|
|
466
|
+
return
|
|
467
|
+
fi
|
|
461
468
|
jq -r '.active_jobs | length' "$STATE_FILE" 2>/dev/null || echo 0
|
|
462
469
|
}
|
|
463
470
|
|
package/scripts/lib/helpers.sh
CHANGED
|
@@ -89,7 +89,9 @@ emit_event() {
|
|
|
89
89
|
local _lock_file="${EVENTS_FILE}.lock"
|
|
90
90
|
(
|
|
91
91
|
if command -v flock >/dev/null 2>&1; then
|
|
92
|
-
flock -w 2 200 2>/dev/null
|
|
92
|
+
if ! flock -w 2 200 2>/dev/null; then
|
|
93
|
+
echo "WARN: emit_event lock timeout — concurrent write possible" >&2
|
|
94
|
+
fi
|
|
93
95
|
fi
|
|
94
96
|
echo "$_event_line" >> "$EVENTS_FILE"
|
|
95
97
|
) 200>"$_lock_file"
|
|
@@ -305,7 +305,7 @@ $content"
|
|
|
305
305
|
|
|
306
306
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
307
307
|
# 3. Adaptive Cycle Limits
|
|
308
|
-
# Replaces
|
|
308
|
+
# Replaces default max_cycles with convergence-driven limits.
|
|
309
309
|
# Takes the base limit, returns an adjusted limit based on:
|
|
310
310
|
# - Learned iteration model
|
|
311
311
|
# - Convergence/divergence signals
|
|
@@ -1148,13 +1148,15 @@ stage_compound_quality() {
|
|
|
1148
1148
|
_cq_real_changes=$(git diff --name-only "origin/${BASE_BRANCH:-main}...HEAD" \
|
|
1149
1149
|
-- . ':!.claude/loop-state.md' ':!.claude/pipeline-state.md' \
|
|
1150
1150
|
':!.claude/pipeline-artifacts/*' ':!**/progress.md' \
|
|
1151
|
-
':!**/error-summary.json' 2>/dev/null | wc -l ||
|
|
1151
|
+
':!**/error-summary.json' 2>/dev/null | wc -l || true)
|
|
1152
|
+
_cq_real_changes="${_cq_real_changes:-0}"
|
|
1152
1153
|
_cq_real_changes=$(echo "$_cq_real_changes" | tr -d '[:space:]')
|
|
1153
1154
|
[[ -z "$_cq_real_changes" ]] && _cq_real_changes=0
|
|
1154
1155
|
# Fallback: if no remote, compare against first commit
|
|
1155
1156
|
if [[ "$_cq_real_changes" -eq 0 ]] 2>/dev/null; then
|
|
1156
1157
|
_cq_real_changes=$(git diff --name-only "$(git rev-list --max-parents=0 HEAD 2>/dev/null)...HEAD" \
|
|
1157
|
-
-- . ':!.claude/*' ':!**/progress.md' ':!**/error-summary.json' 2>/dev/null | wc -l ||
|
|
1158
|
+
-- . ':!.claude/*' ':!**/progress.md' ':!**/error-summary.json' 2>/dev/null | wc -l || true)
|
|
1159
|
+
_cq_real_changes="${_cq_real_changes:-0}"
|
|
1158
1160
|
_cq_real_changes=$(echo "$_cq_real_changes" | tr -d '[:space:]')
|
|
1159
1161
|
[[ -z "$_cq_real_changes" ]] && _cq_real_changes=0
|
|
1160
1162
|
fi
|
|
@@ -141,7 +141,9 @@ quality_check_bundle_size() {
|
|
|
141
141
|
return 1
|
|
142
142
|
fi
|
|
143
143
|
else
|
|
144
|
-
# Fallback: legacy memory baseline
|
|
144
|
+
# Fallback: legacy memory baseline (not enough history for statistical check)
|
|
145
|
+
local bundle_growth_limit
|
|
146
|
+
bundle_growth_limit=$(_config_get_int "quality.bundle_growth_legacy_pct" 20 2>/dev/null || echo 20)
|
|
145
147
|
local baseline_size=""
|
|
146
148
|
if [[ -x "$SCRIPT_DIR/sw-memory.sh" ]]; then
|
|
147
149
|
baseline_size=$(bash "$SCRIPT_DIR/sw-memory.sh" get "bundle_size_kb" 2>/dev/null) || true
|
|
@@ -150,7 +152,7 @@ quality_check_bundle_size() {
|
|
|
150
152
|
local growth_pct
|
|
151
153
|
growth_pct=$(awk -v cur="$bundle_size" -v base="$baseline_size" 'BEGIN{printf "%d", ((cur - base) / base) * 100}')
|
|
152
154
|
echo "Baseline: ${baseline_size}KB | Growth: ${growth_pct}%" >> "$metrics_log"
|
|
153
|
-
if [[ "$growth_pct" -gt
|
|
155
|
+
if [[ "$growth_pct" -gt "$bundle_growth_limit" ]]; then
|
|
154
156
|
warn "Bundle size grew ${growth_pct}% (${baseline_size}KB → ${bundle_size}KB)"
|
|
155
157
|
return 1
|
|
156
158
|
fi
|
|
@@ -299,7 +301,9 @@ $tail_output" < /dev/null 2>/dev/null | grep -oE '^[0-9.]+$' | head -1 || true)
|
|
|
299
301
|
return 1
|
|
300
302
|
fi
|
|
301
303
|
else
|
|
302
|
-
# Fallback: legacy memory baseline
|
|
304
|
+
# Fallback: legacy memory baseline (not enough history for statistical check)
|
|
305
|
+
local perf_regression_limit
|
|
306
|
+
perf_regression_limit=$(_config_get_int "quality.perf_regression_legacy_pct" 30 2>/dev/null || echo 30)
|
|
303
307
|
local baseline_dur=""
|
|
304
308
|
if [[ -x "$SCRIPT_DIR/sw-memory.sh" ]]; then
|
|
305
309
|
baseline_dur=$(bash "$SCRIPT_DIR/sw-memory.sh" get "test_duration_s" 2>/dev/null) || true
|
|
@@ -308,7 +312,7 @@ $tail_output" < /dev/null 2>/dev/null | grep -oE '^[0-9.]+$' | head -1 || true)
|
|
|
308
312
|
local slowdown_pct
|
|
309
313
|
slowdown_pct=$(awk -v cur="$duration_ms" -v base="$baseline_dur" 'BEGIN{printf "%d", ((cur - base) / base) * 100}')
|
|
310
314
|
echo "Baseline: ${baseline_dur}s | Slowdown: ${slowdown_pct}%" >> "$metrics_log"
|
|
311
|
-
if [[ "$slowdown_pct" -gt
|
|
315
|
+
if [[ "$slowdown_pct" -gt "$perf_regression_limit" ]]; then
|
|
312
316
|
warn "Tests ${slowdown_pct}% slower (${baseline_dur}s → ${duration_ms}s)"
|
|
313
317
|
return 1
|
|
314
318
|
fi
|