@swarmclawai/swarmclaw 1.9.7 → 1.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/package.json +2 -2
- package/src/app/api/quality/release-readiness/route.ts +38 -0
- package/src/cli/index.js +1 -0
- package/src/components/quality/quality-workspace.tsx +164 -4
- package/src/lib/quality/release-readiness.test.ts +129 -0
- package/src/lib/quality/release-readiness.ts +187 -0
package/README.md
CHANGED
|
@@ -399,6 +399,15 @@ Operational docs: https://swarmclaw.ai/docs/observability
|
|
|
399
399
|
|
|
400
400
|
## Releases
|
|
401
401
|
|
|
402
|
+
### v1.9.8 Highlights
|
|
403
|
+
|
|
404
|
+
Bundled release-readiness release: a single operator report that combines eval gates, operations blockers, approvals, and runtime readiness.
|
|
405
|
+
|
|
406
|
+
- **Release readiness report.** `/api/quality/release-readiness` returns a scored ready/warning/blocked report built from eval regression gates and Operations Pulse evidence.
|
|
407
|
+
- **Quality Center ship gate.** The Quality overview now shows readiness score, blockers, warnings, checks, and next actions before operators cut a release.
|
|
408
|
+
- **CLI readiness checks.** `swarmclaw operations readiness` exposes the same report for scripts and CI.
|
|
409
|
+
- **Browser coverage.** The e2e smoke now verifies the release-readiness panel on `/quality`.
|
|
410
|
+
|
|
402
411
|
### v1.9.7 Highlights
|
|
403
412
|
|
|
404
413
|
Bundled eval-gate release: approved baselines, regression checks, and Quality Center release gates for repeatable eval evidence.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@swarmclawai/swarmclaw",
|
|
3
|
-
"version": "1.9.7",
|
|
3
|
+
"version": "1.9.8",
|
|
4
4
|
"description": "Build and run autonomous AI agents with OpenClaw, Hermes, multiple model providers, orchestration, delegation, memory, skills, schedules, and chat connectors.",
|
|
5
5
|
"main": "electron-dist/main.js",
|
|
6
6
|
"license": "MIT",
|
|
@@ -87,7 +87,7 @@
|
|
|
87
87
|
"test:cli": "node --test src/cli/*.test.js bin/*.test.js scripts/electron-after-pack.test.mjs scripts/ensure-sandbox-browser-image.test.mjs scripts/postinstall.test.mjs scripts/run-next-build.test.mjs scripts/run-next-typegen.test.mjs",
|
|
88
88
|
"test:setup": "tsx --test src/app/api/setup/check-provider/route.test.ts src/lib/server/provider-model-discovery.test.ts src/components/auth/setup-wizard/utils.test.ts src/components/auth/setup-wizard/types.test.ts src/hooks/setup-done-detection.test.ts src/lib/setup-defaults.test.ts src/lib/server/storage-auth.test.ts src/lib/server/storage-auth-docker.test.ts",
|
|
89
89
|
"test:openclaw": "tsx --test src/lib/openclaw/openclaw-agent-id.test.ts src/lib/openclaw/openclaw-endpoint.test.ts src/lib/server/agents/agent-runtime-config.test.ts src/lib/server/build-llm.test.ts src/lib/server/connectors/connector-routing.test.ts src/lib/server/connectors/openclaw.test.ts src/lib/server/connectors/swarmdock.test.ts src/lib/server/gateway/protocol.test.ts src/lib/server/gateways/gateway-topology.test.ts src/lib/server/llm-response-cache.test.ts src/lib/server/mcp-conformance.test.ts src/lib/server/openclaw/agent-resolver.test.ts src/lib/server/openclaw/deploy.test.ts src/lib/server/openclaw/skills-normalize.test.ts src/lib/server/session-tools/openclaw-nodes.test.ts src/lib/server/session-tools/swarmdock.test.ts src/lib/server/tasks/task-quality-gate.test.ts src/lib/server/tasks/task-validation.test.ts src/lib/server/tool-capability-policy.test.ts src/lib/providers/openai.test.ts src/lib/providers/openclaw-exports.test.ts src/app/api/gateways/topology-route.test.ts src/app/api/openclaw/dashboard-url/route.test.ts",
|
|
90
|
-
"test:runtime": "tsx --test src/lib/a2a/agent-card.test.ts src/lib/strip-internal-metadata.test.ts src/lib/provider-sets.test.ts src/lib/providers/opencode-cli.test.ts src/lib/providers/cli-provider-metadata.test.ts src/lib/providers/cli-utils.test.ts src/lib/providers/generic-cli.test.ts src/lib/server/agents/delegation-advisory.test.ts src/lib/server/cli-provider-readiness.test.ts src/lib/server/provider-health.test.ts src/lib/server/mcp-gateway-runtime.test.ts src/lib/server/mcp-connection-pool.test.ts src/lib/server/knowledge-sources.test.ts src/lib/server/extension-managed-resources.test.ts src/lib/server/eval/baseline.test.ts src/lib/server/eval/environment-plan.test.ts src/lib/server/chat-execution/chat-execution-grounding.test.ts src/lib/server/chat-execution/chat-turn-preparation.test.ts src/lib/server/chat-execution/iteration-timers.test.ts src/lib/server/chat-execution/post-stream-finalization.test.ts src/lib/server/chat-execution/reasoning-tag-scrubber.test.ts src/lib/server/chats/clear-undo-snapshots.test.ts src/lib/server/connectors/email.test.ts src/lib/server/protocols/protocol-service.test.ts src/lib/server/runtime/run-ledger.test.ts src/lib/server/runtime/queue-retry-policy.test.ts src/lib/server/runs/run-brief.test.ts src/lib/server/operations/operation-pulse.test.ts src/lib/server/artifacts/artifact-resolver.test.ts src/lib/server/observability/otel-config.test.ts src/lib/server/safe-parse-body.test.ts src/lib/server/missions/mission-templates.test.ts src/lib/server/sharing/share-link-repository.test.ts src/lib/server/sharing/share-resolver.test.ts src/lib/server/tasks/task-execution-workspace.test.ts src/lib/server/tasks/task-service.test.ts src/lib/server/session-tools/execute.test.ts src/lib/server/session-tools/manage-tasks.test.ts src/lib/app/view-constants.test.ts src/lib/quality/quality-summary.test.ts src/app/api/approvals/route.test.ts src/app/api/agents/agents-route.test.ts src/app/api/tasks/tasks-route.test.ts 
src/app/api/tasks/task-workspace-route.test.ts src/app/api/chats/chat-route.test.ts src/app/api/chats/clear-route.test.ts src/app/api/chats/compact-route.test.ts src/app/api/chats/context-status-route.test.ts src/app/api/connectors/connector-doctor-route.test.ts src/app/api/extensions/managed-resources/route.test.ts src/app/api/healthz/route.test.ts src/app/api/logs/route.test.ts src/app/api/portability/export/route.test.ts src/app/api/portability/import/route.test.ts src/app/api/providers/[id]/route.test.ts src/app/api/tts/route.test.ts",
|
|
90
|
+
"test:runtime": "tsx --test src/lib/a2a/agent-card.test.ts src/lib/strip-internal-metadata.test.ts src/lib/provider-sets.test.ts src/lib/providers/opencode-cli.test.ts src/lib/providers/cli-provider-metadata.test.ts src/lib/providers/cli-utils.test.ts src/lib/providers/generic-cli.test.ts src/lib/server/agents/delegation-advisory.test.ts src/lib/server/cli-provider-readiness.test.ts src/lib/server/provider-health.test.ts src/lib/server/mcp-gateway-runtime.test.ts src/lib/server/mcp-connection-pool.test.ts src/lib/server/knowledge-sources.test.ts src/lib/server/extension-managed-resources.test.ts src/lib/server/eval/baseline.test.ts src/lib/server/eval/environment-plan.test.ts src/lib/server/chat-execution/chat-execution-grounding.test.ts src/lib/server/chat-execution/chat-turn-preparation.test.ts src/lib/server/chat-execution/iteration-timers.test.ts src/lib/server/chat-execution/post-stream-finalization.test.ts src/lib/server/chat-execution/reasoning-tag-scrubber.test.ts src/lib/server/chats/clear-undo-snapshots.test.ts src/lib/server/connectors/email.test.ts src/lib/server/protocols/protocol-service.test.ts src/lib/server/runtime/run-ledger.test.ts src/lib/server/runtime/queue-retry-policy.test.ts src/lib/server/runs/run-brief.test.ts src/lib/server/operations/operation-pulse.test.ts src/lib/quality/release-readiness.test.ts src/lib/server/artifacts/artifact-resolver.test.ts src/lib/server/observability/otel-config.test.ts src/lib/server/safe-parse-body.test.ts src/lib/server/missions/mission-templates.test.ts src/lib/server/sharing/share-link-repository.test.ts src/lib/server/sharing/share-resolver.test.ts src/lib/server/tasks/task-execution-workspace.test.ts src/lib/server/tasks/task-service.test.ts src/lib/server/session-tools/execute.test.ts src/lib/server/session-tools/manage-tasks.test.ts src/lib/app/view-constants.test.ts src/lib/quality/quality-summary.test.ts src/app/api/approvals/route.test.ts src/app/api/agents/agents-route.test.ts 
src/app/api/tasks/tasks-route.test.ts src/app/api/tasks/task-workspace-route.test.ts src/app/api/chats/chat-route.test.ts src/app/api/chats/clear-route.test.ts src/app/api/chats/compact-route.test.ts src/app/api/chats/context-status-route.test.ts src/app/api/connectors/connector-doctor-route.test.ts src/app/api/extensions/managed-resources/route.test.ts src/app/api/healthz/route.test.ts src/app/api/logs/route.test.ts src/app/api/portability/export/route.test.ts src/app/api/portability/import/route.test.ts src/app/api/providers/[id]/route.test.ts src/app/api/tts/route.test.ts",
|
|
91
91
|
"test:builder": "tsx --test src/features/protocols/builder/utils/nodes-to-template.test.ts src/features/protocols/builder/utils/template-to-nodes.test.ts src/features/protocols/builder/validators/dag-validator.test.ts",
|
|
92
92
|
"test:e2e": "node --import tsx scripts/browser-e2e-smoke.ts",
|
|
93
93
|
"test:mcp:conformance": "node --import tsx ./scripts/mcp-conformance-check.ts",
|
|
package/src/app/api/quality/release-readiness/route.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { NextResponse } from 'next/server'
|
|
2
|
+
import { evaluateEvalGate } from '@/lib/server/eval/baseline'
|
|
3
|
+
import { getOperationPulse, normalizeOperationPulseRange } from '@/lib/server/operations/operation-pulse'
|
|
4
|
+
import { buildReleaseReadinessReport } from '@/lib/quality/release-readiness'
|
|
5
|
+
import { errorMessage } from '@/lib/shared-utils'
|
|
6
|
+
|
|
7
|
+
export const dynamic = 'force-dynamic'
|
|
8
|
+
|
|
9
|
+
function parseNumberParam(value: string | null): number | null {
|
|
10
|
+
if (value == null || value.trim() === '') return null
|
|
11
|
+
const parsed = Number(value)
|
|
12
|
+
return Number.isFinite(parsed) ? parsed : null
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export async function GET(req: Request) {
|
|
16
|
+
try {
|
|
17
|
+
const { searchParams } = new URL(req.url)
|
|
18
|
+
const range = normalizeOperationPulseRange(searchParams.get('range'))
|
|
19
|
+
const agentId = searchParams.get('agentId') || ''
|
|
20
|
+
const pulse = getOperationPulse(range)
|
|
21
|
+
const evalGate = agentId
|
|
22
|
+
? evaluateEvalGate({
|
|
23
|
+
agentId,
|
|
24
|
+
scenarioId: searchParams.get('scenarioId'),
|
|
25
|
+
suite: searchParams.get('suite'),
|
|
26
|
+
minPercent: parseNumberParam(searchParams.get('minPercent')),
|
|
27
|
+
maxRegressionPoints: parseNumberParam(searchParams.get('maxRegressionPoints')),
|
|
28
|
+
})
|
|
29
|
+
: null
|
|
30
|
+
|
|
31
|
+
return NextResponse.json(buildReleaseReadinessReport({ pulse, evalGate }))
|
|
32
|
+
} catch (err: unknown) {
|
|
33
|
+
return NextResponse.json(
|
|
34
|
+
{ error: errorMessage(err) },
|
|
35
|
+
{ status: 500 },
|
|
36
|
+
)
|
|
37
|
+
}
|
|
38
|
+
}
|
package/src/cli/index.js
CHANGED
|
@@ -210,6 +210,7 @@ const COMMAND_GROUPS = [
|
|
|
210
210
|
description: 'Operator triage and readiness summaries',
|
|
211
211
|
commands: [
|
|
212
212
|
cmd('pulse', 'GET', '/operations/pulse', 'Get Operations Pulse summary (use --query range=24h or --query range=7d)'),
|
|
213
|
+
cmd('readiness', 'GET', '/quality/release-readiness', 'Get release readiness report (use --query agentId=... and --query suite=core for eval gate coverage)'),
|
|
213
214
|
],
|
|
214
215
|
},
|
|
215
216
|
{
|
|
package/src/components/quality/quality-workspace.tsx
CHANGED
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
summarizeEvalRuns,
|
|
16
16
|
summarizeRunHealth,
|
|
17
17
|
} from '@/lib/quality/quality-summary'
|
|
18
|
+
import type { ReleaseReadinessReport, ReleaseReadinessStatus } from '@/lib/quality/release-readiness'
|
|
18
19
|
import { cn } from '@/lib/utils'
|
|
19
20
|
import { useAppStore } from '@/stores/use-app-store'
|
|
20
21
|
import type { EvalEnvironmentPlan, EvalGateResult, EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
|
|
@@ -129,6 +130,130 @@ function gateCheckClass(status: EvalGateResult['status']): string {
|
|
|
129
130
|
return 'border-emerald-500/20 bg-emerald-500/[0.05] text-emerald-200'
|
|
130
131
|
}
|
|
131
132
|
|
|
133
|
+
function readinessStatusClass(status: ReleaseReadinessStatus): string {
|
|
134
|
+
if (status === 'ready') return 'border-emerald-500/25 bg-emerald-500/10 text-emerald-200'
|
|
135
|
+
if (status === 'warning') return 'border-amber-500/25 bg-amber-500/10 text-amber-200'
|
|
136
|
+
return 'border-rose-500/25 bg-rose-500/10 text-rose-200'
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
function readinessScoreTone(status: ReleaseReadinessStatus): string {
|
|
140
|
+
if (status === 'ready') return 'text-emerald-300'
|
|
141
|
+
if (status === 'warning') return 'text-amber-300'
|
|
142
|
+
return 'text-rose-300'
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
function ReleaseReadinessPanel({
|
|
146
|
+
report,
|
|
147
|
+
loading,
|
|
148
|
+
onRefresh,
|
|
149
|
+
onOpenHref,
|
|
150
|
+
}: {
|
|
151
|
+
report: ReleaseReadinessReport | null
|
|
152
|
+
loading: boolean
|
|
153
|
+
onRefresh: () => void
|
|
154
|
+
onOpenHref: (href: string) => void
|
|
155
|
+
}) {
|
|
156
|
+
return (
|
|
157
|
+
<section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
|
|
158
|
+
<div className="flex flex-col gap-3 lg:flex-row lg:items-start lg:justify-between">
|
|
159
|
+
<div>
|
|
160
|
+
<div className="text-[11px] font-700 uppercase tracking-[0.12em] text-accent-bright/70">Release Readiness</div>
|
|
161
|
+
<h2 className="mt-1 font-display text-[17px] font-700 text-text">Ship gate report</h2>
|
|
162
|
+
<p className="mt-1 max-w-[680px] text-[12px] leading-relaxed text-text-3/65">
|
|
163
|
+
Combines eval regression gates, operations pulse blockers, pending approvals, active runs, budgets, connectors, and gateway readiness.
|
|
164
|
+
</p>
|
|
165
|
+
</div>
|
|
166
|
+
<button
|
|
167
|
+
type="button"
|
|
168
|
+
onClick={onRefresh}
|
|
169
|
+
disabled={loading}
|
|
170
|
+
className="shrink-0 rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] font-800 text-text-2 transition-colors hover:bg-white/[0.08] disabled:opacity-40"
|
|
171
|
+
>
|
|
172
|
+
{loading ? 'Checking' : 'Refresh gate'}
|
|
173
|
+
</button>
|
|
174
|
+
</div>
|
|
175
|
+
|
|
176
|
+
{!report ? (
|
|
177
|
+
<div className="mt-4 rounded-[12px] border border-dashed border-white/[0.08] bg-white/[0.02] px-4 py-5 text-[12px] text-text-3/65">
|
|
178
|
+
{loading ? 'Building release readiness report...' : 'No release readiness report is available yet.'}
|
|
179
|
+
</div>
|
|
180
|
+
) : (
|
|
181
|
+
<div className="mt-4 grid gap-4 xl:grid-cols-[260px_1fr]">
|
|
182
|
+
<div className="rounded-[14px] border border-white/[0.06] bg-white/[0.025] p-4">
|
|
183
|
+
<span className={cn('inline-flex rounded-full border px-2.5 py-1 text-[10px] font-800 uppercase tracking-[0.1em]', readinessStatusClass(report.status))}>
|
|
184
|
+
{report.status}
|
|
185
|
+
</span>
|
|
186
|
+
<div className={cn('mt-4 font-display text-[42px] font-700 tracking-[-0.04em]', readinessScoreTone(report.status))}>{report.score}</div>
|
|
187
|
+
<div className="mt-1 text-[12px] text-text-3/65">readiness score</div>
|
|
188
|
+
<div className="mt-4 grid grid-cols-2 gap-2">
|
|
189
|
+
<div className="rounded-[10px] bg-white/[0.035] px-3 py-2">
|
|
190
|
+
<div className="text-[10px] font-700 uppercase tracking-[0.1em] text-text-3/50">Blockers</div>
|
|
191
|
+
<div className="mt-1 text-[18px] font-800 text-text">{report.blockerCount}</div>
|
|
192
|
+
</div>
|
|
193
|
+
<div className="rounded-[10px] bg-white/[0.035] px-3 py-2">
|
|
194
|
+
<div className="text-[10px] font-700 uppercase tracking-[0.1em] text-text-3/50">Warnings</div>
|
|
195
|
+
<div className="mt-1 text-[18px] font-800 text-text">{report.warningCount}</div>
|
|
196
|
+
</div>
|
|
197
|
+
</div>
|
|
198
|
+
</div>
|
|
199
|
+
|
|
200
|
+
<div className="grid gap-3 lg:grid-cols-2">
|
|
201
|
+
<div className="rounded-[14px] border border-white/[0.06] bg-white/[0.02] p-3">
|
|
202
|
+
<div className="text-[12px] font-800 text-text">Checks</div>
|
|
203
|
+
<div className="mt-3 flex flex-col gap-2">
|
|
204
|
+
{report.checks.slice(0, 6).map((check) => (
|
|
205
|
+
<button
|
|
206
|
+
key={check.code}
|
|
207
|
+
type="button"
|
|
208
|
+
onClick={() => check.href && onOpenHref(check.href)}
|
|
209
|
+
className={cn(
|
|
210
|
+
'rounded-[10px] border px-3 py-2 text-left transition-colors',
|
|
211
|
+
readinessStatusClass(check.status),
|
|
212
|
+
check.href ? 'hover:bg-white/[0.08]' : '',
|
|
213
|
+
)}
|
|
214
|
+
>
|
|
215
|
+
<div className="text-[11px] font-800 uppercase tracking-[0.08em]">{check.status}</div>
|
|
216
|
+
<div className="mt-1 text-[12px] font-700 text-text">{check.title}</div>
|
|
217
|
+
<div className="mt-0.5 text-[11px] leading-relaxed text-text-3/70">{check.summary}</div>
|
|
218
|
+
</button>
|
|
219
|
+
))}
|
|
220
|
+
</div>
|
|
221
|
+
</div>
|
|
222
|
+
|
|
223
|
+
<div className="rounded-[14px] border border-white/[0.06] bg-white/[0.02] p-3">
|
|
224
|
+
<div className="text-[12px] font-800 text-text">Next actions</div>
|
|
225
|
+
<div className="mt-3 flex flex-col gap-2">
|
|
226
|
+
{report.nextActions.length === 0 ? (
|
|
227
|
+
<div className="rounded-[10px] border border-white/[0.06] bg-white/[0.025] px-3 py-4 text-[12px] text-text-3/65">
|
|
228
|
+
No triage actions are open in the selected window.
|
|
229
|
+
</div>
|
|
230
|
+
) : (
|
|
231
|
+
report.nextActions.slice(0, 5).map((action) => (
|
|
232
|
+
<button
|
|
233
|
+
key={action.id}
|
|
234
|
+
type="button"
|
|
235
|
+
onClick={() => onOpenHref(action.href)}
|
|
236
|
+
className="rounded-[10px] border border-white/[0.06] bg-white/[0.025] px-3 py-2 text-left transition-colors hover:bg-white/[0.06]"
|
|
237
|
+
>
|
|
238
|
+
<div className="flex items-center justify-between gap-2">
|
|
239
|
+
<div className="text-[12px] font-800 text-text">{action.title}</div>
|
|
240
|
+
<span className={cn('rounded-full border px-2 py-0.5 text-[9px] font-800 uppercase tracking-[0.08em]', action.severity === 'high' ? 'border-rose-500/25 text-rose-200' : action.severity === 'medium' ? 'border-amber-500/25 text-amber-200' : 'border-emerald-500/25 text-emerald-200')}>
|
|
241
|
+
{action.severity}
|
|
242
|
+
</span>
|
|
243
|
+
</div>
|
|
244
|
+
<div className="mt-1 line-clamp-2 text-[11px] leading-relaxed text-text-3/65">{action.summary}</div>
|
|
245
|
+
</button>
|
|
246
|
+
))
|
|
247
|
+
)}
|
|
248
|
+
</div>
|
|
249
|
+
</div>
|
|
250
|
+
</div>
|
|
251
|
+
</div>
|
|
252
|
+
)}
|
|
253
|
+
</section>
|
|
254
|
+
)
|
|
255
|
+
}
|
|
256
|
+
|
|
132
257
|
function EvalEnvironmentPanel({ plan, loading, onRefresh }: {
|
|
133
258
|
plan: EvalEnvironmentPlan | null
|
|
134
259
|
loading: boolean
|
|
@@ -344,6 +469,8 @@ export function QualityWorkspace() {
|
|
|
344
469
|
const [evalGateScope, setEvalGateScope] = useState<'scenario' | 'suite'>('scenario')
|
|
345
470
|
const [evalGateLoading, setEvalGateLoading] = useState(false)
|
|
346
471
|
const [evalBaselineBusy, setEvalBaselineBusy] = useState(false)
|
|
472
|
+
const [releaseReadiness, setReleaseReadiness] = useState<ReleaseReadinessReport | null>(null)
|
|
473
|
+
const [releaseReadinessLoading, setReleaseReadinessLoading] = useState(false)
|
|
347
474
|
const [approvalBusy, setApprovalBusy] = useState<string | null>(null)
|
|
348
475
|
|
|
349
476
|
useEffect(() => {
|
|
@@ -432,6 +559,25 @@ export function QualityWorkspace() {
|
|
|
432
559
|
}
|
|
433
560
|
}, [evalGateScope, selectedAgentId, selectedScenarioId, selectedSuite])
|
|
434
561
|
|
|
562
|
+
const loadReleaseReadiness = useCallback(async () => {
|
|
563
|
+
const params = new URLSearchParams({ range: '7d' })
|
|
564
|
+
if (selectedAgentId) {
|
|
565
|
+
params.set('agentId', selectedAgentId)
|
|
566
|
+
if (evalGateScope === 'scenario' && selectedScenarioId) params.set('scenarioId', selectedScenarioId)
|
|
567
|
+
if (evalGateScope === 'suite') params.set('suite', selectedSuite)
|
|
568
|
+
}
|
|
569
|
+
setReleaseReadinessLoading(true)
|
|
570
|
+
try {
|
|
571
|
+
const report = await api<ReleaseReadinessReport>('GET', `/quality/release-readiness?${params.toString()}`)
|
|
572
|
+
setReleaseReadiness(report)
|
|
573
|
+
} catch (err) {
|
|
574
|
+
setReleaseReadiness(null)
|
|
575
|
+
toast.error(err instanceof Error ? err.message : 'Unable to check release readiness')
|
|
576
|
+
} finally {
|
|
577
|
+
setReleaseReadinessLoading(false)
|
|
578
|
+
}
|
|
579
|
+
}, [evalGateScope, selectedAgentId, selectedScenarioId, selectedSuite])
|
|
580
|
+
|
|
435
581
|
useEffect(() => {
|
|
436
582
|
void loadQualityData()
|
|
437
583
|
}, [loadQualityData])
|
|
@@ -454,6 +600,10 @@ export function QualityWorkspace() {
|
|
|
454
600
|
void loadEvalGate()
|
|
455
601
|
}, [loadEvalGate])
|
|
456
602
|
|
|
603
|
+
useEffect(() => {
|
|
604
|
+
void loadReleaseReadiness()
|
|
605
|
+
}, [loadReleaseReadiness])
|
|
606
|
+
|
|
457
607
|
useEffect(() => {
|
|
458
608
|
if (!suites.some((suite) => suite.name === selectedSuite) && suites[0]) {
|
|
459
609
|
setSelectedSuite(suites[0].name)
|
|
@@ -495,12 +645,13 @@ export function QualityWorkspace() {
|
|
|
495
645
|
await loadQualityData({ silent: true })
|
|
496
646
|
await loadEvalEnvironmentPlan()
|
|
497
647
|
await loadEvalGate()
|
|
648
|
+
await loadReleaseReadiness()
|
|
498
649
|
} catch (err) {
|
|
499
650
|
toast.error(err instanceof Error ? err.message : 'Eval scenario failed')
|
|
500
651
|
} finally {
|
|
501
652
|
setEvalBusy(null)
|
|
502
653
|
}
|
|
503
|
-
}, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadEvalGate, loadQualityData, selectedAgentId, selectedScenarioId])
|
|
654
|
+
}, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadEvalGate, loadQualityData, loadReleaseReadiness, selectedAgentId, selectedScenarioId])
|
|
504
655
|
|
|
505
656
|
const runSuite = useCallback(async (suiteName: string) => {
|
|
506
657
|
if (!selectedAgentId) {
|
|
@@ -524,12 +675,13 @@ export function QualityWorkspace() {
|
|
|
524
675
|
await loadQualityData({ silent: true })
|
|
525
676
|
await loadEvalEnvironmentPlan()
|
|
526
677
|
await loadEvalGate()
|
|
678
|
+
await loadReleaseReadiness()
|
|
527
679
|
} catch (err) {
|
|
528
680
|
toast.error(err instanceof Error ? err.message : 'Eval suite failed')
|
|
529
681
|
} finally {
|
|
530
682
|
setEvalBusy(null)
|
|
531
683
|
}
|
|
532
|
-
}, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadEvalGate, loadQualityData, selectedAgentId])
|
|
684
|
+
}, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadEvalGate, loadQualityData, loadReleaseReadiness, selectedAgentId])
|
|
533
685
|
|
|
534
686
|
const setEvalBaseline = useCallback(async () => {
|
|
535
687
|
if (!selectedAgentId) {
|
|
@@ -547,13 +699,14 @@ export function QualityWorkspace() {
|
|
|
547
699
|
: { agentId: selectedAgentId, suite: selectedSuite, minPercent: evalGate?.minPercent ?? 80, maxRegressionPoints: evalGate?.maxRegressionPoints ?? 5 }
|
|
548
700
|
const result = await api<{ gate: EvalGateResult }>('POST', '/eval/baselines', body)
|
|
549
701
|
setEvalGate(result.gate)
|
|
702
|
+
await loadReleaseReadiness()
|
|
550
703
|
toast.success('Eval baseline saved')
|
|
551
704
|
} catch (err) {
|
|
552
705
|
toast.error(err instanceof Error ? err.message : 'Unable to save eval baseline')
|
|
553
706
|
} finally {
|
|
554
707
|
setEvalBaselineBusy(false)
|
|
555
708
|
}
|
|
556
|
-
}, [evalGate, evalGateScope, selectedAgentId, selectedScenarioId, selectedSuite])
|
|
709
|
+
}, [evalGate, evalGateScope, loadReleaseReadiness, selectedAgentId, selectedScenarioId, selectedSuite])
|
|
557
710
|
|
|
558
711
|
const actOnApproval = useCallback(async (approval: ApprovalRequest, approved: boolean) => {
|
|
559
712
|
setApprovalBusy(approval.id)
|
|
@@ -561,12 +714,13 @@ export function QualityWorkspace() {
|
|
|
561
714
|
await api('POST', '/approvals', { id: approval.id, approved })
|
|
562
715
|
toast.success(approved ? 'Approval granted' : 'Approval denied')
|
|
563
716
|
await loadQualityData({ silent: true })
|
|
717
|
+
await loadReleaseReadiness()
|
|
564
718
|
} catch (err) {
|
|
565
719
|
toast.error(err instanceof Error ? err.message : 'Unable to update approval')
|
|
566
720
|
} finally {
|
|
567
721
|
setApprovalBusy(null)
|
|
568
722
|
}
|
|
569
|
-
}, [loadQualityData])
|
|
723
|
+
}, [loadQualityData, loadReleaseReadiness])
|
|
570
724
|
|
|
571
725
|
if (loading) {
|
|
572
726
|
return (
|
|
@@ -630,6 +784,12 @@ export function QualityWorkspace() {
|
|
|
630
784
|
{activeTab === 'overview' && (
|
|
631
785
|
<div className="flex flex-col gap-6">
|
|
632
786
|
<OperationsPulsePanel defaultRange="7d" compact />
|
|
787
|
+
<ReleaseReadinessPanel
|
|
788
|
+
report={releaseReadiness}
|
|
789
|
+
loading={releaseReadinessLoading}
|
|
790
|
+
onRefresh={() => void loadReleaseReadiness()}
|
|
791
|
+
onOpenHref={(href) => router.push(href)}
|
|
792
|
+
/>
|
|
633
793
|
|
|
634
794
|
<div className="grid gap-3 md:grid-cols-2 xl:grid-cols-4">
|
|
635
795
|
<StatTile
|
|
package/src/lib/quality/release-readiness.test.ts
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { describe, it } from 'node:test'
|
|
3
|
+
|
|
4
|
+
import { buildReleaseReadinessReport } from './release-readiness'
|
|
5
|
+
import type { EvalGateResult } from '@/lib/server/eval/types'
|
|
6
|
+
import type { OperationPulse } from '@/types'
|
|
7
|
+
|
|
8
|
+
const now = 100_000
|
|
9
|
+
|
|
10
|
+
function pulse(overrides: Partial<OperationPulse> = {}): OperationPulse {
|
|
11
|
+
return {
|
|
12
|
+
generatedAt: now,
|
|
13
|
+
range: '24h',
|
|
14
|
+
windowStart: now - 86_400_000,
|
|
15
|
+
kpis: {
|
|
16
|
+
activeMissions: 0,
|
|
17
|
+
runningRuns: 0,
|
|
18
|
+
failedRuns: 0,
|
|
19
|
+
pendingApprovals: 0,
|
|
20
|
+
connectorAttention: 0,
|
|
21
|
+
gatewayAttention: 0,
|
|
22
|
+
budgetWarnings: 0,
|
|
23
|
+
},
|
|
24
|
+
actions: [],
|
|
25
|
+
...overrides,
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
function evalGate(overrides: Partial<EvalGateResult> = {}): EvalGateResult {
|
|
30
|
+
return {
|
|
31
|
+
agentId: 'agent_1',
|
|
32
|
+
scope: {
|
|
33
|
+
type: 'suite',
|
|
34
|
+
id: 'core',
|
|
35
|
+
label: 'core',
|
|
36
|
+
scenarioIds: ['coding-prime'],
|
|
37
|
+
},
|
|
38
|
+
status: 'pass',
|
|
39
|
+
generatedAt: now,
|
|
40
|
+
baseline: null,
|
|
41
|
+
latestRuns: [],
|
|
42
|
+
currentScore: 10,
|
|
43
|
+
currentMaxScore: 10,
|
|
44
|
+
currentPercent: 100,
|
|
45
|
+
regressionPoints: 0,
|
|
46
|
+
minPercent: 80,
|
|
47
|
+
maxRegressionPoints: 5,
|
|
48
|
+
checks: [{ code: 'score_threshold_met', status: 'pass', message: 'Current score meets the 80% gate.' }],
|
|
49
|
+
...overrides,
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
describe('release readiness report', () => {
|
|
54
|
+
it('passes when eval gate and operations pulse are clean', () => {
|
|
55
|
+
const report = buildReleaseReadinessReport({
|
|
56
|
+
pulse: pulse(),
|
|
57
|
+
evalGate: evalGate(),
|
|
58
|
+
})
|
|
59
|
+
|
|
60
|
+
assert.equal(report.status, 'ready')
|
|
61
|
+
assert.equal(report.score, 100)
|
|
62
|
+
assert.equal(report.blockerCount, 0)
|
|
63
|
+
assert.equal(report.warningCount, 0)
|
|
64
|
+
assert.ok(report.checks.some((check) => check.code === 'eval_gate_passed'))
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
it('warns when no eval gate is selected', () => {
|
|
68
|
+
const report = buildReleaseReadinessReport({
|
|
69
|
+
pulse: pulse(),
|
|
70
|
+
evalGate: null,
|
|
71
|
+
})
|
|
72
|
+
|
|
73
|
+
assert.equal(report.status, 'warning')
|
|
74
|
+
assert.equal(report.blockerCount, 0)
|
|
75
|
+
assert.equal(report.warningCount, 1)
|
|
76
|
+
assert.ok(report.score < 100)
|
|
77
|
+
assert.ok(report.checks.some((check) => check.code === 'eval_gate_missing'))
|
|
78
|
+
})
|
|
79
|
+
|
|
80
|
+
it('blocks when eval regression gate fails', () => {
|
|
81
|
+
const report = buildReleaseReadinessReport({
|
|
82
|
+
pulse: pulse(),
|
|
83
|
+
evalGate: evalGate({
|
|
84
|
+
status: 'fail',
|
|
85
|
+
currentPercent: 60,
|
|
86
|
+
checks: [{ code: 'score_below_threshold', status: 'fail', message: 'Current score is below the 80% gate.' }],
|
|
87
|
+
}),
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
assert.equal(report.status, 'blocked')
|
|
91
|
+
assert.equal(report.blockerCount, 1)
|
|
92
|
+
assert.ok(report.score <= 70)
|
|
93
|
+
assert.ok(report.checks.some((check) => check.code === 'eval_gate_failed'))
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
it('blocks on failed runs and pending approvals, then surfaces pulse actions', () => {
|
|
97
|
+
const report = buildReleaseReadinessReport({
|
|
98
|
+
pulse: pulse({
|
|
99
|
+
kpis: {
|
|
100
|
+
activeMissions: 1,
|
|
101
|
+
runningRuns: 1,
|
|
102
|
+
failedRuns: 2,
|
|
103
|
+
pendingApprovals: 3,
|
|
104
|
+
connectorAttention: 1,
|
|
105
|
+
gatewayAttention: 1,
|
|
106
|
+
budgetWarnings: 1,
|
|
107
|
+
},
|
|
108
|
+
actions: [{
|
|
109
|
+
id: 'run:failed',
|
|
110
|
+
kind: 'run',
|
|
111
|
+
severity: 'high',
|
|
112
|
+
title: 'Review failed run',
|
|
113
|
+
summary: 'Run failed',
|
|
114
|
+
href: '/quality?tab=runs',
|
|
115
|
+
evidence: ['run'],
|
|
116
|
+
createdAt: now,
|
|
117
|
+
}],
|
|
118
|
+
}),
|
|
119
|
+
evalGate: evalGate(),
|
|
120
|
+
})
|
|
121
|
+
|
|
122
|
+
assert.equal(report.status, 'blocked')
|
|
123
|
+
assert.equal(report.blockerCount, 2)
|
|
124
|
+
assert.ok(report.warningCount >= 4)
|
|
125
|
+
assert.equal(report.nextActions[0]?.id, 'run:failed')
|
|
126
|
+
assert.ok(report.checks.some((check) => check.code === 'failed_runs_present'))
|
|
127
|
+
assert.ok(report.checks.some((check) => check.code === 'pending_approvals_present'))
|
|
128
|
+
})
|
|
129
|
+
})
|
|
package/src/lib/quality/release-readiness.ts
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import type { EvalGateResult } from '@/lib/server/eval/types'
|
|
2
|
+
import type { OperationPulse, OperationPulseAction, OperationPulseRange } from '@/types'
|
|
3
|
+
|
|
4
|
+
export type ReleaseReadinessStatus = 'ready' | 'warning' | 'blocked'
|
|
5
|
+
|
|
6
|
+
export interface ReleaseReadinessCheck {
|
|
7
|
+
code: string
|
|
8
|
+
status: ReleaseReadinessStatus
|
|
9
|
+
title: string
|
|
10
|
+
summary: string
|
|
11
|
+
href?: string
|
|
12
|
+
evidence?: string[]
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export interface ReleaseReadinessReport {
|
|
16
|
+
generatedAt: number
|
|
17
|
+
range: OperationPulseRange
|
|
18
|
+
status: ReleaseReadinessStatus
|
|
19
|
+
score: number
|
|
20
|
+
blockerCount: number
|
|
21
|
+
warningCount: number
|
|
22
|
+
pulse: OperationPulse
|
|
23
|
+
evalGate: EvalGateResult | null
|
|
24
|
+
checks: ReleaseReadinessCheck[]
|
|
25
|
+
nextActions: OperationPulseAction[]
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
const BLOCKER_PENALTY = 30
|
|
29
|
+
const WARNING_PENALTY = 10
|
|
30
|
+
|
|
31
|
+
function readinessStatus(checks: ReleaseReadinessCheck[]): ReleaseReadinessStatus {
|
|
32
|
+
if (checks.some((check) => check.status === 'blocked')) return 'blocked'
|
|
33
|
+
if (checks.some((check) => check.status === 'warning')) return 'warning'
|
|
34
|
+
return 'ready'
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function readinessScore(checks: ReleaseReadinessCheck[]): number {
|
|
38
|
+
const penalty = checks.reduce((sum, check) => {
|
|
39
|
+
if (check.status === 'blocked') return sum + BLOCKER_PENALTY
|
|
40
|
+
if (check.status === 'warning') return sum + WARNING_PENALTY
|
|
41
|
+
return sum
|
|
42
|
+
}, 0)
|
|
43
|
+
return Math.max(0, 100 - penalty)
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
function plural(count: number, singular: string, pluralLabel = `${singular}s`): string {
|
|
47
|
+
return `${count} ${count === 1 ? singular : pluralLabel}`
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
 * Appends a check to the end of the list, mutating `checks` in place.
 *
 * @param checks - List being built for the report.
 * @param check - Finding to append.
 */
function addCheck(checks: ReleaseReadinessCheck[], check: ReleaseReadinessCheck): void {
  checks[checks.length] = check
}
|
|
53
|
+
|
|
54
|
+
/**
 * Builds the release-readiness report from an operations pulse and an
 * optional eval gate result.
 *
 * Exactly one eval-gate check is emitted first (missing, failed, warning or
 * passed). It is followed by one check for every operations KPI that is
 * greater than zero, in a fixed order: failed runs, pending approvals,
 * running runs, connector attention, gateway attention, budget warnings,
 * active missions. Failed runs and pending approvals are blockers; the rest
 * are warnings.
 *
 * @param input - `pulse` supplies the KPIs, range, timestamp and actions;
 *   `evalGate` is the gate result, where `null` or omitted means no gate was
 *   selected.
 * @returns The assembled report, including the overall status, score and
 *   blocker/warning counts derived from the checks.
 */
export function buildReleaseReadinessReport(input: {
  pulse: OperationPulse
  evalGate?: EvalGateResult | null
}): ReleaseReadinessReport {
  const checks: ReleaseReadinessCheck[] = []
  const evalGate = input.evalGate ?? null

  // --- Eval gate: always contributes exactly one check. ---
  if (evalGate) {
    switch (evalGate.status) {
      case 'fail':
        addCheck(checks, {
          code: 'eval_gate_failed',
          status: 'blocked',
          title: 'Eval gate failed',
          summary: `${evalGate.scope.label} is not passing the configured eval release gate.`,
          href: '/quality?tab=evals',
          evidence: evalGate.checks
            .filter((check) => check.status === 'fail')
            .map((check) => check.message),
        })
        break
      case 'warn':
        // NOTE(review): the title mentions a baseline for every 'warn' gate,
        // whatever the individual warning messages say.
        addCheck(checks, {
          code: 'eval_gate_warning',
          status: 'warning',
          title: 'Eval gate needs a baseline',
          summary: `${evalGate.scope.label} passes the score threshold but still has release-gate warnings.`,
          href: '/quality?tab=evals',
          evidence: evalGate.checks
            .filter((check) => check.status === 'warn')
            .map((check) => check.message),
        })
        break
      default:
        // Any status other than 'fail' / 'warn' is treated as a pass.
        // NOTE(review): a nullish currentPercent renders as "n/a% current score".
        addCheck(checks, {
          code: 'eval_gate_passed',
          status: 'ready',
          title: 'Eval gate passed',
          summary: `${evalGate.scope.label} meets the configured score and regression checks.`,
          href: '/quality?tab=evals',
          evidence: [`${evalGate.currentPercent ?? 'n/a'}% current score`],
        })
        break
    }
  } else {
    addCheck(checks, {
      code: 'eval_gate_missing',
      status: 'warning',
      title: 'Select an eval gate',
      summary: 'No eval regression gate is included in this readiness report.',
      href: '/quality?tab=evals',
    })
  }

  // --- Operations KPIs: one check per KPI that is above zero. ---
  const { pulse } = input
  const { kpis } = pulse
  // Array order is the order in which checks appear in the report.
  const kpiRules: Array<{ count: number; build: (count: number) => ReleaseReadinessCheck }> = [
    {
      count: kpis.failedRuns,
      build: (count) => ({
        code: 'failed_runs_present',
        status: 'blocked',
        title: 'Failed runs need review',
        summary: `${plural(count, 'failed run')} found in the ${pulse.range} operations window.`,
        href: '/quality?tab=runs',
      }),
    },
    {
      count: kpis.pendingApprovals,
      build: (count) => ({
        code: 'pending_approvals_present',
        status: 'blocked',
        title: 'Pending approvals need decisions',
        summary: `${plural(count, 'approval')} still waiting on an operator.`,
        href: '/quality?tab=approvals',
      }),
    },
    {
      count: kpis.runningRuns,
      build: (count) => ({
        code: 'active_runs_present',
        status: 'warning',
        title: 'Runs are still active',
        summary: `${plural(count, 'run')} queued or running while this report was generated.`,
        href: '/runs',
      }),
    },
    {
      count: kpis.connectorAttention,
      build: (count) => ({
        code: 'connector_attention_present',
        status: 'warning',
        title: 'Connector readiness needs attention',
        summary: `${plural(count, 'connector')} reporting degraded readiness.`,
        href: '/connectors',
      }),
    },
    {
      count: kpis.gatewayAttention,
      build: (count) => ({
        code: 'gateway_attention_present',
        status: 'warning',
        title: 'Gateway readiness needs attention',
        summary: `${plural(count, 'gateway')} reporting topology or environment warnings.`,
        href: '/providers',
      }),
    },
    {
      count: kpis.budgetWarnings,
      build: (count) => ({
        code: 'budget_warnings_present',
        status: 'warning',
        title: 'Mission budget pressure',
        summary: `${plural(count, 'mission')} near a configured budget limit.`,
        href: '/missions',
      }),
    },
    {
      count: kpis.activeMissions,
      build: (count) => ({
        code: 'active_missions_present',
        status: 'warning',
        title: 'Missions are still active',
        summary: `${plural(count, 'mission')} running or paused in the operations window.`,
        href: '/missions',
      }),
    },
  ]
  for (const rule of kpiRules) {
    if (rule.count > 0) {
      addCheck(checks, rule.build(rule.count))
    }
  }

  // --- Totals and final assembly. ---
  const tally = (wanted: ReleaseReadinessStatus): number =>
    checks.reduce((total, entry) => (entry.status === wanted ? total + 1 : total), 0)
  const blockerCount = tally('blocked')
  const warningCount = tally('warning')

  return {
    generatedAt: pulse.generatedAt,
    range: pulse.range,
    status: readinessStatus(checks),
    score: readinessScore(checks),
    blockerCount,
    warningCount,
    pulse,
    evalGate,
    checks,
    // Only the first eight pulse actions are surfaced in the report.
    nextActions: pulse.actions.slice(0, 8),
  }
}
|