@swarmclawai/swarmclaw 1.9.6 → 1.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -399,6 +399,25 @@ Operational docs: https://swarmclaw.ai/docs/observability
399
399
 
400
400
  ## Releases
401
401
 
402
+ ### v1.9.8 Highlights
403
+
404
+ Bundled release-readiness release: a single operator report that combines eval gates, operations blockers, approvals, and runtime readiness.
405
+
406
+ - **Release readiness report.** `/api/quality/release-readiness` returns a scored ready/warning/blocked report built from eval regression gates and Operations Pulse evidence.
407
+ - **Quality Center ship gate.** The Quality overview now shows readiness score, blockers, warnings, checks, and next actions before operators cut a release.
408
+ - **CLI readiness checks.** `swarmclaw operations readiness` exposes the same report for scripts and CI.
409
+ - **Browser coverage.** The e2e smoke test now verifies the release-readiness panel on `/quality`.
410
+
411
+ ### v1.9.7 Highlights
412
+
413
+ Bundled eval-gate release: approved baselines, regression checks, and Quality Center release gates for repeatable eval evidence.
414
+
415
+ - **Eval regression baselines.** Operators can snapshot the latest scenario or suite score as an approved baseline with minimum score and regression allowance settings.
416
+ - **Release gate API.** `/api/eval/gate` compares current eval evidence against thresholds and baselines, while `/api/eval/baselines` lists and updates approved baselines.
417
+ - **CLI gate checks.** `swarmclaw eval gate`, `swarmclaw eval baselines`, and `swarmclaw eval baseline-set` expose the same release-gate workflow from automation.
418
+ - **Quality Center gate panel.** Eval Lab now shows pass/warn/fail status, latest-run coverage, current score, baseline score, regression points, and actionable checks.
419
+ - **Public-source hygiene.** Generic implementation comments now describe SwarmClaw behavior without naming internal comparison sources.
420
+
402
421
  ### v1.9.6 Highlights
403
422
 
404
423
  Bundled eval-environment release: validation preflights, deterministic eval workspaces, and clearer operator readiness before spending run budget.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@swarmclawai/swarmclaw",
3
- "version": "1.9.6",
3
+ "version": "1.9.8",
4
4
  "description": "Build and run autonomous AI agents with OpenClaw, Hermes, multiple model providers, orchestration, delegation, memory, skills, schedules, and chat connectors.",
5
5
  "main": "electron-dist/main.js",
6
6
  "license": "MIT",
@@ -87,7 +87,7 @@
87
87
  "test:cli": "node --test src/cli/*.test.js bin/*.test.js scripts/electron-after-pack.test.mjs scripts/ensure-sandbox-browser-image.test.mjs scripts/postinstall.test.mjs scripts/run-next-build.test.mjs scripts/run-next-typegen.test.mjs",
88
88
  "test:setup": "tsx --test src/app/api/setup/check-provider/route.test.ts src/lib/server/provider-model-discovery.test.ts src/components/auth/setup-wizard/utils.test.ts src/components/auth/setup-wizard/types.test.ts src/hooks/setup-done-detection.test.ts src/lib/setup-defaults.test.ts src/lib/server/storage-auth.test.ts src/lib/server/storage-auth-docker.test.ts",
89
89
  "test:openclaw": "tsx --test src/lib/openclaw/openclaw-agent-id.test.ts src/lib/openclaw/openclaw-endpoint.test.ts src/lib/server/agents/agent-runtime-config.test.ts src/lib/server/build-llm.test.ts src/lib/server/connectors/connector-routing.test.ts src/lib/server/connectors/openclaw.test.ts src/lib/server/connectors/swarmdock.test.ts src/lib/server/gateway/protocol.test.ts src/lib/server/gateways/gateway-topology.test.ts src/lib/server/llm-response-cache.test.ts src/lib/server/mcp-conformance.test.ts src/lib/server/openclaw/agent-resolver.test.ts src/lib/server/openclaw/deploy.test.ts src/lib/server/openclaw/skills-normalize.test.ts src/lib/server/session-tools/openclaw-nodes.test.ts src/lib/server/session-tools/swarmdock.test.ts src/lib/server/tasks/task-quality-gate.test.ts src/lib/server/tasks/task-validation.test.ts src/lib/server/tool-capability-policy.test.ts src/lib/providers/openai.test.ts src/lib/providers/openclaw-exports.test.ts src/app/api/gateways/topology-route.test.ts src/app/api/openclaw/dashboard-url/route.test.ts",
90
- "test:runtime": "tsx --test src/lib/a2a/agent-card.test.ts src/lib/strip-internal-metadata.test.ts src/lib/provider-sets.test.ts src/lib/providers/opencode-cli.test.ts src/lib/providers/cli-provider-metadata.test.ts src/lib/providers/cli-utils.test.ts src/lib/providers/generic-cli.test.ts src/lib/server/agents/delegation-advisory.test.ts src/lib/server/cli-provider-readiness.test.ts src/lib/server/provider-health.test.ts src/lib/server/mcp-gateway-runtime.test.ts src/lib/server/mcp-connection-pool.test.ts src/lib/server/knowledge-sources.test.ts src/lib/server/extension-managed-resources.test.ts src/lib/server/eval/environment-plan.test.ts src/lib/server/chat-execution/chat-execution-grounding.test.ts src/lib/server/chat-execution/chat-turn-preparation.test.ts src/lib/server/chat-execution/iteration-timers.test.ts src/lib/server/chat-execution/post-stream-finalization.test.ts src/lib/server/chat-execution/reasoning-tag-scrubber.test.ts src/lib/server/chats/clear-undo-snapshots.test.ts src/lib/server/connectors/email.test.ts src/lib/server/protocols/protocol-service.test.ts src/lib/server/runtime/run-ledger.test.ts src/lib/server/runtime/queue-retry-policy.test.ts src/lib/server/runs/run-brief.test.ts src/lib/server/operations/operation-pulse.test.ts src/lib/server/artifacts/artifact-resolver.test.ts src/lib/server/observability/otel-config.test.ts src/lib/server/safe-parse-body.test.ts src/lib/server/missions/mission-templates.test.ts src/lib/server/sharing/share-link-repository.test.ts src/lib/server/sharing/share-resolver.test.ts src/lib/server/tasks/task-execution-workspace.test.ts src/lib/server/tasks/task-service.test.ts src/lib/server/session-tools/execute.test.ts src/lib/server/session-tools/manage-tasks.test.ts src/lib/app/view-constants.test.ts src/lib/quality/quality-summary.test.ts src/app/api/approvals/route.test.ts src/app/api/agents/agents-route.test.ts src/app/api/tasks/tasks-route.test.ts src/app/api/tasks/task-workspace-route.test.ts 
src/app/api/chats/chat-route.test.ts src/app/api/chats/clear-route.test.ts src/app/api/chats/compact-route.test.ts src/app/api/chats/context-status-route.test.ts src/app/api/connectors/connector-doctor-route.test.ts src/app/api/extensions/managed-resources/route.test.ts src/app/api/healthz/route.test.ts src/app/api/logs/route.test.ts src/app/api/portability/export/route.test.ts src/app/api/portability/import/route.test.ts src/app/api/providers/[id]/route.test.ts src/app/api/tts/route.test.ts",
90
+ "test:runtime": "tsx --test src/lib/a2a/agent-card.test.ts src/lib/strip-internal-metadata.test.ts src/lib/provider-sets.test.ts src/lib/providers/opencode-cli.test.ts src/lib/providers/cli-provider-metadata.test.ts src/lib/providers/cli-utils.test.ts src/lib/providers/generic-cli.test.ts src/lib/server/agents/delegation-advisory.test.ts src/lib/server/cli-provider-readiness.test.ts src/lib/server/provider-health.test.ts src/lib/server/mcp-gateway-runtime.test.ts src/lib/server/mcp-connection-pool.test.ts src/lib/server/knowledge-sources.test.ts src/lib/server/extension-managed-resources.test.ts src/lib/server/eval/baseline.test.ts src/lib/server/eval/environment-plan.test.ts src/lib/server/chat-execution/chat-execution-grounding.test.ts src/lib/server/chat-execution/chat-turn-preparation.test.ts src/lib/server/chat-execution/iteration-timers.test.ts src/lib/server/chat-execution/post-stream-finalization.test.ts src/lib/server/chat-execution/reasoning-tag-scrubber.test.ts src/lib/server/chats/clear-undo-snapshots.test.ts src/lib/server/connectors/email.test.ts src/lib/server/protocols/protocol-service.test.ts src/lib/server/runtime/run-ledger.test.ts src/lib/server/runtime/queue-retry-policy.test.ts src/lib/server/runs/run-brief.test.ts src/lib/server/operations/operation-pulse.test.ts src/lib/quality/release-readiness.test.ts src/lib/server/artifacts/artifact-resolver.test.ts src/lib/server/observability/otel-config.test.ts src/lib/server/safe-parse-body.test.ts src/lib/server/missions/mission-templates.test.ts src/lib/server/sharing/share-link-repository.test.ts src/lib/server/sharing/share-resolver.test.ts src/lib/server/tasks/task-execution-workspace.test.ts src/lib/server/tasks/task-service.test.ts src/lib/server/session-tools/execute.test.ts src/lib/server/session-tools/manage-tasks.test.ts src/lib/app/view-constants.test.ts src/lib/quality/quality-summary.test.ts src/app/api/approvals/route.test.ts src/app/api/agents/agents-route.test.ts 
src/app/api/tasks/tasks-route.test.ts src/app/api/tasks/task-workspace-route.test.ts src/app/api/chats/chat-route.test.ts src/app/api/chats/clear-route.test.ts src/app/api/chats/compact-route.test.ts src/app/api/chats/context-status-route.test.ts src/app/api/connectors/connector-doctor-route.test.ts src/app/api/extensions/managed-resources/route.test.ts src/app/api/healthz/route.test.ts src/app/api/logs/route.test.ts src/app/api/portability/export/route.test.ts src/app/api/portability/import/route.test.ts src/app/api/providers/[id]/route.test.ts src/app/api/tts/route.test.ts",
91
91
  "test:builder": "tsx --test src/features/protocols/builder/utils/nodes-to-template.test.ts src/features/protocols/builder/utils/template-to-nodes.test.ts src/features/protocols/builder/validators/dag-validator.test.ts",
92
92
  "test:e2e": "node --import tsx scripts/browser-e2e-smoke.ts",
93
93
  "test:mcp:conformance": "node --import tsx ./scripts/mcp-conformance-check.ts",
@@ -0,0 +1,55 @@
1
+ import { NextResponse } from 'next/server'
2
+ import { z } from 'zod'
3
+ import { evaluateEvalGate, listEvalBaselinesForAgent, setEvalBaseline } from '@/lib/server/eval/baseline'
4
+ import { errorMessage } from '@/lib/shared-utils'
5
+
6
// Reusable field validators shared by the baseline payload schema below.
const optionalIdentifier = z.string().min(1).nullable().optional()
const optionalPercentPoints = z.number().min(0).max(100).nullable().optional()

/**
 * Request body accepted by `POST` on the baselines route.
 *
 * `agentId` is the only required field. Every other field may be omitted or
 * sent as `null`; numeric fields are limited to the 0-100 range.
 */
const BaselineSchema = z.object({
  agentId: z.string().min(1),
  scenarioId: optionalIdentifier,
  suite: optionalIdentifier,
  minPercent: optionalPercentPoints,
  maxRegressionPoints: optionalPercentPoints,
  label: z.string().max(160).nullable().optional(),
  notes: z.string().max(1_000).nullable().optional(),
})
15
+
16
/**
 * Lists eval baselines, passing the optional `agentId` query parameter
 * (or `null` when absent) to `listEvalBaselinesForAgent`.
 *
 * Any thrown error is reported as a JSON `{ error }` payload with HTTP 500.
 */
export async function GET(req: Request) {
  try {
    const agentId = new URL(req.url).searchParams.get('agentId')
    const baselines = listEvalBaselinesForAgent(agentId)
    return NextResponse.json(baselines)
  } catch (err: unknown) {
    const payload = { error: errorMessage(err) }
    return NextResponse.json(payload, { status: 500 })
  }
}
28
+
29
/**
 * Stores an eval baseline and immediately re-evaluates the gate for the
 * same agent/scope so the caller gets both results in one response.
 *
 * Responses:
 * - 400 when the body is not valid JSON or fails `BaselineSchema` validation.
 * - 200 with `{ baseline, gate }` on success.
 * - 500 with `{ error }` when storing or evaluating throws.
 */
export async function POST(req: Request) {
  try {
    // A malformed body is the caller's mistake: answer 400 rather than letting
    // the JSON parse error fall through to the generic 500 handler below.
    let body: unknown
    try {
      body = await req.json()
    } catch {
      return NextResponse.json(
        { error: 'Request body must be valid JSON' },
        { status: 400 },
      )
    }

    const parsed = BaselineSchema.safeParse(body)
    if (!parsed.success) {
      return NextResponse.json(
        { error: parsed.error.issues.map((issue) => issue.message).join(', ') },
        { status: 400 },
      )
    }

    const baseline = setEvalBaseline(parsed.data)
    // Only the scope and threshold fields are forwarded; label/notes belong to
    // the stored baseline, not to the gate evaluation input.
    const gate = evaluateEvalGate({
      agentId: parsed.data.agentId,
      scenarioId: parsed.data.scenarioId,
      suite: parsed.data.suite,
      minPercent: parsed.data.minPercent,
      maxRegressionPoints: parsed.data.maxRegressionPoints,
    })
    return NextResponse.json({ baseline, gate })
  } catch (err: unknown) {
    return NextResponse.json(
      { error: errorMessage(err) },
      { status: 500 },
    )
  }
}
@@ -0,0 +1,36 @@
1
+ import { NextResponse } from 'next/server'
2
+ import { evaluateEvalGate } from '@/lib/server/eval/baseline'
3
+ import { errorMessage } from '@/lib/shared-utils'
4
+
5
/**
 * Converts a raw query-string value into a finite number.
 *
 * @param value Raw parameter value, or `null` when the parameter is absent.
 * @returns The parsed number, or `null` when the value is missing, blank,
 *   or does not convert to a finite number.
 */
function parseNumberParam(value: string | null): number | null {
  const text = value?.trim() ?? ''
  if (text === '') return null
  const candidate = Number(text)
  if (!Number.isFinite(candidate)) return null
  return candidate
}
10
+
11
/**
 * Evaluates the eval gate for the agent named in the query string.
 *
 * Query parameters: `agentId` (required), `scenarioId`, `suite`,
 * `minPercent`, `maxRegressionPoints`. Numeric parameters go through
 * `parseNumberParam`, so blank or non-numeric values are passed as `null`.
 *
 * Responds 400 when `agentId` is missing or empty, and 500 with `{ error }`
 * when evaluation throws.
 */
export async function GET(req: Request) {
  try {
    const query = new URL(req.url).searchParams
    const agentId = query.get('agentId') || ''
    if (agentId === '') {
      return NextResponse.json({ error: 'agentId is required' }, { status: 400 })
    }

    const numberParam = (key: string) => parseNumberParam(query.get(key))
    const gate = evaluateEvalGate({
      agentId,
      scenarioId: query.get('scenarioId'),
      suite: query.get('suite'),
      minPercent: numberParam('minPercent'),
      maxRegressionPoints: numberParam('maxRegressionPoints'),
    })
    return NextResponse.json(gate)
  } catch (err: unknown) {
    const payload = { error: errorMessage(err) }
    return NextResponse.json(payload, { status: 500 })
  }
}
@@ -0,0 +1,38 @@
1
+ import { NextResponse } from 'next/server'
2
+ import { evaluateEvalGate } from '@/lib/server/eval/baseline'
3
+ import { getOperationPulse, normalizeOperationPulseRange } from '@/lib/server/operations/operation-pulse'
4
+ import { buildReleaseReadinessReport } from '@/lib/quality/release-readiness'
5
+ import { errorMessage } from '@/lib/shared-utils'
6
+
7
+ export const dynamic = 'force-dynamic'
8
+
9
/**
 * Reads a numeric query parameter.
 *
 * @param value Raw parameter value, or `null` when the parameter is absent.
 * @returns A finite number, or `null` for missing, blank, or non-numeric input.
 */
function parseNumberParam(value: string | null): number | null {
  const trimmed = value == null ? '' : value.trim()
  if (trimmed.length === 0) return null
  const numeric = Number(trimmed)
  if (Number.isFinite(numeric)) return numeric
  return null
}
14
+
15
/**
 * Builds the release readiness report from the Operations Pulse for the
 * requested `range` and, when an `agentId` is supplied, an eval gate result.
 *
 * Without an `agentId` the report is built with `evalGate: null`.
 * Any thrown error is returned as `{ error }` with HTTP 500.
 */
export async function GET(req: Request) {
  try {
    const query = new URL(req.url).searchParams
    const range = normalizeOperationPulseRange(query.get('range'))
    const pulse = getOperationPulse(range)

    const agentId = query.get('agentId') || ''
    const numberParam = (key: string) => parseNumberParam(query.get(key))
    // The gate is only evaluated when the caller scoped the report to an agent.
    const evalGate = agentId === ''
      ? null
      : evaluateEvalGate({
          agentId,
          scenarioId: query.get('scenarioId'),
          suite: query.get('suite'),
          minPercent: numberParam('minPercent'),
          maxRegressionPoints: numberParam('maxRegressionPoints'),
        })

    const report = buildReleaseReadinessReport({ pulse, evalGate })
    return NextResponse.json(report)
  } catch (err: unknown) {
    const payload = { error: errorMessage(err) }
    return NextResponse.json(payload, { status: 500 })
  }
}
package/src/cli/index.js CHANGED
@@ -210,6 +210,7 @@ const COMMAND_GROUPS = [
210
210
  description: 'Operator triage and readiness summaries',
211
211
  commands: [
212
212
  cmd('pulse', 'GET', '/operations/pulse', 'Get Operations Pulse summary (use --query range=24h or --query range=7d)'),
213
+ cmd('readiness', 'GET', '/quality/release-readiness', 'Get release readiness report (use --query agentId=... and --query suite=core for eval gate coverage)'),
213
214
  ],
214
215
  },
215
216
  {
@@ -232,9 +233,12 @@ const COMMAND_GROUPS = [
232
233
  cmd('suites', 'GET', '/eval/suites', 'List available eval suites (core, swe-bench-lite, gaia-l1, ...)'),
233
234
  cmd('status', 'GET', '/eval/run', 'Get eval run status'),
234
235
  cmd('environment', 'GET', '/eval/environments', 'Preview validation environment readiness for an eval'),
236
+ cmd('baselines', 'GET', '/eval/baselines', 'List eval regression baselines'),
237
+ cmd('gate', 'GET', '/eval/gate', 'Check the latest eval score against thresholds and baseline'),
235
238
  cmd('run', 'POST', '/eval/run', 'Run an eval scenario against an agent', { expectsJsonBody: true }),
236
239
  cmd('suite', 'POST', '/eval/suite', 'Run a full eval suite against an agent (pass { suite: "swe-bench-lite" } in body)', { expectsJsonBody: true }),
237
240
  cmd('environment-prepare', 'POST', '/eval/environments', 'Prepare validation environment readiness for an eval', { expectsJsonBody: true }),
241
+ cmd('baseline-set', 'POST', '/eval/baselines', 'Set an eval regression baseline from latest completed runs', { expectsJsonBody: true }),
238
242
  ],
239
243
  },
240
244
  {
@@ -15,9 +15,10 @@ import {
15
15
  summarizeEvalRuns,
16
16
  summarizeRunHealth,
17
17
  } from '@/lib/quality/quality-summary'
18
+ import type { ReleaseReadinessReport, ReleaseReadinessStatus } from '@/lib/quality/release-readiness'
18
19
  import { cn } from '@/lib/utils'
19
20
  import { useAppStore } from '@/stores/use-app-store'
20
- import type { EvalEnvironmentPlan, EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
21
+ import type { EvalEnvironmentPlan, EvalGateResult, EvalRun, EvalSuiteResult } from '@/lib/server/eval/types'
21
22
  import type { Agent, ApprovalRequest, SessionRunRecord } from '@/types'
22
23
 
23
24
  type QualityTab = 'overview' | 'evals' | 'approvals' | 'runs'
@@ -117,6 +118,142 @@ function checkClass(level: 'info' | 'warn' | 'error'): string {
117
118
  return 'border-white/[0.06] bg-white/[0.025] text-text-3'
118
119
  }
119
120
 
121
/**
 * Badge classes for an overall gate status: emerald for `pass`,
 * amber for `warn`, rose for anything else.
 */
function gateStatusClass(status: EvalGateResult['status']): string {
  switch (status) {
    case 'pass':
      return 'border-emerald-500/25 bg-emerald-500/10 text-emerald-200'
    case 'warn':
      return 'border-amber-500/25 bg-amber-500/10 text-amber-200'
    default:
      return 'border-rose-500/25 bg-rose-500/10 text-rose-200'
  }
}
126
+
127
/**
 * Row classes for a single gate check: rose for `fail`, amber for `warn`,
 * emerald for anything else.
 */
function gateCheckClass(status: EvalGateResult['status']): string {
  switch (status) {
    case 'fail':
      return 'border-rose-500/20 bg-rose-500/[0.05] text-rose-200'
    case 'warn':
      return 'border-amber-500/20 bg-amber-500/[0.05] text-amber-200'
    default:
      return 'border-emerald-500/20 bg-emerald-500/[0.05] text-emerald-200'
  }
}
132
+
133
/**
 * Badge classes for a readiness status: emerald for `ready`,
 * amber for `warning`, rose for anything else.
 */
function readinessStatusClass(status: ReleaseReadinessStatus): string {
  switch (status) {
    case 'ready':
      return 'border-emerald-500/25 bg-emerald-500/10 text-emerald-200'
    case 'warning':
      return 'border-amber-500/25 bg-amber-500/10 text-amber-200'
    default:
      return 'border-rose-500/25 bg-rose-500/10 text-rose-200'
  }
}
138
+
139
/**
 * Text colour for the readiness score: emerald for `ready`,
 * amber for `warning`, rose for anything else.
 */
function readinessScoreTone(status: ReleaseReadinessStatus): string {
  switch (status) {
    case 'ready':
      return 'text-emerald-300'
    case 'warning':
      return 'text-amber-300'
    default:
      return 'text-rose-300'
  }
}
144
+
145
/**
 * Ship-gate card for the Quality overview.
 *
 * Renders the report's status badge, score, blocker and warning counts, a
 * capped list of checks, and a capped list of next actions. While no report
 * is available it shows a loading or empty placeholder instead.
 *
 * @param report Report to render, or `null` when none has been loaded.
 * @param loading Disables the refresh button and switches its label.
 * @param onRefresh Invoked when the refresh button is clicked.
 * @param onOpenHref Invoked with the target href when a check that has an
 *   `href`, or a next action, is clicked.
 */
function ReleaseReadinessPanel({
  report,
  loading,
  onRefresh,
  onOpenHref,
}: {
  report: ReleaseReadinessReport | null
  loading: boolean
  onRefresh: () => void
  onOpenHref: (href: string) => void
}) {
  // Caps keep the card compact; items past the cap are summarised as "+N more"
  // so operators can tell that the list has been truncated.
  const maxVisibleChecks = 6
  const maxVisibleActions = 5
  const hiddenCheckCount = report ? Math.max(0, report.checks.length - maxVisibleChecks) : 0
  const hiddenActionCount = report ? Math.max(0, report.nextActions.length - maxVisibleActions) : 0

  return (
    <section className="rounded-[16px] border border-white/[0.06] bg-white/[0.025] p-4">
      <div className="flex flex-col gap-3 lg:flex-row lg:items-start lg:justify-between">
        <div>
          <div className="text-[11px] font-700 uppercase tracking-[0.12em] text-accent-bright/70">Release Readiness</div>
          <h2 className="mt-1 font-display text-[17px] font-700 text-text">Ship gate report</h2>
          <p className="mt-1 max-w-[680px] text-[12px] leading-relaxed text-text-3/65">
            Combines eval regression gates, operations pulse blockers, pending approvals, active runs, budgets, connectors, and gateway readiness.
          </p>
        </div>
        <button
          type="button"
          onClick={onRefresh}
          disabled={loading}
          className="shrink-0 rounded-[10px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[12px] font-800 text-text-2 transition-colors hover:bg-white/[0.08] disabled:opacity-40"
        >
          {loading ? 'Checking' : 'Refresh gate'}
        </button>
      </div>

      {!report ? (
        <div className="mt-4 rounded-[12px] border border-dashed border-white/[0.08] bg-white/[0.02] px-4 py-5 text-[12px] text-text-3/65">
          {loading ? 'Building release readiness report...' : 'No release readiness report is available yet.'}
        </div>
      ) : (
        <div className="mt-4 grid gap-4 xl:grid-cols-[260px_1fr]">
          <div className="rounded-[14px] border border-white/[0.06] bg-white/[0.025] p-4">
            <span className={cn('inline-flex rounded-full border px-2.5 py-1 text-[10px] font-800 uppercase tracking-[0.1em]', readinessStatusClass(report.status))}>
              {report.status}
            </span>
            <div className={cn('mt-4 font-display text-[42px] font-700 tracking-[-0.04em]', readinessScoreTone(report.status))}>{report.score}</div>
            <div className="mt-1 text-[12px] text-text-3/65">readiness score</div>
            <div className="mt-4 grid grid-cols-2 gap-2">
              <div className="rounded-[10px] bg-white/[0.035] px-3 py-2">
                <div className="text-[10px] font-700 uppercase tracking-[0.1em] text-text-3/50">Blockers</div>
                <div className="mt-1 text-[18px] font-800 text-text">{report.blockerCount}</div>
              </div>
              <div className="rounded-[10px] bg-white/[0.035] px-3 py-2">
                <div className="text-[10px] font-700 uppercase tracking-[0.1em] text-text-3/50">Warnings</div>
                <div className="mt-1 text-[18px] font-800 text-text">{report.warningCount}</div>
              </div>
            </div>
          </div>

          <div className="grid gap-3 lg:grid-cols-2">
            <div className="rounded-[14px] border border-white/[0.06] bg-white/[0.02] p-3">
              <div className="text-[12px] font-800 text-text">Checks</div>
              <div className="mt-3 flex flex-col gap-2">
                {report.checks.slice(0, maxVisibleChecks).map((check) => (
                  <button
                    key={check.code}
                    type="button"
                    // A check without a destination is informational only, so it
                    // is disabled rather than left as a button that does nothing.
                    disabled={!check.href}
                    onClick={() => check.href && onOpenHref(check.href)}
                    className={cn(
                      'rounded-[10px] border px-3 py-2 text-left transition-colors',
                      readinessStatusClass(check.status),
                      check.href ? 'hover:bg-white/[0.08]' : '',
                    )}
                  >
                    <div className="text-[11px] font-800 uppercase tracking-[0.08em]">{check.status}</div>
                    <div className="mt-1 text-[12px] font-700 text-text">{check.title}</div>
                    <div className="mt-0.5 text-[11px] leading-relaxed text-text-3/70">{check.summary}</div>
                  </button>
                ))}
                {hiddenCheckCount > 0 && (
                  <div className="text-[10px] text-text-3/55">+{hiddenCheckCount} more check{hiddenCheckCount === 1 ? '' : 's'}</div>
                )}
              </div>
            </div>

            <div className="rounded-[14px] border border-white/[0.06] bg-white/[0.02] p-3">
              <div className="text-[12px] font-800 text-text">Next actions</div>
              <div className="mt-3 flex flex-col gap-2">
                {report.nextActions.length === 0 ? (
                  <div className="rounded-[10px] border border-white/[0.06] bg-white/[0.025] px-3 py-4 text-[12px] text-text-3/65">
                    No triage actions are open in the selected window.
                  </div>
                ) : (
                  report.nextActions.slice(0, maxVisibleActions).map((action) => (
                    <button
                      key={action.id}
                      type="button"
                      onClick={() => onOpenHref(action.href)}
                      className="rounded-[10px] border border-white/[0.06] bg-white/[0.025] px-3 py-2 text-left transition-colors hover:bg-white/[0.06]"
                    >
                      <div className="flex items-center justify-between gap-2">
                        <div className="text-[12px] font-800 text-text">{action.title}</div>
                        <span className={cn('rounded-full border px-2 py-0.5 text-[9px] font-800 uppercase tracking-[0.08em]', action.severity === 'high' ? 'border-rose-500/25 text-rose-200' : action.severity === 'medium' ? 'border-amber-500/25 text-amber-200' : 'border-emerald-500/25 text-emerald-200')}>
                          {action.severity}
                        </span>
                      </div>
                      <div className="mt-1 line-clamp-2 text-[11px] leading-relaxed text-text-3/65">{action.summary}</div>
                    </button>
                  ))
                )}
                {hiddenActionCount > 0 && (
                  <div className="text-[10px] text-text-3/55">+{hiddenActionCount} more action{hiddenActionCount === 1 ? '' : 's'}</div>
                )}
              </div>
            </div>
          </div>
        </div>
      )}
    </section>
  )
}
256
+
120
257
  function EvalEnvironmentPanel({ plan, loading, onRefresh }: {
121
258
  plan: EvalEnvironmentPlan | null
122
259
  loading: boolean
@@ -195,6 +332,115 @@ function EvalEnvironmentPanel({ plan, loading, onRefresh }: {
195
332
  )
196
333
  }
197
334
 
335
/**
 * Regression-gate card for the eval view.
 *
 * Shows a scenario/suite scope toggle and, once a gate result is available,
 * its status, scope label, latest-run coverage, current and baseline scores,
 * regression points, up to four checks, and a button to set or update the
 * baseline.
 *
 * @param gate Gate result to render, or `null` when none is available.
 * @param loading Disables the refresh button and switches placeholder text.
 * @param busy True while a baseline save is in flight; disables the baseline button.
 * @param scope Currently selected gate scope.
 * @param onScopeChange Invoked with the clicked scope.
 * @param onRefresh Invoked when the refresh button is clicked.
 * @param onSetBaseline Invoked when the baseline button is clicked.
 */
function EvalGatePanel({
  gate,
  loading,
  busy,
  scope,
  onScopeChange,
  onRefresh,
  onSetBaseline,
}: {
  gate: EvalGateResult | null
  loading: boolean
  busy: boolean
  scope: 'scenario' | 'suite'
  onScopeChange: (scope: 'scenario' | 'suite') => void
  onRefresh: () => void
  onSetBaseline: () => void
}) {
  const maxVisibleChecks = 4
  const hiddenCheckCount = gate ? Math.max(0, gate.checks.length - maxVisibleChecks) : 0
  // The baseline button is disabled when there are no latest runs or the gate
  // reports a `missing_scope_runs` check.
  const baselineUnavailable = gate
    ? gate.latestRuns.length === 0 || gate.checks.some((check) => check.code === 'missing_scope_runs')
    : true

  return (
    <div className="rounded-[12px] border border-white/[0.06] bg-white/[0.025] px-3 py-3">
      <div className="flex items-start justify-between gap-3">
        <div>
          <div className="text-[13px] font-800 text-text">Regression gate</div>
          <p className="mt-1 text-[11px] leading-relaxed text-text-3/65">
            Compare latest eval evidence against thresholds and an approved baseline.
          </p>
        </div>
        <button
          type="button"
          onClick={onRefresh}
          disabled={loading}
          className="shrink-0 rounded-[8px] border border-white/[0.08] px-2 py-1 text-[10px] font-800 text-text-2 transition-colors hover:bg-white/[0.06] disabled:opacity-40"
        >
          {loading ? 'Checking' : 'Refresh'}
        </button>
      </div>

      <div className="mt-3 flex rounded-[10px] border border-white/[0.06] bg-white/[0.025] p-1">
        {(['scenario', 'suite'] as const).map((item) => (
          <button
            key={item}
            type="button"
            // Expose the selected scope to assistive technology; without this
            // the active option is signalled by colour alone.
            aria-pressed={scope === item}
            onClick={() => onScopeChange(item)}
            className={cn(
              'flex-1 rounded-[8px] px-2 py-1.5 text-[10px] font-800 uppercase tracking-[0.08em] transition-colors',
              scope === item ? 'bg-white/[0.1] text-text' : 'text-text-3 hover:bg-white/[0.05]',
            )}
          >
            {item}
          </button>
        ))}
      </div>

      {!gate ? (
        <div className="mt-3 text-[11px] text-text-3/60">{loading ? 'Checking gate...' : 'Run evals to build gate evidence.'}</div>
      ) : (
        <div className="mt-3 flex flex-col gap-3">
          <div className="flex flex-wrap items-center gap-2">
            <span className={cn('rounded-full border px-2 py-1 text-[10px] font-800 uppercase tracking-[0.08em]', gateStatusClass(gate.status))}>
              {gate.status}
            </span>
            <span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">
              {gate.scope.label}
            </span>
            <span className="rounded-full bg-white/[0.05] px-2 py-1 text-[10px] font-700 text-text-3">
              {gate.latestRuns.length}/{gate.scope.scenarioIds.length} latest runs
            </span>
          </div>

          <div className="grid grid-cols-3 gap-2">
            <div className="rounded-[10px] border border-white/[0.06] bg-white/[0.02] px-2 py-2">
              <div className="text-[9px] font-800 uppercase tracking-[0.08em] text-text-3/50">Current</div>
              <div className="mt-1 text-[14px] font-800 text-text">{formatPercent(gate.currentPercent)}</div>
            </div>
            <div className="rounded-[10px] border border-white/[0.06] bg-white/[0.02] px-2 py-2">
              <div className="text-[9px] font-800 uppercase tracking-[0.08em] text-text-3/50">Baseline</div>
              <div className="mt-1 text-[14px] font-800 text-text">{gate.baseline ? `${gate.baseline.baselinePercent}%` : 'none'}</div>
            </div>
            <div className="rounded-[10px] border border-white/[0.06] bg-white/[0.02] px-2 py-2">
              <div className="text-[9px] font-800 uppercase tracking-[0.08em] text-text-3/50">Regression</div>
              <div className="mt-1 text-[14px] font-800 text-text">{gate.regressionPoints == null ? 'n/a' : `${gate.regressionPoints}pt`}</div>
            </div>
          </div>

          <div className="flex flex-col gap-1.5">
            {gate.checks.slice(0, maxVisibleChecks).map((check) => (
              <div key={`${check.code}:${check.message}`} className={cn('rounded-[9px] border px-2.5 py-2 text-[11px] leading-relaxed', gateCheckClass(check.status))}>
                <span className="font-800 uppercase tracking-[0.08em]">{check.status}</span>
                <span className="ml-2">{check.message}</span>
              </div>
            ))}
            {hiddenCheckCount > 0 && (
              <div className="text-[10px] text-text-3/55">+{hiddenCheckCount} more check{hiddenCheckCount === 1 ? '' : 's'}</div>
            )}
          </div>

          <button
            type="button"
            onClick={onSetBaseline}
            disabled={busy || baselineUnavailable}
            className="rounded-[9px] border border-white/[0.08] bg-white/[0.04] px-3 py-2 text-[11px] font-800 text-text-2 transition-colors hover:bg-white/[0.08] disabled:cursor-not-allowed disabled:opacity-40"
          >
            {busy ? 'Saving baseline' : gate.baseline ? 'Update baseline' : 'Set baseline'}
          </button>
        </div>
      )}
    </div>
  )
}
443
+
198
444
  export function QualityWorkspace() {
199
445
  const router = useRouter()
200
446
  const searchParams = useSearchParams()
@@ -219,6 +465,12 @@ export function QualityWorkspace() {
219
465
  const [evalBusy, setEvalBusy] = useState<string | null>(null)
220
466
  const [evalEnvironmentPlan, setEvalEnvironmentPlan] = useState<EvalEnvironmentPlan | null>(null)
221
467
  const [evalEnvironmentLoading, setEvalEnvironmentLoading] = useState(false)
468
+ const [evalGate, setEvalGate] = useState<EvalGateResult | null>(null)
469
+ const [evalGateScope, setEvalGateScope] = useState<'scenario' | 'suite'>('scenario')
470
+ const [evalGateLoading, setEvalGateLoading] = useState(false)
471
+ const [evalBaselineBusy, setEvalBaselineBusy] = useState(false)
472
+ const [releaseReadiness, setReleaseReadiness] = useState<ReleaseReadinessReport | null>(null)
473
+ const [releaseReadinessLoading, setReleaseReadinessLoading] = useState(false)
222
474
  const [approvalBusy, setApprovalBusy] = useState<string | null>(null)
223
475
 
224
476
  useEffect(() => {
@@ -283,6 +535,49 @@ export function QualityWorkspace() {
283
535
  }
284
536
  }, [selectedAgentId, selectedScenarioId, selectedSuite])
285
537
 
538
// Fetches the eval gate for the selected agent and scope. The stored gate is
// cleared when no agent is selected, when scenario scope has no scenario, or
// when the request fails (in which case an error toast is also shown).
const loadEvalGate = useCallback(async () => {
  const scenarioScoped = evalGateScope === 'scenario'
  const missingSelection = !selectedAgentId || (scenarioScoped && !selectedScenarioId)
  if (missingSelection) {
    setEvalGate(null)
    return
  }

  const query = new URLSearchParams({ agentId: selectedAgentId })
  if (scenarioScoped) {
    query.set('scenarioId', selectedScenarioId)
  } else {
    query.set('suite', selectedSuite)
  }

  setEvalGateLoading(true)
  try {
    const result = await api<EvalGateResult>('GET', `/eval/gate?${query.toString()}`)
    setEvalGate(result)
  } catch (err) {
    setEvalGate(null)
    const message = err instanceof Error ? err.message : 'Unable to check eval gate'
    toast.error(message)
  } finally {
    setEvalGateLoading(false)
  }
}, [evalGateScope, selectedAgentId, selectedScenarioId, selectedSuite])
561
+
562
// Fetches the release readiness report for a fixed 7d range. Agent and scope
// parameters are added only when an agent is selected. On failure the stored
// report is cleared and an error toast is shown.
const loadReleaseReadiness = useCallback(async () => {
  const query = new URLSearchParams({ range: '7d' })
  if (selectedAgentId) {
    query.set('agentId', selectedAgentId)
    if (evalGateScope === 'suite') {
      query.set('suite', selectedSuite)
    } else if (evalGateScope === 'scenario' && selectedScenarioId) {
      query.set('scenarioId', selectedScenarioId)
    }
  }

  setReleaseReadinessLoading(true)
  try {
    const result = await api<ReleaseReadinessReport>('GET', `/quality/release-readiness?${query.toString()}`)
    setReleaseReadiness(result)
  } catch (err) {
    setReleaseReadiness(null)
    const message = err instanceof Error ? err.message : 'Unable to check release readiness'
    toast.error(message)
  } finally {
    setReleaseReadinessLoading(false)
  }
}, [evalGateScope, selectedAgentId, selectedScenarioId, selectedSuite])
580
+
286
581
  useEffect(() => {
287
582
  void loadQualityData()
288
583
  }, [loadQualityData])
@@ -301,6 +596,14 @@ export function QualityWorkspace() {
301
596
  void loadEvalEnvironmentPlan()
302
597
  }, [loadEvalEnvironmentPlan])
303
598
 
599
+ useEffect(() => {
600
+ void loadEvalGate()
601
+ }, [loadEvalGate])
602
+
603
+ useEffect(() => {
604
+ void loadReleaseReadiness()
605
+ }, [loadReleaseReadiness])
606
+
304
607
  useEffect(() => {
305
608
  if (!suites.some((suite) => suite.name === selectedSuite) && suites[0]) {
306
609
  setSelectedSuite(suites[0].name)
@@ -341,12 +644,14 @@ export function QualityWorkspace() {
341
644
  toast.success('Eval scenario completed')
342
645
  await loadQualityData({ silent: true })
343
646
  await loadEvalEnvironmentPlan()
647
+ await loadEvalGate()
648
+ await loadReleaseReadiness()
344
649
  } catch (err) {
345
650
  toast.error(err instanceof Error ? err.message : 'Eval scenario failed')
346
651
  } finally {
347
652
  setEvalBusy(null)
348
653
  }
349
- }, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadQualityData, selectedAgentId, selectedScenarioId])
654
+ }, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadEvalGate, loadQualityData, loadReleaseReadiness, selectedAgentId, selectedScenarioId])
350
655
 
351
656
  const runSuite = useCallback(async (suiteName: string) => {
352
657
  if (!selectedAgentId) {
@@ -369,12 +674,39 @@ export function QualityWorkspace() {
369
674
  toast.success(`Suite completed at ${Math.round(result.percentage)}%`)
370
675
  await loadQualityData({ silent: true })
371
676
  await loadEvalEnvironmentPlan()
677
+ await loadEvalGate()
678
+ await loadReleaseReadiness()
372
679
  } catch (err) {
373
680
  toast.error(err instanceof Error ? err.message : 'Eval suite failed')
374
681
  } finally {
375
682
  setEvalBusy(null)
376
683
  }
377
- }, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadQualityData, selectedAgentId])
684
+ }, [evalEnvironmentPlan, loadEvalEnvironmentPlan, loadEvalGate, loadQualityData, loadReleaseReadiness, selectedAgentId])
685
+
686
+ const setEvalBaseline = useCallback(async () => {
687
+ if (!selectedAgentId) {
688
+ toast.error('Choose an agent first')
689
+ return
690
+ }
691
+ if (evalGateScope === 'scenario' && !selectedScenarioId) {
692
+ toast.error('Choose a scenario first')
693
+ return
694
+ }
695
+ setEvalBaselineBusy(true)
696
+ try {
697
+ const body = evalGateScope === 'scenario'
698
+ ? { agentId: selectedAgentId, scenarioId: selectedScenarioId, minPercent: evalGate?.minPercent ?? 80, maxRegressionPoints: evalGate?.maxRegressionPoints ?? 5 }
699
+ : { agentId: selectedAgentId, suite: selectedSuite, minPercent: evalGate?.minPercent ?? 80, maxRegressionPoints: evalGate?.maxRegressionPoints ?? 5 }
700
+ const result = await api<{ gate: EvalGateResult }>('POST', '/eval/baselines', body)
701
+ setEvalGate(result.gate)
702
+ await loadReleaseReadiness()
703
+ toast.success('Eval baseline saved')
704
+ } catch (err) {
705
+ toast.error(err instanceof Error ? err.message : 'Unable to save eval baseline')
706
+ } finally {
707
+ setEvalBaselineBusy(false)
708
+ }
709
+ }, [evalGate, evalGateScope, loadReleaseReadiness, selectedAgentId, selectedScenarioId, selectedSuite])
378
710
 
379
711
  const actOnApproval = useCallback(async (approval: ApprovalRequest, approved: boolean) => {
380
712
  setApprovalBusy(approval.id)
@@ -382,12 +714,13 @@ export function QualityWorkspace() {
382
714
  await api('POST', '/approvals', { id: approval.id, approved })
383
715
  toast.success(approved ? 'Approval granted' : 'Approval denied')
384
716
  await loadQualityData({ silent: true })
717
+ await loadReleaseReadiness()
385
718
  } catch (err) {
386
719
  toast.error(err instanceof Error ? err.message : 'Unable to update approval')
387
720
  } finally {
388
721
  setApprovalBusy(null)
389
722
  }
390
- }, [loadQualityData])
723
+ }, [loadQualityData, loadReleaseReadiness])
391
724
 
392
725
  if (loading) {
393
726
  return (
@@ -451,6 +784,12 @@ export function QualityWorkspace() {
451
784
  {activeTab === 'overview' && (
452
785
  <div className="flex flex-col gap-6">
453
786
  <OperationsPulsePanel defaultRange="7d" compact />
787
+ <ReleaseReadinessPanel
788
+ report={releaseReadiness}
789
+ loading={releaseReadinessLoading}
790
+ onRefresh={() => void loadReleaseReadiness()}
791
+ onOpenHref={(href) => router.push(href)}
792
+ />
454
793
 
455
794
  <div className="grid gap-3 md:grid-cols-2 xl:grid-cols-4">
456
795
  <StatTile
@@ -600,6 +939,15 @@ export function QualityWorkspace() {
600
939
  loading={evalEnvironmentLoading}
601
940
  onRefresh={() => void loadEvalEnvironmentPlan({ refreshGateway: true })}
602
941
  />
942
+ <EvalGatePanel
943
+ gate={evalGate}
944
+ loading={evalGateLoading}
945
+ busy={evalBaselineBusy}
946
+ scope={evalGateScope}
947
+ onScopeChange={setEvalGateScope}
948
+ onRefresh={() => void loadEvalGate()}
949
+ onSetBaseline={() => void setEvalBaseline()}
950
+ />
603
951
  <button
604
952
  type="button"
605
953
  onClick={() => openMissionTemplate('release-candidate-qa')}