@cat-factory/app 0.39.0 → 0.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,9 @@ const META: Record<Notification['type'], { icon: string; color: Accent; action:
36
36
  // Clicking the title opens the human-testing window for the task (see `reveal`); "act" just
37
37
  // marks it read (the gate is resolved in that window — confirm / request a fix — not here).
38
38
  human_test_ready: { icon: 'i-lucide-user-check', color: 'primary', action: 'Mark read' },
39
+ // Clicking the title opens the visual-confirmation window for the task (see `reveal`); "act"
40
+ // just marks it read (the gate is resolved in that window — approve / request a fix — not here).
41
+ visual_confirmation_ready: { icon: 'i-lucide-camera', color: 'primary', action: 'Mark read' },
39
42
  // Clicking the title opens the task's gate window (where the human can request a freeform
40
43
  // fix); "act" just marks it read (approval happens on GitHub, not here).
41
44
  human_review: { icon: 'i-lucide-users', color: 'primary', action: 'Mark read' },
@@ -87,6 +90,7 @@ function reveal(n: Notification) {
87
90
  else if (n.type === 'clarity_review') ui.openClarityReview(n.blockId)
88
91
  else if (n.type === 'decision_required') revealDecision(n)
89
92
  else if (n.type === 'human_test_ready') revealHumanTest(n)
93
+ else if (n.type === 'visual_confirmation_ready') revealVisualConfirm(n)
90
94
  else if (n.type === 'human_review') revealHumanReview(n)
91
95
  else if (n.type === 'followup_pending') revealFollowUps(n)
92
96
  else ui.select(n.blockId)
@@ -128,6 +132,21 @@ function revealHumanTest(n: Notification) {
128
132
  else if (n.blockId) ui.select(n.blockId)
129
133
  }
130
134
 
135
/**
 * Reveal handler for a `visual_confirmation_ready` notification.
 *
 * Looks up the notification's run and searches its steps for the `visual-confirmation`
 * step that is parked in `waiting_decision`. When one is found it is opened via
 * `ui.openStepDetail` (the universal step dispatch); otherwise — no run, or no parked
 * step — the handler falls back to selecting the notification's block, if it has one.
 */
function revealVisualConfirm(n: Notification) {
  const run = n.executionId ? execution.getInstance(n.executionId) : undefined
  if (run) {
    const parkedAt = run.steps.findIndex(
      (step) => step.agentKind === 'visual-confirmation' && step.state === 'waiting_decision',
    )
    if (parkedAt >= 0) {
      ui.openStepDetail(run.id, parkedAt)
      return
    }
  }
  // Fallback: nothing to open, so just focus the block the notification is about.
  if (n.blockId) ui.select(n.blockId)
}
149
+
131
150
  /**
132
151
  * Open the decision surface for a parked iteration-cap run: find the run's step that is
133
152
  * waiting on a human and open it through the universal step dispatch — which routes a
@@ -43,6 +43,13 @@ const headroomTone = computed(() => headroomColor(headroom.value, m.value.trunca
43
43
  <span class="tabular-nums text-slate-400" title="Prompt / completion tokens">
44
44
  {{ formatTokens(m.promptTokens) }}↑ {{ formatTokens(m.completionTokens) }}↓
45
45
  </span>
46
+ <span
47
+ v-if="(m.cachedPromptTokens ?? 0) > 0"
48
+ class="tabular-nums text-emerald-400/80"
49
+ title="Prompt tokens served from the provider's cache"
50
+ >
51
+ ({{ formatTokens(m.cachedPromptTokens ?? 0) }} cached)
52
+ </span>
46
53
  <div class="ml-auto flex items-center gap-1">
47
54
  <UBadge v-if="m.errors > 0" color="error" variant="subtle" size="sm">
48
55
  {{ m.errors }} error{{ m.errors === 1 ? '' : 's' }}
@@ -16,6 +16,7 @@ import ClarityReviewWindow from '~/components/clarity/ClarityReviewWindow.vue'
16
16
  import BrainstormWindow from '~/components/brainstorm/BrainstormWindow.vue'
17
17
  import TestReportWindow from '~/components/testing/TestReportWindow.vue'
18
18
  import HumanTestWindow from '~/components/humanTest/HumanTestWindow.vue'
19
+ import VisualConfirmationWindow from '~/components/visualConfirm/VisualConfirmationWindow.vue'
19
20
  import GateResultView from '~/components/gates/GateResultView.vue'
20
21
  import ConsensusSessionWindow from '~/components/consensus/ConsensusSessionWindow.vue'
21
22
  import GenericStructuredResultView from '~/components/panels/GenericStructuredResultView.vue'
@@ -32,6 +33,8 @@ const STEP_RESULT_VIEWS: Record<string, Component> = {
32
33
  tester: TestReportWindow,
33
34
  // The human-testing gate: env URL + confirm / request-fix / pull-main / recreate / destroy.
34
35
  'human-test': HumanTestWindow,
36
+ // The visual-confirmation gate: actual-vs-reference screenshot gallery + approve / request-fix.
37
+ 'visual-confirm': VisualConfirmationWindow,
35
38
  // Shared by both polling gates (`ci` + `conflicts`); the window branches on agentKind.
36
39
  gate: GateResultView,
37
40
  // Opened for any step that ran the consensus mechanism (routed in `ui.dispatchStepView`).
@@ -34,6 +34,13 @@ interface ProviderMeta {
34
34
  label: string
35
35
  url: string
36
36
  steps: string[]
37
+ /**
38
+ * Whether this provider caches the re-sent prompt prefix. Connecting a key here
39
+ * upgrades its models to the caching `direct` flavour, so a long agentic run stops
40
+ * re-billing its whole growing prompt every turn. Mirrors the backend
41
+ * `providerCachePolicy`; the gateways are pass-through (no caching we rely on yet).
42
+ */
43
+ caches?: boolean
37
44
  }
38
45
 
39
46
  /** Direct vendors: the key reaches that one vendor's own endpoint. */
@@ -46,6 +53,7 @@ const DIRECT_PROVIDERS: ProviderMeta[] = [
46
53
  'Open platform.openai.com → API keys and create a new secret key.',
47
54
  'Copy the key (starts with sk-…); it is shown only once.',
48
55
  ],
56
+ caches: true,
49
57
  },
50
58
  {
51
59
  value: 'anthropic',
@@ -55,6 +63,7 @@ const DIRECT_PROVIDERS: ProviderMeta[] = [
55
63
  'Open console.anthropic.com → Settings → API Keys and create a key.',
56
64
  'Copy the key (starts with sk-ant-…).',
57
65
  ],
66
+ caches: true,
58
67
  },
59
68
  {
60
69
  value: 'qwen',
@@ -64,6 +73,7 @@ const DIRECT_PROVIDERS: ProviderMeta[] = [
64
73
  'Open the DashScope console (international) → API-KEY and create a key.',
65
74
  'Copy the key; it authenticates the OpenAI-compatible Qwen endpoint.',
66
75
  ],
76
+ caches: true,
67
77
  },
68
78
  {
69
79
  value: 'deepseek',
@@ -73,6 +83,7 @@ const DIRECT_PROVIDERS: ProviderMeta[] = [
73
83
  'Open platform.deepseek.com → API keys and create a key.',
74
84
  'Copy the key (starts with sk-…).',
75
85
  ],
86
+ caches: true,
76
87
  },
77
88
  {
78
89
  value: 'moonshot',
@@ -268,6 +279,14 @@ async function remove(k: ApiKey) {
268
279
  </li>
269
280
  </ol>
270
281
 
282
+ <!-- caching capability: connecting a direct key that caches upgrades its models to
283
+ the caching flavour, so long agentic runs stop re-billing the whole prompt. -->
284
+ <p v-if="selected.caches" class="flex items-center gap-1.5 text-[12px] text-emerald-400/90">
285
+ <UIcon name="i-lucide-zap" class="h-3.5 w-3.5 shrink-0" />
286
+ Enables prompt caching for {{ selected.label }} models — a long multi-turn run reuses its
287
+ cached prompt prefix instead of re-sending it every turn.
288
+ </p>
289
+
271
290
  <!-- add form -->
272
291
  <div class="space-y-2">
273
292
  <UFormField label="Label (optional)">
@@ -15,7 +15,7 @@ import { onKeyStroke } from '@vueuse/core'
15
15
  import type { AgentKind } from '~/types/domain'
16
16
  import type { ModelPreset } from '~/types/model-presets'
17
17
  import { MODEL_CONFIGURABLE_SYSTEM_KINDS } from '~/utils/catalog'
18
- import { contextLabel, costLabel, displayFlavor, isSelectable } from '~/stores/models'
18
+ import { cachingLabel, contextLabel, costLabel, displayFlavor, isSelectable } from '~/stores/models'
19
19
 
20
20
  const ui = useUiStore()
21
21
  const models = useModelsStore()
@@ -84,7 +84,10 @@ const selectableModels = computed(() => {
84
84
  const flavor = displayFlavor(m, configured)
85
85
  const ctx = contextLabel(flavor.contextTokens)
86
86
  const price = costLabel(flavor) ?? (flavor.quotaBased ? 'quota' : undefined)
87
- const suffix = [flavor.providerLabel, ctx, price].filter(Boolean).join(' · ')
87
+ // Surface caching in the suffix: a cache-less flavour (the Workers-AI hot path)
88
+ // re-bills its whole growing prompt every turn, which the user can act on.
89
+ const caching = cachingLabel(flavor)
90
+ const suffix = [flavor.providerLabel, ctx, price, caching].filter(Boolean).join(' · ')
88
91
  return {
89
92
  id: m.id,
90
93
  label: m.label,
@@ -68,6 +68,7 @@ const draft = reactive({
68
68
  taskLimitShared: 5 as number,
69
69
  perType: {} as Record<CreateTaskType, number>,
70
70
  storeAgentContext: true,
71
+ artifactRetentionDays: 14,
71
72
  kaizenEnabled: true,
72
73
  // Budget: empty string ⇒ "use the built-in default" (null on the wire).
73
74
  spendCurrency: '',
@@ -82,6 +83,7 @@ function hydrate() {
82
83
  const pt = s.taskLimitPerType ?? {}
83
84
  for (const t of TASK_TYPES) draft.perType[t] = pt[t] ?? 3
84
85
  draft.storeAgentContext = s.storeAgentContext
86
+ draft.artifactRetentionDays = s.artifactRetentionDays
85
87
  draft.kaizenEnabled = s.kaizenEnabled
86
88
  draft.spendCurrency = s.spendCurrency ?? ''
87
89
  draft.spendMonthlyLimit = s.spendMonthlyLimit == null ? '' : String(s.spendMonthlyLimit)
@@ -111,6 +113,7 @@ async function save() {
111
113
  )
112
114
  : null,
113
115
  storeAgentContext: draft.storeAgentContext,
116
+ artifactRetentionDays: draft.artifactRetentionDays,
114
117
  kaizenEnabled: draft.kaizenEnabled,
115
118
  })
116
119
  toast.add({ title: 'Settings saved', icon: 'i-lucide-check', color: 'success' })
@@ -242,6 +245,28 @@ async function saveBudget() {
242
245
  </label>
243
246
  </section>
244
247
 
248
+ <!-- Visual-confirmation artifact retention -->
249
+ <section class="space-y-2">
250
+ <h3 class="text-sm font-semibold text-slate-200">Screenshot retention</h3>
251
+ <p class="text-[11px] text-slate-400">
252
+ How long to keep the UI tester’s captured screenshots and the reference design
253
+ images they’re reviewed against (the visual-confirmation gate). A daily cleanup job
254
+ deletes both the image bytes and their metadata once they age past this window.
255
+ </p>
256
+ <label class="block w-48">
257
+ <span class="mb-1 block text-[10px] uppercase tracking-wide text-slate-500">
258
+ Retention (days)
259
+ </span>
260
+ <UInput
261
+ v-model.number="draft.artifactRetentionDays"
262
+ type="number"
263
+ :min="1"
264
+ :max="3650"
265
+ size="sm"
266
+ />
267
+ </label>
268
+ </section>
269
+
245
270
  <!-- Kaizen agent -->
246
271
  <section class="space-y-2">
247
272
  <h3 class="text-sm font-semibold text-slate-200">Kaizen agent</h3>
@@ -28,6 +28,7 @@ const ROUTABLE: { type: NotificationType; label: string }[] = [
28
28
  { type: 'clarity_review', label: 'Clarity review' },
29
29
  { type: 'release_regression', label: 'Release regression' },
30
30
  { type: 'human_test_ready', label: 'Ready for human testing' },
31
+ { type: 'visual_confirmation_ready', label: 'Ready for visual confirmation' },
31
32
  ]
32
33
 
33
34
  /** Notification-role options for a mapped member (drives who gets @-mentioned). */
@@ -45,6 +46,7 @@ const routes = reactive<Record<NotificationType, SlackRoute>>({
45
46
  // In-app only (not in ROUTABLE), but the map is exhaustive over the type.
46
47
  decision_required: { enabled: false, channel: '' },
47
48
  human_test_ready: { enabled: false, channel: '' },
49
+ visual_confirmation_ready: { enabled: false, channel: '' },
48
50
  human_review: { enabled: false, channel: '' },
49
51
  followup_pending: { enabled: false, channel: '' },
50
52
  })
@@ -0,0 +1,357 @@
1
+ <script setup lang="ts">
2
+ // Visual-confirmation gate window — the dedicated surface for a `visual-confirmation` step
3
+ // (opened via the universal result-view host, the same seam the human-test / tester windows
4
+ // use). It reads the gate's live state off the execution step (`step.visualConfirm`, pushed
5
+ // over the stream), renders each captured screenshot next to its reference design (paired by
6
+ // view), and drives the human actions: approve (advance), request a fix from findings (the
7
+ // Tester's fixer), or recapture (refresh the pairs). It also lets the human upload reference
8
+ // design images for the task.
9
+ import { onUnmounted, reactive, ref, watch } from 'vue'
10
+ import type { VisualConfirmStepState } from '~/types/execution'
11
+ import StepRunMeta from '~/components/panels/StepRunMeta.vue'
12
+
13
+ const board = useBoardStore()
14
+ const execution = useExecutionStore()
15
+ const visualConfirm = useVisualConfirmStore()
16
+
17
+ // Release the cached screenshot/reference object URLs when the window goes away, so the
18
+ // (potentially large) blob bytes don't linger in memory for the rest of the session.
19
+ onUnmounted(() => visualConfirm.revokeBlobs())
20
+
21
+ const { open, blockId, instanceId, stepIndex, close } = useResultView('visual-confirm')
22
+ const block = computed(() => (blockId.value ? board.getBlock(blockId.value) : undefined))
23
+
24
+ const instance = computed(() =>
25
+ instanceId.value === null ? null : (execution.getInstance(instanceId.value) ?? null),
26
+ )
27
+ const step = computed(() => {
28
+ if (instance.value === null || stepIndex.value === null) return null
29
+ return instance.value.steps[stepIndex.value] ?? null
30
+ })
31
+ const vc = computed<VisualConfirmStepState | null>(() => step.value?.visualConfirm ?? null)
32
+ const phase = computed(() => vc.value?.phase ?? null)
33
+ const pairs = computed(() => vc.value?.pairs ?? [])
34
+ const busy = computed(() => (blockId.value ? visualConfirm.isBusy(blockId.value) : false))
35
+ const awaitingHuman = computed(() => phase.value === 'awaiting_human')
36
+ const working = computed(() => phase.value === 'fixing')
37
+
38
+ const PHASE_LABEL: Record<NonNullable<VisualConfirmStepState['phase']>, string> = {
39
+ awaiting_human: 'Awaiting your review',
40
+ fixing: 'Fixer is addressing your findings…',
41
+ approved: 'Approved',
42
+ }
43
+
44
+ // Resolve each pair's artifact ids to object URLs for the <img>s (cached in the store).
45
+ const urls = reactive<Record<string, string>>({})
46
/**
 * Ensure `urls` holds an object URL for the given artifact id.
 * No-op for a missing id or one that is already resolved; an empty result from the
 * store is not recorded, so a later call can try again.
 */
async function resolveUrl(id: string | null | undefined) {
  if (!id) return
  if (urls[id]) return
  const resolved = await visualConfirm.blobUrl(id)
  if (resolved) {
    urls[id] = resolved
  }
}
51
+ watch(
52
+ pairs,
53
+ (next) => {
54
+ for (const p of next) {
55
+ void resolveUrl(p.actualArtifactId)
56
+ void resolveUrl(p.referenceArtifactId)
57
+ }
58
+ },
59
+ { immediate: true },
60
+ )
61
+
62
+ const findings = ref('')
63
+ const showFindings = ref(false)
64
+
65
+ // When the gate flags its screenshots as an unreliable basis (`degradedReason` — no capture
66
+ // happened, a fix failed, or a fix landed AFTER these shots were taken), approving is no longer
67
+ // a safe one-click: require the human to explicitly acknowledge they reviewed the change another
68
+ // way (or recaptured) first. Re-armed whenever the reason changes so a fresh warning re-gates.
69
+ const ackDegraded = ref(false)
70
+ watch(
71
+ () => vc.value?.degradedReason ?? null,
72
+ () => {
73
+ ackDegraded.value = false
74
+ },
75
+ )
76
+ const needsAck = computed(() => !!vc.value?.degradedReason)
77
+ const canApprove = computed(
78
+ () => awaitingHuman.value && !busy.value && (!needsAck.value || ackDegraded.value),
79
+ )
80
+
81
/**
 * Approve the gate for the current block and close the window.
 * Does nothing when there is no block or approval is currently not allowed
 * (`canApprove` covers phase, busy state and the degraded-screenshot acknowledgement).
 */
async function approve() {
  const id = blockId.value
  if (!id) return
  if (!canApprove.value) return
  await visualConfirm.approve(id)
  close()
}
86
/**
 * Send the typed findings (trimmed) as a fix request for the current block, then
 * clear the textarea and collapse the findings form. Ignored when there is no block
 * or the findings are blank.
 */
async function submitFix() {
  const id = blockId.value
  if (!id) return
  const note = findings.value.trim()
  if (!note) return
  await visualConfirm.requestFix(id, note)
  findings.value = ''
  showFindings.value = false
}
92
/** Ask the store to recapture for the current block; a no-op when no block is set. */
async function recapture() {
  const id = blockId.value
  if (id) {
    await visualConfirm.recapture(id)
  }
}
96
+
97
+ // Reference upload.
98
+ const uploadView = ref('')
99
+ const fileInput = ref<HTMLInputElement | null>(null)
100
/**
 * `change` handler for the reference-design file input: uploads the picked file for
 * the current block, tagged with the (trimmed) view name.
 *
 * The file input is reset whether or not the upload succeeds. A file input only fires
 * `change` when its value changes, so leaving the failed selection in place would make
 * re-picking the same file a silent no-op and block a retry. The view name is cleared
 * only after a successful upload, so a retry keeps what the user typed.
 *
 * @param e The input's `change` event; its target is the file `<input>`.
 */
async function onFilePicked(e: Event) {
  const input = e.target as HTMLInputElement
  const file = input.files?.[0]
  if (!file || !blockId.value) return
  try {
    await visualConfirm.uploadReference(blockId.value, file, uploadView.value.trim())
    uploadView.value = ''
  } finally {
    // Always clear the selection so the same file can be chosen again after a failure.
    if (fileInput.value) fileInput.value.value = ''
  }
}
108
+ </script>
109
+
110
+ <template>
111
+ <Teleport to="body">
112
+ <div
113
+ v-if="open"
114
+ class="fixed inset-0 z-50 flex items-stretch justify-center bg-slate-950/70 backdrop-blur-sm"
115
+ @click.self="close"
116
+ >
117
+ <div
118
+ class="m-4 flex w-full max-w-4xl flex-col overflow-hidden rounded-2xl border border-slate-800 bg-slate-900 shadow-2xl"
119
+ >
120
+ <header class="flex items-center gap-3 border-b border-slate-800 px-5 py-3">
121
+ <span
122
+ class="flex h-8 w-8 items-center justify-center rounded-lg bg-amber-500/15 text-amber-300"
123
+ >
124
+ <UIcon name="i-lucide-image-play" class="h-4 w-4" />
125
+ </span>
126
+ <div class="min-w-0 flex-1">
127
+ <h2 class="truncate text-sm font-semibold text-slate-100">
128
+ Visual confirmation{{ block ? ` — ${block.title}` : '' }}
129
+ </h2>
130
+ <p class="truncate text-[11px] text-slate-400">
131
+ {{ phase ? PHASE_LABEL[phase] : 'Review the UI against the reference designs' }}
132
+ </p>
133
+ </div>
134
+ <button
135
+ class="rounded-md p-1.5 text-slate-400 hover:bg-slate-800 hover:text-slate-200"
136
+ @click="close"
137
+ >
138
+ <UIcon name="i-lucide-x" class="h-4 w-4" />
139
+ </button>
140
+ </header>
141
+
142
+ <div class="flex min-h-0 flex-1 flex-col gap-4 overflow-y-auto px-5 py-4">
143
+ <div
144
+ v-if="!vc"
145
+ class="flex flex-col items-center justify-center gap-2 py-10 text-center text-slate-400"
146
+ >
147
+ <UIcon name="i-lucide-image-play" class="h-8 w-8 opacity-40" />
148
+ <p class="text-sm">This step hasn't started yet.</p>
149
+ </div>
150
+
151
+ <template v-else>
152
+ <p
153
+ v-if="vc.degradedReason"
154
+ class="rounded-lg border border-amber-700/40 bg-amber-500/5 px-3 py-2 text-[12px] text-amber-300/90"
155
+ >
156
+ {{ vc.degradedReason }}
157
+ </p>
158
+
159
+ <p
160
+ v-if="working"
161
+ class="flex items-center gap-2 rounded-lg border border-slate-800 bg-slate-950/40 px-3 py-2 text-[12px] text-slate-300"
162
+ >
163
+ <UIcon name="i-lucide-loader" class="h-3.5 w-3.5 animate-spin text-amber-300" />
164
+ {{ phase ? PHASE_LABEL[phase] : '' }}
165
+ </p>
166
+
167
+ <!-- Actual-vs-reference gallery -->
168
+ <section v-if="pairs.length" class="space-y-4">
169
+ <div
170
+ v-for="(p, i) in pairs"
171
+ :key="i"
172
+ class="rounded-lg border border-slate-800 bg-slate-900/60 p-3"
173
+ >
174
+ <h3 class="mb-2 text-[12px] font-semibold text-slate-200">{{ p.view }}</h3>
175
+ <div class="grid grid-cols-2 gap-3">
176
+ <figure class="space-y-1">
177
+ <figcaption class="text-[10px] uppercase tracking-wide text-slate-500">
178
+ Actual
179
+ </figcaption>
180
+ <img
181
+ v-if="p.actualArtifactId && urls[p.actualArtifactId]"
182
+ :src="urls[p.actualArtifactId]"
183
+ :alt="`${p.view} (actual)`"
184
+ class="w-full rounded border border-slate-800"
185
+ />
186
+ <div
187
+ v-else
188
+ class="flex h-32 items-center justify-center rounded border border-dashed border-slate-700 text-[11px] text-slate-600"
189
+ >
190
+ {{ p.actualArtifactId ? 'Loading…' : 'Not captured' }}
191
+ </div>
192
+ </figure>
193
+ <figure class="space-y-1">
194
+ <figcaption class="text-[10px] uppercase tracking-wide text-slate-500">
195
+ Reference
196
+ </figcaption>
197
+ <img
198
+ v-if="p.referenceArtifactId && urls[p.referenceArtifactId]"
199
+ :src="urls[p.referenceArtifactId]"
200
+ :alt="`${p.view} (reference)`"
201
+ class="w-full rounded border border-slate-800"
202
+ />
203
+ <div
204
+ v-else
205
+ class="flex h-32 items-center justify-center rounded border border-dashed border-slate-700 text-[11px] text-slate-600"
206
+ >
207
+ {{ p.referenceArtifactId ? 'Loading…' : 'No reference' }}
208
+ </div>
209
+ </figure>
210
+ </div>
211
+ </div>
212
+ </section>
213
+ <p v-else class="text-[12px] italic text-slate-500">
214
+ No screenshots were captured — review the change manually.
215
+ </p>
216
+
217
+ <!-- Reference upload -->
218
+ <section class="rounded-lg border border-slate-800 bg-slate-900/60 p-3">
219
+ <h3 class="mb-2 text-[11px] font-semibold uppercase tracking-wide text-slate-500">
220
+ Upload a reference design
221
+ </h3>
222
+ <div class="flex flex-wrap items-center gap-2">
223
+ <input
224
+ v-model="uploadView"
225
+ placeholder="View name (e.g. login)"
226
+ class="rounded-md border border-slate-700 bg-slate-950 px-2 py-1 text-[12px] text-slate-200 placeholder:text-slate-600"
227
+ />
228
+ <input
229
+ ref="fileInput"
230
+ type="file"
231
+ accept="image/png,image/jpeg"
232
+ :disabled="busy"
233
+ class="text-[12px] text-slate-300 file:mr-2 file:rounded file:border-0 file:bg-slate-800 file:px-2 file:py-1 file:text-slate-200"
234
+ @change="onFilePicked"
235
+ />
236
+ </div>
237
+ </section>
238
+
239
+ <!-- Request fix -->
240
+ <section
241
+ v-if="awaitingHuman"
242
+ class="rounded-lg border border-slate-800 bg-slate-900/60 p-3"
243
+ >
244
+ <div class="flex items-center justify-between">
245
+ <h3 class="text-[11px] font-semibold uppercase tracking-wide text-slate-500">
246
+ Needs changes?
247
+ </h3>
248
+ <button
249
+ class="text-[12px] text-slate-400 hover:text-slate-200"
250
+ @click="showFindings = !showFindings"
251
+ >
252
+ {{ showFindings ? 'Cancel' : 'Request a fix' }}
253
+ </button>
254
+ </div>
255
+ <div v-if="showFindings" class="mt-2 space-y-2">
256
+ <textarea
257
+ v-model="findings"
258
+ rows="4"
259
+ placeholder="Describe what looks wrong — the Fixer agent gets this as context."
260
+ class="w-full rounded-md border border-slate-700 bg-slate-950 px-3 py-2 text-[13px] text-slate-200 placeholder:text-slate-600 focus:border-amber-500 focus:outline-none"
261
+ />
262
+ <UButton
263
+ size="sm"
264
+ color="warning"
265
+ icon="i-lucide-wrench"
266
+ :loading="busy"
267
+ :disabled="busy || !findings.trim()"
268
+ @click="submitFix"
269
+ >
270
+ Send to Fixer
271
+ </UButton>
272
+ </div>
273
+ </section>
274
+
275
+ <!-- Rounds history -->
276
+ <section
277
+ v-if="vc.rounds && vc.rounds.length"
278
+ class="rounded-lg border border-slate-800 bg-slate-900/60 p-3"
279
+ >
280
+ <h3 class="mb-2 text-[11px] font-semibold uppercase tracking-wide text-slate-500">
281
+ History ({{ vc.attempts }} round{{ vc.attempts === 1 ? '' : 's' }})
282
+ </h3>
283
+ <ol class="space-y-2">
284
+ <li v-for="(r, i) in vc.rounds" :key="i" class="flex items-start gap-2 text-[12px]">
285
+ <UIcon
286
+ name="i-lucide-wrench"
287
+ class="mt-0.5 h-3.5 w-3.5 shrink-0 text-slate-400"
288
+ />
289
+ <div class="min-w-0 flex-1">
290
+ <span class="text-slate-200">Fix requested</span>
291
+ <span
292
+ class="ml-1.5 rounded px-1 text-[10px] uppercase"
293
+ :class="
294
+ r.outcome === 'completed'
295
+ ? 'bg-emerald-500/15 text-emerald-300'
296
+ : r.outcome === 'failed'
297
+ ? 'bg-rose-500/15 text-rose-300'
298
+ : 'bg-slate-500/15 text-slate-300'
299
+ "
300
+ >
301
+ {{ r.outcome ?? 'in progress' }}
302
+ </span>
303
+ <p v-if="r.findings" class="leading-snug text-slate-400">{{ r.findings }}</p>
304
+ </div>
305
+ </li>
306
+ </ol>
307
+ </section>
308
+ </template>
309
+ </div>
310
+
311
+ <footer
312
+ v-if="vc"
313
+ class="flex items-center justify-between gap-3 border-t border-slate-800 px-5 py-3"
314
+ >
315
+ <StepRunMeta
316
+ v-if="step"
317
+ :step="step"
318
+ :instance-id="instanceId ?? undefined"
319
+ :step-number="stepIndex === null ? undefined : stepIndex + 1"
320
+ :total-steps="instance?.steps.length"
321
+ :run-failed="instance?.status === 'failed'"
322
+ :failure-at="instance?.failure?.occurredAt"
323
+ />
324
+ <div class="flex items-center gap-2">
325
+ <label
326
+ v-if="awaitingHuman && needsAck"
327
+ class="flex items-center gap-1.5 text-[11px] text-amber-300/90"
328
+ >
329
+ <input v-model="ackDegraded" type="checkbox" class="accent-amber-500" />
330
+ I've reviewed this manually
331
+ </label>
332
+ <UButton
333
+ size="sm"
334
+ variant="soft"
335
+ color="neutral"
336
+ icon="i-lucide-refresh-cw"
337
+ :loading="busy"
338
+ :disabled="busy || !awaitingHuman"
339
+ @click="recapture"
340
+ >
341
+ Recapture
342
+ </UButton>
343
+ <UButton
344
+ color="primary"
345
+ icon="i-lucide-circle-check"
346
+ :loading="busy"
347
+ :disabled="!canApprove"
348
+ @click="approve"
349
+ >
350
+ Approve — continue
351
+ </UButton>
352
+ </div>
353
+ </footer>
354
+ </div>
355
+ </div>
356
+ </Teleport>
357
+ </template>
@@ -0,0 +1,60 @@
1
+ import {
2
+ approveVisualConfirmContract,
3
+ recaptureVisualConfirmContract,
4
+ requestVisualConfirmFixContract,
5
+ } from '@cat-factory/contracts'
6
+ import type { ApiContext } from './context'
7
+
8
/**
 * API slice for the visual-confirmation gate: its three run-driving actions (approve,
 * request a fix, recapture) and the two artifact helpers the gate window needs (upload
 * a reference design image, fetch a stored blob as an object URL).
 *
 * The actions are sent through the contract-typed `send` and return the updated
 * execution instance (the gate state rides on its current step and also arrives live
 * via the execution stream). The artifact helpers call the authed `http` directly: the
 * artifact ingest/blob endpoints are raw, not contract-modelled, because they carry binary.
 */
export function visualConfirmApi({ send, ws, http }: ApiContext) {
  // Every gate action addresses the same target: one block inside one workspace.
  const gateTarget = (workspaceId: string, blockId: string) => ({
    pathPrefix: ws(workspaceId),
    pathParams: { blockId },
  })

  // Upload a reference design image for a block (kind=reference), tagged with its view name.
  async function uploadReferenceArtifact(
    workspaceId: string,
    blockId: string,
    file: File,
    view: string,
  ): Promise<{ artifact: { id: string } }> {
    const payload = new FormData()
    payload.append('file', file)
    payload.append('kind', 'reference')
    payload.append('blockId', blockId)
    // The view tag is optional: an empty name is simply not sent.
    if (view) payload.append('view', view)
    return http(`${ws(workspaceId)}/artifacts`, { method: 'POST', body: payload })
  }

  // Fetch a stored artifact's bytes and turn them into an object URL for an <img>.
  async function fetchArtifactBlobUrl(workspaceId: string, artifactId: string): Promise<string> {
    const endpoint = `${ws(workspaceId)}/artifacts/${encodeURIComponent(artifactId)}/blob`
    const bytes: Blob = await http(endpoint, { method: 'GET', responseType: 'blob' })
    return URL.createObjectURL(bytes)
  }

  return {
    // Approve the reviewed screenshots: advance the pipeline.
    approveVisualConfirm: (workspaceId: string, blockId: string) =>
      send(approveVisualConfirmContract, gateTarget(workspaceId, blockId)),

    // Submit findings and request a fix (dispatches the Tester's fixer, then re-parks).
    requestVisualConfirmFix: (workspaceId: string, blockId: string, findings: string) =>
      send(requestVisualConfirmFixContract, {
        ...gateTarget(workspaceId, blockId),
        body: { findings },
      }),

    // Refresh the actual-vs-reference pairs from the latest UI-tester report.
    recaptureVisualConfirm: (workspaceId: string, blockId: string) =>
      send(recaptureVisualConfirmContract, gateTarget(workspaceId, blockId)),

    uploadReferenceArtifact,
    fetchArtifactBlobUrl,
  }
}
@@ -12,6 +12,7 @@ import { fragmentsApi } from './api/fragments'
12
12
  import { githubApi } from './api/github'
13
13
  import { humanReviewApi } from './api/humanReview'
14
14
  import { humanTestApi } from './api/humanTest'
15
+ import { visualConfirmApi } from './api/visualConfirm'
15
16
  import { kaizenApi } from './api/kaizen'
16
17
  import { localSettingsApi } from './api/localSettings'
17
18
  import { modelsApi } from './api/models'
@@ -98,6 +99,7 @@ export function useApi() {
98
99
  ...reviewsApi(ctx),
99
100
  ...followUpsApi(ctx),
100
101
  ...humanTestApi(ctx),
102
+ ...visualConfirmApi(ctx),
101
103
  ...humanReviewApi(ctx),
102
104
  ...kaizenApi(ctx),
103
105
  ...localSettingsApi(ctx),
@@ -49,7 +49,9 @@ const BUILTIN_SEED_KINDS = [
49
49
  'reviewer',
50
50
  'blueprints',
51
51
  'mocker',
52
- 'tester',
52
+ 'tester-api',
53
+ 'tester-ui',
54
+ 'visual-confirmation',
53
55
  'conflicts',
54
56
  'ci',
55
57
  'merger',
@@ -86,7 +88,7 @@ describe('usePipelineHealth', () => {
86
88
  'coder',
87
89
  'reviewer',
88
90
  'blueprints',
89
- 'tester',
91
+ 'tester-api',
90
92
  'conflicts',
91
93
  'ci',
92
94
  'merger',
@@ -12,6 +12,13 @@ export interface DisplayFlavor {
12
12
  /** True ⇒ flat-rate quota; its cost is a quota burn rate, not budget spend. */
13
13
  quotaBased: boolean
14
14
  vendor?: SubscriptionVendor
15
+ /**
16
+ * Whether this flavour's provider caches the re-sent prompt prefix. False on a
17
+ * Cloudflare/Workers-AI flavour (the hot path re-bills the whole prompt every turn);
18
+ * true once a direct key upgrades the model to its caching `direct` flavour. Undefined
19
+ * ⇒ unknown (older catalog). Surfaced as a badge in the picker.
20
+ */
21
+ cachesPrompts?: boolean
15
22
  }
16
23
 
17
24
  /**
@@ -30,6 +37,7 @@ export function displayFlavor(m: ModelOption, configured: Set<SubscriptionVendor
30
37
  cost: m.subscription.cost,
31
38
  quotaBased: true,
32
39
  vendor: m.subscription.vendor,
40
+ cachesPrompts: m.subscription.cachesPrompts,
33
41
  }
34
42
  }
35
43
  return {
@@ -40,6 +48,7 @@ export function displayFlavor(m: ModelOption, configured: Set<SubscriptionVendor
40
48
  cost: m.cost,
41
49
  quotaBased: m.quotaBased ?? false,
42
50
  vendor: m.vendor,
51
+ cachesPrompts: m.cachesPrompts,
43
52
  }
44
53
  }
45
54
 
@@ -69,6 +78,20 @@ export function costLabel(flavor: DisplayFlavor): string | undefined {
69
78
  return flavor.quotaBased ? `quota burn ~${body}` : body
70
79
  }
71
80
 
81
/**
 * Picker label describing whether a flavour's provider caches the re-sent prompt prefix.
 *
 * Returns `null` when `cachesPrompts` is undefined (caching status unknown, e.g. an
 * older catalog) so the caller can leave the token out; otherwise one of two fixed
 * strings. It is a plain text token for the option suffix rather than a styled badge,
 * because the model picker is a text-only dropdown-menu item list. The hint matters
 * because a long agentic run on a non-caching flavour re-bills its whole growing
 * prompt every turn, which the user can act on (connect a direct key / pick a caching
 * model).
 */
export function cachingLabel(flavor: DisplayFlavor): string | null {
  const caches = flavor.cachesPrompts
  if (caches === undefined) {
    return null
  }
  if (caches) {
    return 'Prompt caching'
  }
  return 'No prompt caching'
}
94
+
72
95
  /**
73
96
  * The model picker catalog. Served by `GET /models`, where each model is already
74
97
  * resolved to the flavour in use for this deployment (direct when the provider's
@@ -0,0 +1,92 @@
1
+ import { defineStore } from 'pinia'
2
+ import { ref } from 'vue'
3
+ import { useExecutionStore } from '~/stores/execution'
4
+ import { useWorkspaceStore } from '~/stores/workspace'
5
+
6
/**
 * Visual-confirmation gate actions. The gate's live state rides on its execution step
 * (`step.visualConfirm`) and arrives via the execution stream, so this store holds NO gate
 * state — it only drives the actions (approve / request a fix / recapture), uploads reference
 * design images, and resolves stored artifacts into object URLs for the gallery.
 *
 * Two pieces of bookkeeping live here:
 *  - a per-block in-flight action counter, exposed through `isBusy`, so the window can
 *    disable its controls while ANY action for that block is still running;
 *  - an artifactId → object URL cache (plus the fetches currently in progress), released
 *    as a whole by `revokeBlobs`.
 */
export const useVisualConfirmStore = defineStore('visualConfirm', () => {
  const api = useApi()
  const ws = useWorkspaceStore()
  const execution = useExecutionStore()

  /**
   * blockId → number of actions currently in flight. A counter rather than a plain set so
   * that, when two actions for the same block overlap, the first one finishing does not
   * clear the busy state while the second is still running.
   */
  const inFlight = ref<Map<string, number>>(new Map())
  /** Cache of artifactId → object URL, so the gallery doesn't re-fetch the same blob. */
  const blobUrls = ref<Map<string, string>>(new Map())
  /**
   * artifactId → fetch in progress. Concurrent `blobUrl` calls for one artifact share a
   * single request; otherwise each would mint its own object URL and all but the last one
   * written to the cache would never be revoked.
   */
  const pendingBlobs = new Map<string, Promise<string | null>>()
  /** Bumped by `revokeBlobs`; a fetch started under an older epoch must not re-fill the cache. */
  let blobEpoch = 0

  /** Whether at least one action for `blockId` is currently in flight. */
  function isBusy(blockId: string): boolean {
    return (inFlight.value.get(blockId) ?? 0) > 0
  }

  /** Add `delta` to a block's in-flight count, replacing the map so the ref is reassigned. */
  function adjustInFlight(blockId: string, delta: 1 | -1): void {
    const next = new Map(inFlight.value)
    const count = (next.get(blockId) ?? 0) + delta
    if (count > 0) next.set(blockId, count)
    else next.delete(blockId)
    inFlight.value = next
  }

  /**
   * Run `action` while `blockId` is marked busy. When the action resolves to an object
   * carrying a `steps` property it is treated as an execution instance and upserted into
   * the execution store. Rejections propagate to the caller; the busy mark is always
   * released.
   */
  async function run(blockId: string, action: () => Promise<unknown>): Promise<void> {
    adjustInFlight(blockId, 1)
    try {
      const instance = await action()
      if (instance && typeof instance === 'object' && 'steps' in instance) {
        execution.upsert(instance as Parameters<typeof execution.upsert>[0])
      }
    } finally {
      adjustInFlight(blockId, -1)
    }
  }

  /** Approve the reviewed screenshots: advance the pipeline. */
  function approve(blockId: string): Promise<void> {
    return run(blockId, () => api.approveVisualConfirm(ws.requireId(), blockId))
  }

  /** Submit findings and request a fix. */
  function requestFix(blockId: string, findings: string): Promise<void> {
    return run(blockId, () => api.requestVisualConfirmFix(ws.requireId(), blockId, findings))
  }

  /** Refresh the actual-vs-reference pairs from the latest UI-tester report. */
  function recapture(blockId: string): Promise<void> {
    return run(blockId, () => api.recaptureVisualConfirm(ws.requireId(), blockId))
  }

  /** Upload a reference design image for a block, tagged with the view it depicts. */
  function uploadReference(blockId: string, file: File, view: string): Promise<void> {
    return run(blockId, () => api.uploadReferenceArtifact(ws.requireId(), blockId, file, view))
  }

  /** Best-effort release of a single object URL. */
  function releaseUrl(url: string): void {
    try {
      URL.revokeObjectURL(url)
    } catch {
      // Ignore — a URL already revoked / unsupported environment.
    }
  }

  /**
   * Fetch one artifact and cache its URL. Never rejects: any failure yields null. If
   * `revokeBlobs` ran while the fetch was in flight (`epoch` is stale), the URL is
   * released straight away and null is returned rather than re-filling a cache that was
   * just cleared.
   * NOTE(review): assumes `api.fetchArtifactBlobUrl` returns a freshly created object URL
   * that nothing else holds — confirm before relying on the immediate revoke.
   */
  async function loadBlobUrl(artifactId: string, epoch: number): Promise<string | null> {
    try {
      const url = await api.fetchArtifactBlobUrl(ws.requireId(), artifactId)
      if (epoch !== blobEpoch) {
        releaseUrl(url)
        return null
      }
      blobUrls.value.set(artifactId, url)
      return url
    } catch {
      return null
    }
  }

  /**
   * Resolve a stored artifact to an object URL (cached). Returns null on failure, or when
   * the cache was released while the fetch was still in flight. Concurrent calls for the
   * same artifact share one fetch.
   */
  async function blobUrl(artifactId: string): Promise<string | null> {
    const cached = blobUrls.value.get(artifactId)
    if (cached) return cached

    const pending = pendingBlobs.get(artifactId)
    if (pending) return pending

    // `.finally` callbacks run asynchronously, so `request` is already assigned and
    // registered by the time the cleanup executes. The identity check keeps a stale
    // request from evicting a newer one registered after `revokeBlobs`.
    const request: Promise<string | null> = loadBlobUrl(artifactId, blobEpoch).finally(() => {
      if (pendingBlobs.get(artifactId) === request) pendingBlobs.delete(artifactId)
    })
    pendingBlobs.set(artifactId, request)
    return request
  }

  /**
   * Release every cached object URL and clear the cache. `URL.createObjectURL` holds the
   * blob in memory until explicitly revoked, so the gate window calls this on unmount to
   * avoid leaking the (potentially large) screenshot bytes for the session's lifetime.
   * Fetches still in flight are invalidated too: their URLs are revoked on arrival instead
   * of being cached.
   */
  function revokeBlobs(): void {
    blobEpoch += 1
    pendingBlobs.clear()
    for (const url of blobUrls.value.values()) releaseUrl(url)
    blobUrls.value = new Map()
  }

  return { isBusy, approve, requestFix, recapture, uploadReference, blobUrl, revokeBlobs }
})
@@ -10,6 +10,7 @@ const DEFAULTS: WorkspaceSettings = {
10
10
  taskLimitShared: null,
11
11
  taskLimitPerType: null,
12
12
  storeAgentContext: true,
13
+ artifactRetentionDays: 14,
13
14
  kaizenEnabled: true,
14
15
  delegateAgentsToRunnerPool: false,
15
16
  delegateTestEnvToProvider: false,
@@ -37,6 +37,9 @@ export type {
37
37
  RunEnvironment,
38
38
  HumanTestRound,
39
39
  HumanTestStepState,
40
+ VisualConfirmStepState,
41
+ VisualConfirmPair,
42
+ VisualConfirmRound,
40
43
  ExecutionInstance,
41
44
  // The historical frontend name for a per-block review comment is the contract's
42
45
  // StepReviewComment; the env-status union is the contract's EnvironmentStatus.
@@ -21,7 +21,8 @@ const AGENT_KINDS: AgentKind[] = [
21
21
  'architect',
22
22
  'researcher',
23
23
  'coder',
24
- 'tester',
24
+ 'tester-api',
25
+ 'tester-ui',
25
26
  'reviewer',
26
27
  'documenter',
27
28
  'integrator',
@@ -32,6 +33,7 @@ const AGENT_KINDS: AgentKind[] = [
32
33
  'business-documenter',
33
34
  'business-reviewer',
34
35
  'human-test',
36
+ 'visual-confirmation',
35
37
  ]
36
38
  const BLOCK_TYPES: BlockType[] = [
37
39
  'frontend',
@@ -134,8 +134,8 @@ export const AGENT_ARCHETYPES: AgentArchetype[] = [
134
134
  description: 'Builds WireMock mocks for external services and wires them into local/CI runs.',
135
135
  },
136
136
  {
137
- kind: 'tester',
138
- label: 'Tester',
137
+ kind: 'tester-api',
138
+ label: 'API Tester',
139
139
  icon: 'i-lucide-flask-conical',
140
140
  color: '#fbbf24',
141
141
  category: 'test',
@@ -144,6 +144,17 @@ export const AGENT_ARCHETYPES: AgentArchetype[] = [
144
144
  // concerns tree) instead of the generic prose step-detail panel.
145
145
  resultView: 'tester',
146
146
  },
147
+ {
148
+ kind: 'tester-ui',
149
+ label: 'UI Tester',
150
+ icon: 'i-lucide-camera',
151
+ color: '#fbbf24',
152
+ category: 'test',
153
+ description:
154
+ 'Drives a real browser through the new UI, captures a screenshot of each view, and reports outcomes.',
155
+ // Same structured test-report window; it additionally renders the captured screenshots.
156
+ resultView: 'tester',
157
+ },
147
158
  {
148
159
  kind: 'playwright',
149
160
  label: 'Acceptance Test Author',
@@ -165,6 +176,18 @@ export const AGENT_ARCHETYPES: AgentArchetype[] = [
165
176
  // recreate / destroy) instead of the generic prose step-detail panel.
166
177
  resultView: 'human-test',
167
178
  },
179
+ {
180
+ kind: 'visual-confirmation',
181
+ label: 'Visual Confirmation',
182
+ icon: 'i-lucide-image-play',
183
+ color: '#f59e0b',
184
+ category: 'test',
185
+ description:
186
+ 'Pauses for a person to review the UI tester’s screenshots against the uploaded reference designs — approve, or request a fix from findings — before the pipeline continues.',
187
+ // Opens the dedicated visual-confirmation window (actual-vs-reference gallery + approve /
188
+ // request-fix / recapture) instead of the generic prose step-detail panel.
189
+ resultView: 'visual-confirm',
190
+ },
168
191
  {
169
192
  kind: 'documenter',
170
193
  label: 'Documenter',
@@ -83,7 +83,7 @@ export const COMPANION_STATE_META: Record<
83
83
  * via `step.gate`, which all share the same possible/running/completed/skipped shape.
84
84
  */
85
85
  export function gateCompanionFor(step: PipelineStep, runFailed = false): GateCompanion | null {
86
- if (step.agentKind === 'tester') {
86
+ if (step.agentKind === 'tester-api' || step.agentKind === 'tester-ui') {
87
87
  const attempts = step.test?.attempts ?? 0
88
88
  if (step.state === 'done') {
89
89
  // The gate finished: it ran the fixer iff it ever dispatched one.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cat-factory/app",
3
- "version": "0.39.0",
3
+ "version": "0.41.0",
4
4
  "description": "Reusable Nuxt layer for the Agent Architecture Board SPA (components, stores, composables, pages). Consume it from a thin deployment app via `extends: ['@cat-factory/app']` and point it at your backend with NUXT_PUBLIC_API_BASE. See deploy/frontend for an example.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -32,7 +32,7 @@
32
32
  "pinia-plugin-persistedstate": "^4.7.1",
33
33
  "vue": "^3.5.38",
34
34
  "wretch": "^3.0.9",
35
- "@cat-factory/contracts": "0.38.0"
35
+ "@cat-factory/contracts": "0.40.0"
36
36
  },
37
37
  "devDependencies": {
38
38
  "@toad-contracts/testing": "0.3.1",