@cat-factory/app 0.30.5 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,6 +44,11 @@ useTaskExpansion(boardEl)
44
44
  // moving one never shifts another. The frame being dragged is lifted to the top,
45
45
  // then the hovered frame (the un-obscured one under the pointer), so overlapping
46
46
  // services can always be reached and reordered. See useFrameStacking.
47
+ //
48
+ // `elevate-nodes-on-select` is turned OFF on <VueFlow> for this to work: Vue Flow's
49
+ // default adds +1000 to a selected node's z-index, so a frame stayed pinned on top
50
+ // after a click and no amount of hovering another frame could surface it. Stacking
51
+ // is driven purely by hover/drag here; the selection highlight is the ring, not z.
47
52
  function frameZIndex(id: string) {
48
53
  if (draggingId.value === id) return 1000
49
54
  if (hoveredFrameId.value === id) return 100
@@ -163,6 +168,7 @@ async function onDrop(event: DragEvent) {
163
168
  :max-zoom="3"
164
169
  :default-viewport="{ x: 40, y: 20, zoom: 0.85 }"
165
170
  :pan-on-drag="[0, 2]"
171
+ :elevate-nodes-on-select="false"
166
172
  fit-view-on-init
167
173
  @node-click="onNodeClick"
168
174
  @node-double-click="onNodeDoubleClick"
@@ -0,0 +1,208 @@
1
+ <script setup lang="ts">
2
+ import { computed, watch } from 'vue'
3
+ import { onKeyStroke } from '@vueuse/core'
4
+ import type { KaizenGrading } from '~/types/domain'
5
+ import { agentKindMeta } from '~/utils/catalog'
6
+
7
+ // The Kaizen screen: a full-panel overlay listing the workspace's grading history and
8
+ // its verified-combo library. Opened via `ui.openKaizen()` from the sidebar. Read-only —
9
+ // grading is scheduled by the engine and run by the background sweep, never from here.
10
+ const ui = useUiStore()
11
+ const kaizen = useKaizenStore()
12
+
13
+ const open = computed(() => ui.kaizenScreenOpen)
14
+
15
+ watch(open, (isOpen) => { // no `immediate` option: only reacts to later changes of `open`
16
+ if (isOpen) void kaizen.loadOverview() // refetch on every open; `void` discards the promise, so an error rethrown by the store is not handled here
17
+ })
18
+
19
+ function close() { // single close path, used by the Close button and the Escape handler
20
+ ui.closeKaizen()
21
+ }
22
+ onKeyStroke('Escape', () => { // registered unconditionally; the guard makes it a no-op while the screen is closed
23
+ if (open.value) close()
24
+ })
25
+
26
+ function meta(kind: string) { // template shorthand for `agentKindMeta`; the template reads `.icon`, `.color` and `.label` from the result
27
+ return agentKindMeta(kind)
28
+ }
29
+ function when(ms: number): string { // timestamp (handed straight to `new Date`) → date-time text in the viewer's locale
30
+ return new Date(ms).toLocaleString()
31
+ }
32
+ function gradeTone(g: KaizenGrading): string { // text-colour class for a history row's grade cell
33
+ if (g.status === 'failed') return 'text-slate-500' // a failed grading is muted regardless of any grade
34
+ if (g.grade == null) return 'text-slate-400' // no score to colour (null or undefined)
35
+ if (g.grade >= 5) return 'text-emerald-400'
36
+ if (g.grade >= 4) return 'text-lime-400'
37
+ if (g.grade >= 3) return 'text-amber-400' // threshold like the rungs above, so a fractional 3.x is not shown as rose
38
+ return 'text-rose-400'
39
+ }
40
+ function statusLabel(g: KaizenGrading): string {
41
+ // In-flight and failed gradings show their lifecycle state; anything else shows the score (or a bare 'Graded').
42
+ const inFlight = new Map<string, string>([['scheduled', 'Scheduled'], ['running', 'Grading…'], ['failed', 'Failed']])
43
+ const label = inFlight.get(g.status)
44
+ return label ?? (g.grade == null ? 'Graded' : `${g.grade}/5`)
45
+ }
46
+ </script>
47
+
48
+ <template>
49
+ <Teleport to="body">
50
+ <Transition name="kz-fade">
51
+ <div
52
+ v-if="open"
53
+ class="fixed inset-0 z-[60] flex flex-col bg-slate-950/96 backdrop-blur-sm"
54
+ role="dialog"
55
+ aria-modal="true"
56
+ >
57
+ <header class="flex items-center gap-3 border-b border-slate-800 px-6 py-4">
58
+ <div class="flex h-9 w-9 shrink-0 items-center justify-center rounded-lg bg-teal-500/15">
59
+ <UIcon name="i-lucide-sparkles" class="h-5 w-5 text-teal-400" />
60
+ </div>
61
+ <div class="min-w-0">
62
+ <h1 class="truncate text-base font-semibold text-white">Kaizen</h1>
63
+ <p class="truncate text-xs text-slate-500">
64
+ Continuous-improvement grading of agent runs
65
+ </p>
66
+ </div>
67
+ <div class="ml-auto flex items-center gap-2">
68
+ <UButton
69
+ icon="i-lucide-refresh-cw"
70
+ size="xs"
71
+ color="neutral"
72
+ variant="ghost"
73
+ :loading="kaizen.loadingOverview"
74
+ @click="kaizen.loadOverview()"
75
+ >
76
+ Refresh
77
+ </UButton>
78
+ <UButton icon="i-lucide-x" size="xs" color="neutral" variant="ghost" @click="close">
79
+ Close
80
+ </UButton>
81
+ </div>
82
+ </header>
83
+
84
+ <div
85
+ v-if="kaizen.available === false"
86
+ class="flex flex-1 items-center justify-center text-sm text-slate-500"
87
+ >
88
+ Kaizen is not configured on this deployment.
89
+ </div>
90
+
91
+ <div v-else class="grid flex-1 grid-cols-1 gap-6 overflow-auto p-6 lg:grid-cols-3">
92
+ <!-- Verified combos -->
93
+ <section class="lg:col-span-1">
94
+ <h2 class="mb-2 flex items-center gap-2 text-sm font-semibold text-slate-200">
95
+ <UIcon name="i-lucide-badge-check" class="h-4 w-4 text-emerald-400" />
96
+ Verified combos
97
+ <span class="text-xs font-normal text-slate-500">({{ kaizen.verifiedCount }})</span>
98
+ </h2>
99
+ <p class="mb-3 text-[11px] text-slate-500">
100
+ A prompt + agent + model combination that graded 4 or 5 with no recommendations
101
+ five times in a row. These are no longer graded.
102
+ </p>
103
+ <ul class="space-y-2">
104
+ <li
105
+ v-for="c in kaizen.verified"
106
+ :key="c.comboKey"
107
+ class="rounded-lg border border-slate-800 bg-slate-900/40 p-2.5"
108
+ >
109
+ <div class="flex items-center gap-2">
110
+ <UIcon
111
+ :name="meta(c.agentKind).icon"
112
+ class="h-3.5 w-3.5 shrink-0"
113
+ :style="{ color: meta(c.agentKind).color }"
114
+ />
115
+ <span class="text-xs font-medium text-slate-200">{{
116
+ meta(c.agentKind).label
117
+ }}</span>
118
+ <UIcon
119
+ v-if="c.verified"
120
+ name="i-lucide-badge-check"
121
+ class="ml-auto h-3.5 w-3.5 text-emerald-400"
122
+ />
123
+ <span v-else class="ml-auto text-[11px] text-slate-500">
124
+ {{ c.consecutiveHighGrades }}/5
125
+ </span>
126
+ </div>
127
+ <div class="mt-1 truncate text-[11px] text-slate-500" :title="c.model">
128
+ {{ c.model }} · prompt v{{ c.promptVersion }}
129
+ </div>
130
+ </li>
131
+ <li v-if="kaizen.verified.length === 0" class="text-xs text-slate-600">
132
+ No combos yet.
133
+ </li>
134
+ </ul>
135
+ </section>
136
+
137
+ <!-- Grading history -->
138
+ <section class="lg:col-span-2">
139
+ <h2 class="mb-2 flex items-center gap-2 text-sm font-semibold text-slate-200">
140
+ <UIcon name="i-lucide-history" class="h-4 w-4 text-teal-400" />
141
+ Grading history
142
+ </h2>
143
+ <div class="overflow-hidden rounded-lg border border-slate-800">
144
+ <table class="w-full text-left text-xs">
145
+ <thead class="bg-slate-900/60 text-[11px] uppercase tracking-wide text-slate-500">
146
+ <tr>
147
+ <th class="px-3 py-2 font-medium">When</th>
148
+ <th class="px-3 py-2 font-medium">Agent</th>
149
+ <th class="px-3 py-2 font-medium">Model</th>
150
+ <th class="px-3 py-2 font-medium">Grade</th>
151
+ <th class="px-3 py-2 font-medium">Recommendations</th>
152
+ </tr>
153
+ </thead>
154
+ <tbody class="divide-y divide-slate-800/70">
155
+ <tr v-for="g in kaizen.history" :key="g.id" class="align-top">
156
+ <td class="whitespace-nowrap px-3 py-2 text-slate-500">
157
+ {{ when(g.createdAt) }}
158
+ </td>
159
+ <td class="px-3 py-2">
160
+ <span class="flex items-center gap-1.5">
161
+ <UIcon
162
+ :name="meta(g.agentKind).icon"
163
+ class="h-3.5 w-3.5"
164
+ :style="{ color: meta(g.agentKind).color }"
165
+ />
166
+ <span class="text-slate-200">{{ meta(g.agentKind).label }}</span>
167
+ <span class="text-slate-600">v{{ g.promptVersion }}</span>
168
+ </span>
169
+ </td>
170
+ <td class="max-w-[12rem] truncate px-3 py-2 text-slate-400" :title="g.model">
171
+ {{ g.model }}
172
+ </td>
173
+ <td class="whitespace-nowrap px-3 py-2 font-semibold" :class="gradeTone(g)">
174
+ {{ statusLabel(g) }}
175
+ </td>
176
+ <td class="px-3 py-2 text-slate-400">
177
+ <ul v-if="g.recommendations.length" class="list-disc space-y-0.5 pl-4">
178
+ <li v-for="(r, i) in g.recommendations" :key="i">{{ r }}</li>
179
+ </ul>
180
+ <span v-else-if="g.status === 'complete'" class="text-slate-600">—</span>
181
+ <span v-else-if="g.error" class="text-rose-400/80">{{ g.error }}</span>
182
+ </td>
183
+ </tr>
184
+ <tr v-if="kaizen.history.length === 0">
185
+ <td colspan="5" class="px-3 py-6 text-center text-slate-600">
186
+ No gradings yet.
187
+ </td>
188
+ </tr>
189
+ </tbody>
190
+ </table>
191
+ </div>
192
+ </section>
193
+ </div>
194
+ </div>
195
+ </Transition>
196
+ </Teleport>
197
+ </template>
198
+
199
+ <style scoped>
200
+ .kz-fade-enter-active,
201
+ .kz-fade-leave-active {
202
+ transition: opacity 0.15s ease;
203
+ }
204
+ .kz-fade-enter-from,
205
+ .kz-fade-leave-to {
206
+ opacity: 0;
207
+ }
208
+ </style>
@@ -0,0 +1,94 @@
1
+ <script setup lang="ts">
2
+ import { computed, watch } from 'vue'
3
+
4
+ // Per-step Kaizen grading status, shown inside the run window (NOT on the board). Reads
5
+ // the grading for this run's step from the kaizen store, lazily loading the run's
6
+ // gradings on first mount, and renders the scheduled→running→complete status plus the
7
+ // grade, summary and recommendations once available.
8
+ const props = defineProps<{
9
+ /** The run (execution) id. */
10
+ instanceId: string | null | undefined
11
+ /** The step's index within the run. */
12
+ stepIndex: number | null | undefined
13
+ }>()
14
+
15
+ const kaizen = useKaizenStore()
16
+
17
+ const grading = computed(() => { // the store's grading for this run + step, or null while either prop is missing
18
+ const { instanceId, stepIndex } = props
19
+ return instanceId && stepIndex != null ? kaizen.gradingForStep(instanceId, stepIndex) : null
20
+ })
21
+
22
+ // Load the run's gradings when we have an id and nothing is cached for it yet (checked on
23
+ // mount and again whenever `instanceId` changes). The stream keeps them live afterwards.
24
+ watch(
25
+ () => props.instanceId,
26
+ (id) => {
27
+ if (id && kaizen.gradingsFor(id).length === 0 && kaizen.available !== false) { // NOTE(review): the store's stream `upsert` fills the same cache, so a run first seen via the stream is never fetched here
28
+ void kaizen.loadForExecution(id) // fire-and-forget; an error rethrown by the store is not handled here
29
+ }
30
+ },
31
+ { immediate: true }, // also run once at setup, not only on later changes
32
+ )
33
+
34
+ const tone = computed(() => { // text-colour class for the grade shown in the header
35
+ const g = grading.value
36
+ if (!g || g.grade == null) return 'text-slate-400' // nothing graded yet
37
+ if (g.grade >= 5) return 'text-emerald-400'
38
+ if (g.grade >= 4) return 'text-lime-400'
39
+ if (g.grade >= 3) return 'text-amber-400' // threshold like the rungs above, so a fractional 3.x is not shown as rose
40
+ return 'text-rose-400'
41
+ })
42
+ </script>
43
+
44
+ <template>
45
+ <section v-if="grading" class="rounded-xl border border-slate-800 bg-slate-900/50 p-4">
46
+ <div class="flex items-center gap-2">
47
+ <UIcon name="i-lucide-sparkles" class="h-4 w-4 text-teal-400" />
48
+ <h3 class="text-sm font-semibold text-slate-200">Kaizen grading</h3>
49
+ <span class="ml-auto flex items-center gap-1.5 text-xs">
50
+ <template v-if="grading.status === 'scheduled'">
51
+ <UIcon name="i-lucide-clock" class="h-3.5 w-3.5 text-slate-500" />
52
+ <span class="text-slate-400">Scheduled</span>
53
+ </template>
54
+ <template v-else-if="grading.status === 'running'">
55
+ <UIcon name="i-lucide-loader-circle" class="h-3.5 w-3.5 animate-spin text-teal-400" />
56
+ <span class="text-teal-300">Grading…</span>
57
+ </template>
58
+ <template v-else-if="grading.status === 'failed'">
59
+ <UIcon name="i-lucide-circle-alert" class="h-3.5 w-3.5 text-rose-400" />
60
+ <span class="text-rose-400">Failed</span>
61
+ </template>
62
+ <template v-else><!-- complete: the grade is typed nullable, so fall back to a plain label instead of rendering "/5" -->
63
+ <span class="font-semibold" :class="tone">{{ grading.grade != null ? grading.grade + '/5' : 'Graded' }}</span>
64
+ </template>
65
+ </span>
66
+ </div>
67
+
68
+ <p v-if="grading.status === 'scheduled'" class="mt-2 text-[11px] text-slate-500">
69
+ A Kaizen grading is queued for this step. It runs in the background after the run.
70
+ </p>
71
+
72
+ <template v-else-if="grading.status === 'complete'">
73
+ <p v-if="grading.summary" class="mt-2 text-xs text-slate-300">{{ grading.summary }}</p>
74
+ <div v-if="grading.recommendations.length" class="mt-2">
75
+ <p class="text-[11px] font-medium uppercase tracking-wide text-slate-500">
76
+ Recommendations
77
+ </p>
78
+ <ul class="mt-1 list-disc space-y-0.5 pl-4 text-xs text-slate-300">
79
+ <li v-for="(r, i) in grading.recommendations" :key="i">{{ r }}</li>
80
+ </ul>
81
+ </div>
82
+ <p v-else class="mt-2 text-[11px] text-emerald-400/80">
83
+ Smooth interaction — nothing to improve.
84
+ </p>
85
+ <p v-if="grading.graderModel" class="mt-2 text-[10px] text-slate-600">
86
+ Graded by {{ grading.graderModel }}
87
+ </p>
88
+ </template>
89
+
90
+ <p v-else-if="grading.status === 'failed'" class="mt-2 text-[11px] text-rose-400/80">
91
+ {{ grading.error ?? 'The grading could not be completed.' }}
92
+ </p>
93
+ </section>
94
+ </template>
@@ -138,6 +138,18 @@ watch(
138
138
  >
139
139
  Sandbox
140
140
  </UButton>
141
+ <!-- The Kaizen screen: grading history + verified prompt/agent/model combos. -->
142
+ <UButton
143
+ block
144
+ color="primary"
145
+ variant="soft"
146
+ size="sm"
147
+ icon="i-lucide-sparkles"
148
+ class="justify-start"
149
+ @click="ui.openKaizen()"
150
+ >
151
+ Kaizen
152
+ </UButton>
141
153
  </div>
142
154
  </section>
143
155
 
@@ -313,6 +313,12 @@ async function copyOutput() {
313
313
  />
314
314
  </section>
315
315
 
316
+ <!-- post-run Kaizen grading status + results for this step (run-details only) -->
317
+ <KaizenStepStatus
318
+ :instance-id="ctx?.instanceId ?? null"
319
+ :step-index="ctx?.stepIndex ?? null"
320
+ />
321
+
316
322
  <!-- companion rework budget spent: the shared iteration-cap decision
317
323
  (one more round / proceed with the current output / stop & reset) -->
318
324
  <IterationCapPrompt
@@ -68,6 +68,7 @@ const draft = reactive({
68
68
  taskLimitShared: 5 as number,
69
69
  perType: {} as Record<CreateTaskType, number>,
70
70
  storeAgentContext: true,
71
+ kaizenEnabled: true,
71
72
  // Budget: empty string ⇒ "use the built-in default" (null on the wire).
72
73
  spendCurrency: '',
73
74
  spendMonthlyLimit: '',
@@ -81,6 +82,7 @@ function hydrate() {
81
82
  const pt = s.taskLimitPerType ?? {}
82
83
  for (const t of TASK_TYPES) draft.perType[t] = pt[t] ?? 3
83
84
  draft.storeAgentContext = s.storeAgentContext
85
+ draft.kaizenEnabled = s.kaizenEnabled
84
86
  draft.spendCurrency = s.spendCurrency ?? ''
85
87
  draft.spendMonthlyLimit = s.spendMonthlyLimit == null ? '' : String(s.spendMonthlyLimit)
86
88
  }
@@ -107,6 +109,7 @@ async function save() {
107
109
  )
108
110
  : null,
109
111
  storeAgentContext: draft.storeAgentContext,
112
+ kaizenEnabled: draft.kaizenEnabled,
110
113
  })
111
114
  toast.add({ title: 'Settings saved', icon: 'i-lucide-check', color: 'success' })
112
115
  } catch (e) {
@@ -237,6 +240,23 @@ async function saveBudget() {
237
240
  </label>
238
241
  </section>
239
242
 
243
+ <!-- Kaizen agent -->
244
+ <section class="space-y-2">
245
+ <h3 class="text-sm font-semibold text-slate-200">Kaizen agent</h3>
246
+ <p class="text-[11px] text-slate-400">
247
+ After each run completes, the Kaizen agent grades how every agent step went — smooth
248
+ and efficient vs confused and chaotic — and recommends prompt/model improvements. A
249
+ prompt + agent + model combination that grades highly with no recommendations five
250
+ times in a row is marked verified and is no longer graded. Grading runs in the
251
+ background and is shown inside run details and the Kaizen screen. Set the grader's
252
+ model in Model Configuration (the “Kaizen” agent).
253
+ </p>
254
+ <label class="flex items-center gap-2">
255
+ <USwitch v-model="draft.kaizenEnabled" size="sm" />
256
+ <span class="text-sm text-slate-200">Grade agent runs with Kaizen</span>
257
+ </label>
258
+ </section>
259
+
240
260
  <div class="flex justify-end">
241
261
  <UButton
242
262
  color="primary"
@@ -0,0 +1,16 @@
1
+ import type { KaizenGrading, KaizenOverview } from '~/types/domain'
2
+ import type { ApiContext } from './context'
3
+
4
+ /** Kaizen (post-run grading) read endpoints: the screen overview + a run's gradings. */
5
+ export function kaizenApi({ http, ws }: ApiContext) {
6
+ return {
7
+ // Workspace-wide payload behind the Kaizen screen (grading history + verified combos).
8
+ getKaizenOverview: (workspaceId: string) => http<KaizenOverview>(`${ws(workspaceId)}/kaizen`),
9
+
10
+ // Gradings recorded for a single run; the execution id is percent-encoded into the path.
11
+ getKaizenForExecution: (workspaceId: string, executionId: string) => {
12
+ const runUrl = `${ws(workspaceId)}/executions/${encodeURIComponent(executionId)}/kaizen`
13
+ return http<{ gradings: KaizenGrading[] }>(runUrl)
14
+ },
15
+ }
16
+ }
@@ -9,6 +9,7 @@ import { executionApi } from './api/execution'
9
9
  import { fragmentsApi } from './api/fragments'
10
10
  import { githubApi } from './api/github'
11
11
  import { humanTestApi } from './api/humanTest'
12
+ import { kaizenApi } from './api/kaizen'
12
13
  import { modelsApi } from './api/models'
13
14
  import { notificationsApi } from './api/notifications'
14
15
  import { presetsApi } from './api/presets'
@@ -85,6 +86,7 @@ export function useApi() {
85
86
  ...tasksApi(ctx),
86
87
  ...reviewsApi(ctx),
87
88
  ...humanTestApi(ctx),
89
+ ...kaizenApi(ctx),
88
90
  ...specApi(ctx),
89
91
  ...notificationsApi(ctx),
90
92
  ...presetsApi(ctx),
@@ -23,6 +23,7 @@ export function useWorkspaceStream() {
23
23
  const requirements = useRequirementsStore()
24
24
  const consensus = useConsensusStore()
25
25
  const clarity = useClarityStore()
26
+ const kaizen = useKaizenStore()
26
27
  const api = useApi()
27
28
  const apiBase = useRuntimeConfig().public.apiBase
28
29
 
@@ -87,6 +88,11 @@ export function useWorkspaceStream() {
87
88
  // cache so an open review window / inspector reflects it live ("incorporating…" → the
88
89
  // next cycle / converged). The summons back, when needed, arrives as a `notification`.
89
90
  clarity.upsert(event.review)
91
+ } else if (event.type === 'kaizen') {
92
+ // A post-run Kaizen grading was scheduled, started or completed — fold it into the
93
+ // run cache (so an open run window shows scheduled→running→complete live) and the
94
+ // Kaizen screen history. Never surfaced on the board.
95
+ kaizen.upsert(event.grading)
90
96
  }
91
97
  }
92
98
 
@@ -11,6 +11,7 @@ import DecisionModal from '~/components/panels/DecisionModal.vue'
11
11
  import AgentStepDetail from '~/components/panels/AgentStepDetail.vue'
12
12
  import StepResultViewHost from '~/components/panels/StepResultViewHost.vue'
13
13
  import ObservabilityPanel from '~/components/panels/ObservabilityPanel.vue'
14
+ import KaizenPanel from '~/components/kaizen/KaizenPanel.vue'
14
15
  import BlockFocusView from '~/components/focus/BlockFocusView.vue'
15
16
  import DocumentSourceConnectModal from '~/components/documents/DocumentSourceConnectModal.vue'
16
17
  import DocumentImportModal from '~/components/documents/DocumentImportModal.vue'
@@ -171,6 +172,7 @@ watch(
171
172
  <AgentStepDetail />
172
173
  <StepResultViewHost />
173
174
  <ObservabilityPanel />
175
+ <KaizenPanel />
174
176
  <DocumentSourceConnectModal />
175
177
  <DocumentImportModal />
176
178
  <SpawnPreviewModal />
@@ -0,0 +1,101 @@
1
+ import { defineStore } from 'pinia'
2
+ import { computed, ref } from 'vue'
3
+ import type { KaizenGrading, KaizenVerifiedCombo } from '~/types/domain'
4
+ import { useWorkspaceStore } from '~/stores/workspace'
5
+
6
+ /**
7
+ * Kaizen state: per-run gradings (for the run-window status surface) and the
8
+ * workspace-wide history + verified-combo library (for the Kaizen screen). Gradings
9
+ * arrive both via lazy loads and live over the workspace stream (`upsert`). Never
10
+ * surfaced on the board — only inside run details + the dedicated screen.
11
+ */
12
+ export const useKaizenStore = defineStore('kaizen', () => {
13
+ const api = useApi()
14
+
15
+ /** Gradings keyed by run (execution) id, for the run window. */
16
+ const byExecution = ref<Record<string, KaizenGrading[]>>({})
17
+ /** Recent grading history for the Kaizen screen. */
18
+ const history = ref<KaizenGrading[]>([])
19
+ /** The verified-combo library for the Kaizen screen. */
20
+ const verified = ref<KaizenVerifiedCombo[]>([])
21
+ const loadingOverview = ref(false) // true while `loadOverview` is in flight
22
+ const loadingExecution = ref<Set<string>>(new Set()) // ids of runs currently being fetched; replaced with a new Set on every change
23
+ /** `null` until a load succeeds or returns 503; `true` after a success; `false` after a 503 (feature not configured on this deployment). */
24
+ const available = ref<boolean | null>(null)
25
+
26
+ function gradingsFor(executionId: string): KaizenGrading[] { // cached gradings for a run; never undefined
27
+ return byExecution.value[executionId] ?? [] // a run with no cache entry yields a fresh empty array
28
+ }
29
+
30
+ /** Look up one step's grading within a run's cached gradings; `null` when none is recorded. */
31
+ function gradingForStep(executionId: string, stepIndex: number): KaizenGrading | null {
32
+ return (byExecution.value[executionId] ?? []).find(({ stepIndex: idx }) => idx === stepIndex) ?? null
33
+ }
34
+
35
+ async function loadOverview() { // fetches the Kaizen screen payload; a 503 marks the feature unavailable, any other error is rethrown
36
+ const ws = useWorkspaceStore()
37
+ loadingOverview.value = true
38
+ try {
39
+ const overview = await api.getKaizenOverview(ws.requireId())
40
+ history.value = overview.gradings
41
+ verified.value = overview.verified
42
+ available.value = true
43
+ } catch (e) {
44
+ const err = e as { statusCode?: number; status?: number } | null | undefined
45
+ if ((err?.statusCode ?? err?.status) === 503) available.value = false // accept either field name for the HTTP status
46
+ else throw e
47
+ } finally {
48
+ loadingOverview.value = false // cleared on success, 503 and rethrow alike
49
+ }
50
+ }
51
+
52
+ async function loadForExecution(executionId: string) { // fetches one run's gradings into `byExecution`; a 503 marks the feature unavailable, any other error is rethrown
53
+ const ws = useWorkspaceStore()
54
+ loadingExecution.value = new Set(loadingExecution.value).add(executionId) // new Set so the ref change is observed
55
+ try {
56
+ const { gradings } = await api.getKaizenForExecution(ws.requireId(), executionId)
57
+ byExecution.value = { ...byExecution.value, [executionId]: gradings } // replaces whatever was cached for this run
58
+ available.value = true
59
+ } catch (e) {
60
+ const err = e as { statusCode?: number; status?: number } | null | undefined
61
+ if ((err?.statusCode ?? err?.status) === 503) available.value = false // accept either field name for the HTTP status
62
+ else throw e
63
+ } finally {
64
+ const next = new Set(loadingExecution.value)
65
+ next.delete(executionId)
66
+ loadingExecution.value = next
67
+ }
68
+ }
69
+
70
+ /** Fold a grading pushed over the stream into both the run cache and the screen history. */
71
+ function upsert(grading: KaizenGrading) {
72
+ const current = byExecution.value[grading.executionId] ?? []
73
+ const replaced = current.some((g) => g.id === grading.id) // matched by grading id
74
+ const nextRun = replaced
75
+ ? current.map((g) => (g.id === grading.id ? grading : g))
76
+ : [...current, grading]
77
+ byExecution.value = { ...byExecution.value, [grading.executionId]: nextRun } // creates the run's entry when nothing was cached for it
78
+ // Keep the screen history live too (newest first). Applied even before the overview is loaded; `loadOverview` replaces the list wholesale.
79
+ const inHistory = history.value.some((g) => g.id === grading.id)
80
+ if (inHistory) history.value = history.value.map((g) => (g.id === grading.id ? grading : g))
81
+ else history.value = [grading, ...history.value]
82
+ }
83
+
84
+ const isLoadingExecution = (executionId: string) => loadingExecution.value.has(executionId) // true while `loadForExecution` is in flight for this run
85
+ const verifiedCount = computed(() => verified.value.filter((c) => c.verified).length) // `verified` can hold combos that are not yet verified, so count only the flagged ones
86
+
87
+ return {
88
+ byExecution,
89
+ history,
90
+ verified,
91
+ available,
92
+ loadingOverview,
93
+ verifiedCount,
94
+ gradingsFor,
95
+ gradingForStep,
96
+ loadOverview,
97
+ loadForExecution,
98
+ upsert,
99
+ isLoadingExecution,
100
+ }
101
+ })
package/app/stores/ui.ts CHANGED
@@ -149,6 +149,10 @@ export const useUiStore = defineStore('ui', () => {
149
149
  // per-call detail from the observability store on open.
150
150
  const observabilityInstanceId = ref<string | null>(null)
151
151
 
152
+ // The Kaizen screen (grading history + verified-combo library), a full-panel overlay
153
+ // opened from the sidebar. Distinct from the per-run grading status shown in run details.
154
+ const kaizenScreenOpen = ref(false)
155
+
152
156
  /** Current canvas zoom (driven by Vue Flow viewport). */
153
157
  const zoom = ref(1)
154
158
 
@@ -473,6 +477,12 @@ export const useUiStore = defineStore('ui', () => {
473
477
  function closeObservability() {
474
478
  observabilityInstanceId.value = null
475
479
  }
480
+ function openKaizen() { // shows the Kaizen screen by setting its open flag
481
+ kaizenScreenOpen.value = true
482
+ }
483
+ function closeKaizen() { // hides the Kaizen screen by clearing its open flag
484
+ kaizenScreenOpen.value = false
485
+ }
476
486
 
477
487
  return {
478
488
  selectedBlockId,
@@ -513,6 +523,7 @@ export const useUiStore = defineStore('ui', () => {
513
523
  closeResultView,
514
524
  stepDetail,
515
525
  observabilityInstanceId,
526
+ kaizenScreenOpen,
516
527
  zoom,
517
528
  lod,
518
529
  expandedFrames,
@@ -589,5 +600,7 @@ export const useUiStore = defineStore('ui', () => {
589
600
  closeStepDetail,
590
601
  openObservability,
591
602
  closeObservability,
603
+ openKaizen,
604
+ closeKaizen,
592
605
  }
593
606
  })
@@ -10,6 +10,7 @@ const DEFAULTS: WorkspaceSettings = {
10
10
  taskLimitShared: null,
11
11
  taskLimitPerType: null,
12
12
  storeAgentContext: true,
13
+ kaizenEnabled: true,
13
14
  spendCurrency: null,
14
15
  spendMonthlyLimit: null,
15
16
  }
@@ -309,6 +309,9 @@ export type AgentKind =
309
309
  // validate the change in a live URL, dispatching the Tester's `fixer` (from findings) or
310
310
  // the `conflict-resolver` (on a conflicting pull-main) on demand. Opens its own window.
311
311
  | 'human-test'
312
+ // The Kaizen agent: post-run grader (NOT a pipeline step / palette archetype). Surfaced
313
+ // only in Model Configuration (its model is pinnable like any agent) and run details.
314
+ | 'kaizen'
312
315
 
313
316
  /** A draggable agent definition shown in the agent palette. */
314
317
  /** Palette grouping for the agent archetypes (collapsible sections in the builder). */
@@ -495,6 +498,8 @@ export interface WorkspaceSettings {
495
498
  taskLimitPerType: Partial<Record<CreateTaskType, number>> | null
496
499
  /** Whether to store the complete provided-context snapshot for each container agent. */
497
500
  storeAgentContext: boolean
501
+ /** Whether the Kaizen agent grades agent steps after each run. On by default. */
502
+ kaizenEnabled: boolean
498
503
  /** Spend budget currency (ISO 4217). Null ⇒ the built-in default (`EUR`). */
499
504
  spendCurrency: string | null
500
505
  /** Monthly spend budget in `spendCurrency`. Null ⇒ the built-in default. */
@@ -508,6 +513,7 @@ export interface UpdateWorkspaceSettingsInput {
508
513
  taskLimitShared?: number | null
509
514
  taskLimitPerType?: Partial<Record<CreateTaskType, number>> | null
510
515
  storeAgentContext?: boolean
516
+ kaizenEnabled?: boolean
511
517
  spendCurrency?: string | null
512
518
  spendMonthlyLimit?: number | null
513
519
  }
@@ -520,6 +526,50 @@ export interface ServiceFragmentDefaults {
520
526
  fragmentIds: string[]
521
527
  }
522
528
 
529
+ /** Lifecycle of a Kaizen grading. Mirrors `@cat-factory/contracts`. */
530
+ export type KaizenGradingStatus = 'scheduled' | 'running' | 'complete' | 'failed'
531
+
532
+ /**
533
+ * A Kaizen grading of one completed agent step (how smooth/efficient the interaction
534
+ * was, 1..5, plus recommendations). Mirrors `@cat-factory/contracts`.
535
+ */
536
+ export interface KaizenGrading {
537
+ id: string // identity used when folding stream updates into cached lists
538
+ executionId: string // the run (execution) this grading belongs to; the per-run cache is keyed by it
539
+ blockId: string
540
+ stepIndex: number // the step's index within the run
541
+ agentKind: string // kind of the graded agent; resolved to icon/label via `agentKindMeta`
542
+ model: string // model shown for the graded step
543
+ promptVersion: number // rendered as `v{n}` next to the agent
544
+ comboKey: string // presumably identifies the (promptVersion, agentKind, model) combo — confirm against the contracts package
545
+ status: KaizenGradingStatus
546
+ grade: number | null // 1..5; null when no score is available
547
+ summary: string // shown in run details once the grading is complete
548
+ recommendations: string[] // may be empty
549
+ graderModel: string | null // shown as "Graded by …" when present
550
+ error: string | null // message displayed for a failed grading
551
+ createdAt: number // passed to `new Date(...)` by the Kaizen screen — presumably epoch milliseconds
552
+ updatedAt: number // presumably the same unit as `createdAt` — confirm
553
+ }
554
+
555
+ /** A `(promptVersion, agentKind, model)` combo's verification progress. */
556
+ export interface KaizenVerifiedCombo {
557
+ comboKey: string // used as the list key on the Kaizen screen
558
+ agentKind: string // resolved to icon/label via `agentKindMeta`
559
+ model: string
560
+ promptVersion: number // rendered as `prompt v{n}`
561
+ consecutiveHighGrades: number // shown as `n/5` while the combo is not yet verified
562
+ verified: boolean // true ⇒ shown with the badge and counted in the header total
563
+ verifiedAt: number | null // presumably a timestamp set once verified — confirm
564
+ updatedAt: number
565
+ }
566
+
567
+ /** The Kaizen screen payload: recent grading history + the verified-combo library. */
568
+ export interface KaizenOverview {
569
+ gradings: KaizenGrading[]
570
+ verified: KaizenVerifiedCombo[]
571
+ }
572
+
523
573
  /**
524
574
  * Real-time events pushed over the workspace WebSocket stream (see
525
575
  * `useWorkspaceStream`). Mirrors `WorkspaceEvent` in `@cat-factory/contracts`.
@@ -533,6 +583,7 @@ export type WorkspaceEvent =
533
583
  | { type: 'requirements'; review: RequirementReview; at: number }
534
584
  | { type: 'consensus'; session: ConsensusSession; at: number }
535
585
  | { type: 'clarity'; review: ClarityReview; at: number }
586
+ | { type: 'kaizen'; grading: KaizenGrading; at: number }
536
587
 
537
588
  /** Level-of-detail buckets driven by the canvas zoom level. Shallow → deep:
538
589
  * `far`/`mid`/`close` govern a service frame (chip → card → opened with tasks);
@@ -330,6 +330,17 @@ export const SYSTEM_AGENT_META: Record<string, AgentArchetype> = {
330
330
  color: '#a3e635',
331
331
  description: 'Scores the PR and auto-merges within the task thresholds, or asks for review.',
332
332
  },
333
+ // The Kaizen agent grades agent steps AFTER a run completes (continuous improvement).
334
+ // It is NOT a pipeline step (never in the palette — no `category`), but it runs an LLM,
335
+ // so it needs display metadata here and a per-workspace model in Model Configuration.
336
+ kaizen: {
337
+ kind: 'kaizen',
338
+ label: 'Kaizen',
339
+ icon: 'i-lucide-sparkles',
340
+ color: '#2dd4bf',
341
+ description:
342
+ 'Grades each completed agent step (smooth vs chaotic) after a run and recommends prompt/model improvements.',
343
+ },
333
344
  // A polling gate (no model of its own) that watches the released PR's observability
334
345
  // signals after merge and escalates to the on-call agent on a regression. NOT in any
335
346
  // default pipeline and NOT a standing palette archetype — the palette surfaces it
@@ -365,9 +376,15 @@ export const OBSERVABILITY_GATE_ARCHETYPE: AgentArchetype =
365
376
  * default model would do nothing for them.
366
377
  */
367
378
  export const MODEL_CONFIGURABLE_SYSTEM_KINDS: AgentArchetype[] = [
368
- ...['spec-writer', 'blueprints', 'conflict-resolver', 'ci-fixer', 'fixer', 'merger'].map(
369
- (kind) => SYSTEM_AGENT_META[kind]!,
370
- ),
379
+ ...[
380
+ 'spec-writer',
381
+ 'blueprints',
382
+ 'conflict-resolver',
383
+ 'ci-fixer',
384
+ 'fixer',
385
+ 'merger',
386
+ 'kaizen',
387
+ ].map((kind) => SYSTEM_AGENT_META[kind]!),
371
388
  // Companions run LLMs but aren't palette-addable (they're producer toggles), so include
372
389
  // them here to keep their per-workspace default model pinnable in the Model Defaults panel.
373
390
  ...COMPANION_ARCHETYPES,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cat-factory/app",
3
- "version": "0.30.5",
3
+ "version": "0.31.0",
4
4
  "description": "Reusable Nuxt layer for the Agent Architecture Board SPA (components, stores, composables, pages). Consume it from a thin deployment app via `extends: ['@cat-factory/app']` and point it at your backend with NUXT_PUBLIC_API_BASE. See deploy/frontend for an example.",
5
5
  "repository": {
6
6
  "type": "git",