@cat-factory/app 0.30.6 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/components/kaizen/KaizenPanel.vue +208 -0
- package/app/components/kaizen/KaizenStepStatus.vue +94 -0
- package/app/components/layout/SideBar.vue +12 -0
- package/app/components/panels/AgentStepDetail.vue +6 -0
- package/app/components/settings/WorkspaceSettingsPanel.vue +20 -0
- package/app/composables/api/kaizen.ts +16 -0
- package/app/composables/useApi.ts +2 -0
- package/app/composables/useWorkspaceStream.ts +6 -0
- package/app/pages/index.vue +2 -0
- package/app/stores/kaizen.ts +101 -0
- package/app/stores/ui.ts +13 -0
- package/app/stores/workspaceSettings.ts +1 -0
- package/app/types/domain.ts +51 -0
- package/app/utils/catalog.ts +20 -3
- package/package.json +1 -1
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
<script setup lang="ts">
|
|
2
|
+
import { computed, watch } from 'vue'
|
|
3
|
+
import { onKeyStroke } from '@vueuse/core'
|
|
4
|
+
import type { KaizenGrading } from '~/types/domain'
|
|
5
|
+
import { agentKindMeta } from '~/utils/catalog'
|
|
6
|
+
|
|
7
|
+
// The Kaizen screen: a full-panel overlay listing the workspace's grading history and
|
|
8
|
+
// its verified-combo library. Opened via `ui.openKaizen()` from the sidebar. Read-only —
|
|
9
|
+
// grading is scheduled by the engine and run by the background sweep, never from here.
|
|
10
|
+
const ui = useUiStore()
|
|
11
|
+
const kaizen = useKaizenStore()
|
|
12
|
+
|
|
13
|
+
const open = computed(() => ui.kaizenScreenOpen)
|
|
14
|
+
|
|
15
|
+
// Refresh the overview every time the screen opens. `loadOverview()` rethrows every
// failure except the 503 "not configured" response, so this fire-and-forget call needs
// its own rejection handler; otherwise a failed fetch surfaces as an unhandled promise
// rejection.
watch(open, (isOpen) => {
  if (!isOpen) return
  kaizen.loadOverview().catch((err: unknown) => {
    console.error('[kaizen] failed to load the overview', err)
  })
})
|
|
18
|
+
|
|
19
|
+
/** Dismiss the Kaizen screen by clearing the open flag held in the UI store. */
function close() {
  ui.closeKaizen()
}

// Escape closes the overlay, but only while it is actually showing.
onKeyStroke('Escape', () => {
  if (!open.value) return
  close()
})
|
|
25
|
+
|
|
26
|
+
/** Display metadata for an agent kind, as resolved by the shared catalog helper. */
const meta = (kind: string) => agentKindMeta(kind)
|
|
29
|
+
/** Render an epoch-millisecond timestamp as a date-time string in the viewer's locale. */
function when(ms: number): string {
  const stamp = new Date(ms)
  return stamp.toLocaleString()
}
|
|
32
|
+
/**
 * Tailwind text-colour class for a grading's grade cell.
 *
 * Failed gradings and gradings that carry no grade yet are muted; otherwise the colour
 * follows the grade band: 5 and up emerald, 4 lime, 3 amber, anything lower rose.
 *
 * @param g The grading whose status and grade decide the colour.
 * @returns The Tailwind class name to apply.
 */
function gradeTone(g: KaizenGrading): string {
  if (g.status === 'failed') return 'text-slate-500'
  if (g.grade == null) return 'text-slate-400'
  if (g.grade >= 5) return 'text-emerald-400'
  if (g.grade >= 4) return 'text-lime-400'
  // `>=` like the bands above: a strict `=== 3` would drop a fractional grade in
  // [3, 4) into the rose band, i.e. show it as worse than a plain 3.
  if (g.grade >= 3) return 'text-amber-400'
  return 'text-rose-400'
}
|
|
40
|
+
/**
 * Short label for a grading's grade cell: the lifecycle state while the grading is
 * pending or failed, otherwise the grade out of five ("Graded" when no grade is set).
 */
function statusLabel(g: KaizenGrading): string {
  switch (g.status) {
    case 'scheduled':
      return 'Scheduled'
    case 'running':
      return 'Grading…'
    case 'failed':
      return 'Failed'
    default:
      return g.grade == null ? 'Graded' : `${g.grade}/5`
  }
}
|
|
46
|
+
</script>
|
|
47
|
+
|
|
48
|
+
<template>
|
|
49
|
+
<Teleport to="body">
|
|
50
|
+
<Transition name="kz-fade">
|
|
51
|
+
<div
|
|
52
|
+
v-if="open"
|
|
53
|
+
class="fixed inset-0 z-[60] flex flex-col bg-slate-950/96 backdrop-blur-sm"
|
|
54
|
+
role="dialog"
|
|
55
|
+
aria-modal="true"
|
|
56
|
+
>
|
|
57
|
+
<header class="flex items-center gap-3 border-b border-slate-800 px-6 py-4">
|
|
58
|
+
<div class="flex h-9 w-9 shrink-0 items-center justify-center rounded-lg bg-teal-500/15">
|
|
59
|
+
<UIcon name="i-lucide-sparkles" class="h-5 w-5 text-teal-400" />
|
|
60
|
+
</div>
|
|
61
|
+
<div class="min-w-0">
|
|
62
|
+
<h1 class="truncate text-base font-semibold text-white">Kaizen</h1>
|
|
63
|
+
<p class="truncate text-xs text-slate-500">
|
|
64
|
+
Continuous-improvement grading of agent runs
|
|
65
|
+
</p>
|
|
66
|
+
</div>
|
|
67
|
+
<div class="ml-auto flex items-center gap-2">
|
|
68
|
+
<UButton
|
|
69
|
+
icon="i-lucide-refresh-cw"
|
|
70
|
+
size="xs"
|
|
71
|
+
color="neutral"
|
|
72
|
+
variant="ghost"
|
|
73
|
+
:loading="kaizen.loadingOverview"
|
|
74
|
+
@click="kaizen.loadOverview()"
|
|
75
|
+
>
|
|
76
|
+
Refresh
|
|
77
|
+
</UButton>
|
|
78
|
+
<UButton icon="i-lucide-x" size="xs" color="neutral" variant="ghost" @click="close">
|
|
79
|
+
Close
|
|
80
|
+
</UButton>
|
|
81
|
+
</div>
|
|
82
|
+
</header>
|
|
83
|
+
|
|
84
|
+
<div
|
|
85
|
+
v-if="kaizen.available === false"
|
|
86
|
+
class="flex flex-1 items-center justify-center text-sm text-slate-500"
|
|
87
|
+
>
|
|
88
|
+
Kaizen is not configured on this deployment.
|
|
89
|
+
</div>
|
|
90
|
+
|
|
91
|
+
<div v-else class="grid flex-1 grid-cols-1 gap-6 overflow-auto p-6 lg:grid-cols-3">
|
|
92
|
+
<!-- Verified combos -->
|
|
93
|
+
<section class="lg:col-span-1">
|
|
94
|
+
<h2 class="mb-2 flex items-center gap-2 text-sm font-semibold text-slate-200">
|
|
95
|
+
<UIcon name="i-lucide-badge-check" class="h-4 w-4 text-emerald-400" />
|
|
96
|
+
Verified combos
|
|
97
|
+
<span class="text-xs font-normal text-slate-500">({{ kaizen.verifiedCount }})</span>
|
|
98
|
+
</h2>
|
|
99
|
+
<p class="mb-3 text-[11px] text-slate-500">
|
|
100
|
+
A prompt + agent + model combination that graded 4 or 5 with no recommendations
|
|
101
|
+
five times in a row. These are no longer graded.
|
|
102
|
+
</p>
|
|
103
|
+
<ul class="space-y-2">
|
|
104
|
+
<li
|
|
105
|
+
v-for="c in kaizen.verified"
|
|
106
|
+
:key="c.comboKey"
|
|
107
|
+
class="rounded-lg border border-slate-800 bg-slate-900/40 p-2.5"
|
|
108
|
+
>
|
|
109
|
+
<div class="flex items-center gap-2">
|
|
110
|
+
<UIcon
|
|
111
|
+
:name="meta(c.agentKind).icon"
|
|
112
|
+
class="h-3.5 w-3.5 shrink-0"
|
|
113
|
+
:style="{ color: meta(c.agentKind).color }"
|
|
114
|
+
/>
|
|
115
|
+
<span class="text-xs font-medium text-slate-200">{{
|
|
116
|
+
meta(c.agentKind).label
|
|
117
|
+
}}</span>
|
|
118
|
+
<UIcon
|
|
119
|
+
v-if="c.verified"
|
|
120
|
+
name="i-lucide-badge-check"
|
|
121
|
+
class="ml-auto h-3.5 w-3.5 text-emerald-400"
|
|
122
|
+
/>
|
|
123
|
+
<span v-else class="ml-auto text-[11px] text-slate-500">
|
|
124
|
+
{{ c.consecutiveHighGrades }}/5
|
|
125
|
+
</span>
|
|
126
|
+
</div>
|
|
127
|
+
<div class="mt-1 truncate text-[11px] text-slate-500" :title="c.model">
|
|
128
|
+
{{ c.model }} · prompt v{{ c.promptVersion }}
|
|
129
|
+
</div>
|
|
130
|
+
</li>
|
|
131
|
+
<li v-if="kaizen.verified.length === 0" class="text-xs text-slate-600">
|
|
132
|
+
No combos yet.
|
|
133
|
+
</li>
|
|
134
|
+
</ul>
|
|
135
|
+
</section>
|
|
136
|
+
|
|
137
|
+
<!-- Grading history -->
|
|
138
|
+
<section class="lg:col-span-2">
|
|
139
|
+
<h2 class="mb-2 flex items-center gap-2 text-sm font-semibold text-slate-200">
|
|
140
|
+
<UIcon name="i-lucide-history" class="h-4 w-4 text-teal-400" />
|
|
141
|
+
Grading history
|
|
142
|
+
</h2>
|
|
143
|
+
<div class="overflow-hidden rounded-lg border border-slate-800">
|
|
144
|
+
<table class="w-full text-left text-xs">
|
|
145
|
+
<thead class="bg-slate-900/60 text-[11px] uppercase tracking-wide text-slate-500">
|
|
146
|
+
<tr>
|
|
147
|
+
<th class="px-3 py-2 font-medium">When</th>
|
|
148
|
+
<th class="px-3 py-2 font-medium">Agent</th>
|
|
149
|
+
<th class="px-3 py-2 font-medium">Model</th>
|
|
150
|
+
<th class="px-3 py-2 font-medium">Grade</th>
|
|
151
|
+
<th class="px-3 py-2 font-medium">Recommendations</th>
|
|
152
|
+
</tr>
|
|
153
|
+
</thead>
|
|
154
|
+
<tbody class="divide-y divide-slate-800/70">
|
|
155
|
+
<tr v-for="g in kaizen.history" :key="g.id" class="align-top">
|
|
156
|
+
<td class="whitespace-nowrap px-3 py-2 text-slate-500">
|
|
157
|
+
{{ when(g.createdAt) }}
|
|
158
|
+
</td>
|
|
159
|
+
<td class="px-3 py-2">
|
|
160
|
+
<span class="flex items-center gap-1.5">
|
|
161
|
+
<UIcon
|
|
162
|
+
:name="meta(g.agentKind).icon"
|
|
163
|
+
class="h-3.5 w-3.5"
|
|
164
|
+
:style="{ color: meta(g.agentKind).color }"
|
|
165
|
+
/>
|
|
166
|
+
<span class="text-slate-200">{{ meta(g.agentKind).label }}</span>
|
|
167
|
+
<span class="text-slate-600">v{{ g.promptVersion }}</span>
|
|
168
|
+
</span>
|
|
169
|
+
</td>
|
|
170
|
+
<td class="max-w-[12rem] truncate px-3 py-2 text-slate-400" :title="g.model">
|
|
171
|
+
{{ g.model }}
|
|
172
|
+
</td>
|
|
173
|
+
<td class="whitespace-nowrap px-3 py-2 font-semibold" :class="gradeTone(g)">
|
|
174
|
+
{{ statusLabel(g) }}
|
|
175
|
+
</td>
|
|
176
|
+
<td class="px-3 py-2 text-slate-400">
|
|
177
|
+
<ul v-if="g.recommendations.length" class="list-disc space-y-0.5 pl-4">
|
|
178
|
+
<li v-for="(r, i) in g.recommendations" :key="i">{{ r }}</li>
|
|
179
|
+
</ul>
|
|
180
|
+
<span v-else-if="g.status === 'complete'" class="text-slate-600">—</span>
|
|
181
|
+
<span v-else-if="g.error" class="text-rose-400/80">{{ g.error }}</span>
|
|
182
|
+
</td>
|
|
183
|
+
</tr>
|
|
184
|
+
<tr v-if="kaizen.history.length === 0">
|
|
185
|
+
<td colspan="5" class="px-3 py-6 text-center text-slate-600">
|
|
186
|
+
No gradings yet.
|
|
187
|
+
</td>
|
|
188
|
+
</tr>
|
|
189
|
+
</tbody>
|
|
190
|
+
</table>
|
|
191
|
+
</div>
|
|
192
|
+
</section>
|
|
193
|
+
</div>
|
|
194
|
+
</div>
|
|
195
|
+
</Transition>
|
|
196
|
+
</Teleport>
|
|
197
|
+
</template>
|
|
198
|
+
|
|
199
|
+
<style scoped>
|
|
200
|
+
.kz-fade-enter-active,
|
|
201
|
+
.kz-fade-leave-active {
|
|
202
|
+
transition: opacity 0.15s ease;
|
|
203
|
+
}
|
|
204
|
+
.kz-fade-enter-from,
|
|
205
|
+
.kz-fade-leave-to {
|
|
206
|
+
opacity: 0;
|
|
207
|
+
}
|
|
208
|
+
</style>
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
<script setup lang="ts">
|
|
2
|
+
import { computed, watch } from 'vue'
|
|
3
|
+
|
|
4
|
+
// Per-step Kaizen grading status, shown inside the run window (NOT on the board). Reads
|
|
5
|
+
// the grading for this run's step from the kaizen store, lazily loading the run's
|
|
6
|
+
// gradings on first mount, and renders the scheduled→running→complete status plus the
|
|
7
|
+
// grade, summary and recommendations once available.
|
|
8
|
+
const props = defineProps<{
|
|
9
|
+
/** The run (execution) id. */
|
|
10
|
+
instanceId: string | null | undefined
|
|
11
|
+
/** The step's index within the run. */
|
|
12
|
+
stepIndex: number | null | undefined
|
|
13
|
+
}>()
|
|
14
|
+
|
|
15
|
+
const kaizen = useKaizenStore()
|
|
16
|
+
|
|
17
|
+
// The cached grading for this run's step, or null while either prop is missing or the
// store has no grading for the step.
const grading = computed(() => {
  const runId = props.instanceId
  if (!runId) return null
  const step = props.stepIndex
  return step == null ? null : kaizen.gradingForStep(runId, step)
})
|
|
21
|
+
|
|
22
|
+
// Lazily load the run's gradings whenever the step being shown has no cached grading.
// Checking the step rather than "is the run's cache empty" matters because the workspace
// stream upserts gradings one at a time: a single pushed grading would make the run's
// cache non-empty and suppress the fetch, leaving the other steps' gradings unloaded.
// Skipped when Kaizen is known to be unavailable or a fetch for this run is already in
// flight. `loadForExecution()` rethrows non-503 failures, so the fire-and-forget call
// carries its own rejection handler.
watch(
  [() => props.instanceId, () => props.stepIndex],
  ([id, stepIndex]) => {
    if (!id || stepIndex == null || kaizen.available === false) return
    if (kaizen.gradingForStep(id, stepIndex) || kaizen.isLoadingExecution(id)) return
    kaizen.loadForExecution(id).catch((err: unknown) => {
      console.error('[kaizen] failed to load gradings for run', id, err)
    })
  },
  { immediate: true },
)
|
|
33
|
+
|
|
34
|
+
// Tailwind text-colour class for the grade badge: muted while there is no grade, then
// banded by grade (5 and up emerald, 4 lime, 3 amber, anything lower rose).
const tone = computed(() => {
  const g = grading.value
  if (!g || g.grade == null) return 'text-slate-400'
  if (g.grade >= 5) return 'text-emerald-400'
  if (g.grade >= 4) return 'text-lime-400'
  // `>=` like the bands above: a strict `=== 3` would drop a fractional grade in
  // [3, 4) into the rose band.
  if (g.grade >= 3) return 'text-amber-400'
  return 'text-rose-400'
})
|
|
42
|
+
</script>
|
|
43
|
+
|
|
44
|
+
<template>
|
|
45
|
+
<section v-if="grading" class="rounded-xl border border-slate-800 bg-slate-900/50 p-4">
|
|
46
|
+
<div class="flex items-center gap-2">
|
|
47
|
+
<UIcon name="i-lucide-sparkles" class="h-4 w-4 text-teal-400" />
|
|
48
|
+
<h3 class="text-sm font-semibold text-slate-200">Kaizen grading</h3>
|
|
49
|
+
<span class="ml-auto flex items-center gap-1.5 text-xs">
|
|
50
|
+
<template v-if="grading.status === 'scheduled'">
|
|
51
|
+
<UIcon name="i-lucide-clock" class="h-3.5 w-3.5 text-slate-500" />
|
|
52
|
+
<span class="text-slate-400">Scheduled</span>
|
|
53
|
+
</template>
|
|
54
|
+
<template v-else-if="grading.status === 'running'">
|
|
55
|
+
<UIcon name="i-lucide-loader-circle" class="h-3.5 w-3.5 animate-spin text-teal-400" />
|
|
56
|
+
<span class="text-teal-300">Grading…</span>
|
|
57
|
+
</template>
|
|
58
|
+
<template v-else-if="grading.status === 'failed'">
|
|
59
|
+
<UIcon name="i-lucide-circle-alert" class="h-3.5 w-3.5 text-rose-400" />
|
|
60
|
+
<span class="text-rose-400">Failed</span>
|
|
61
|
+
</template>
|
|
62
|
+
<template v-else>
  <!-- `grade` is nullable: fall back to a plain label instead of rendering a bare "/5". -->
  <span class="font-semibold" :class="tone">{{ grading.grade != null ? `${grading.grade}/5` : 'Graded' }}</span>
</template>
|
|
65
|
+
</span>
|
|
66
|
+
</div>
|
|
67
|
+
|
|
68
|
+
<p v-if="grading.status === 'scheduled'" class="mt-2 text-[11px] text-slate-500">
|
|
69
|
+
A Kaizen grading is queued for this step. It runs in the background after the run.
|
|
70
|
+
</p>
|
|
71
|
+
|
|
72
|
+
<template v-else-if="grading.status === 'complete'">
|
|
73
|
+
<p v-if="grading.summary" class="mt-2 text-xs text-slate-300">{{ grading.summary }}</p>
|
|
74
|
+
<div v-if="grading.recommendations.length" class="mt-2">
|
|
75
|
+
<p class="text-[11px] font-medium uppercase tracking-wide text-slate-500">
|
|
76
|
+
Recommendations
|
|
77
|
+
</p>
|
|
78
|
+
<ul class="mt-1 list-disc space-y-0.5 pl-4 text-xs text-slate-300">
|
|
79
|
+
<li v-for="(r, i) in grading.recommendations" :key="i">{{ r }}</li>
|
|
80
|
+
</ul>
|
|
81
|
+
</div>
|
|
82
|
+
<p v-else class="mt-2 text-[11px] text-emerald-400/80">
|
|
83
|
+
Smooth interaction — nothing to improve.
|
|
84
|
+
</p>
|
|
85
|
+
<p v-if="grading.graderModel" class="mt-2 text-[10px] text-slate-600">
|
|
86
|
+
Graded by {{ grading.graderModel }}
|
|
87
|
+
</p>
|
|
88
|
+
</template>
|
|
89
|
+
|
|
90
|
+
<p v-else-if="grading.status === 'failed'" class="mt-2 text-[11px] text-rose-400/80">
|
|
91
|
+
{{ grading.error ?? 'The grading could not be completed.' }}
|
|
92
|
+
</p>
|
|
93
|
+
</section>
|
|
94
|
+
</template>
|
|
@@ -138,6 +138,18 @@ watch(
|
|
|
138
138
|
>
|
|
139
139
|
Sandbox
|
|
140
140
|
</UButton>
|
|
141
|
+
<!-- The Kaizen screen: grading history + verified prompt/agent/model combos. -->
|
|
142
|
+
<UButton
|
|
143
|
+
block
|
|
144
|
+
color="primary"
|
|
145
|
+
variant="soft"
|
|
146
|
+
size="sm"
|
|
147
|
+
icon="i-lucide-sparkles"
|
|
148
|
+
class="justify-start"
|
|
149
|
+
@click="ui.openKaizen()"
|
|
150
|
+
>
|
|
151
|
+
Kaizen
|
|
152
|
+
</UButton>
|
|
141
153
|
</div>
|
|
142
154
|
</section>
|
|
143
155
|
|
|
@@ -313,6 +313,12 @@ async function copyOutput() {
|
|
|
313
313
|
/>
|
|
314
314
|
</section>
|
|
315
315
|
|
|
316
|
+
<!-- post-run Kaizen grading status + results for this step (run-details only) -->
|
|
317
|
+
<KaizenStepStatus
|
|
318
|
+
:instance-id="ctx?.instanceId ?? null"
|
|
319
|
+
:step-index="ctx?.stepIndex ?? null"
|
|
320
|
+
/>
|
|
321
|
+
|
|
316
322
|
<!-- companion rework budget spent: the shared iteration-cap decision
|
|
317
323
|
(one more round / proceed with the current output / stop & reset) -->
|
|
318
324
|
<IterationCapPrompt
|
|
@@ -68,6 +68,7 @@ const draft = reactive({
|
|
|
68
68
|
taskLimitShared: 5 as number,
|
|
69
69
|
perType: {} as Record<CreateTaskType, number>,
|
|
70
70
|
storeAgentContext: true,
|
|
71
|
+
kaizenEnabled: true,
|
|
71
72
|
// Budget: empty string ⇒ "use the built-in default" (null on the wire).
|
|
72
73
|
spendCurrency: '',
|
|
73
74
|
spendMonthlyLimit: '',
|
|
@@ -81,6 +82,7 @@ function hydrate() {
|
|
|
81
82
|
const pt = s.taskLimitPerType ?? {}
|
|
82
83
|
for (const t of TASK_TYPES) draft.perType[t] = pt[t] ?? 3
|
|
83
84
|
draft.storeAgentContext = s.storeAgentContext
|
|
85
|
+
draft.kaizenEnabled = s.kaizenEnabled
|
|
84
86
|
draft.spendCurrency = s.spendCurrency ?? ''
|
|
85
87
|
draft.spendMonthlyLimit = s.spendMonthlyLimit == null ? '' : String(s.spendMonthlyLimit)
|
|
86
88
|
}
|
|
@@ -107,6 +109,7 @@ async function save() {
|
|
|
107
109
|
)
|
|
108
110
|
: null,
|
|
109
111
|
storeAgentContext: draft.storeAgentContext,
|
|
112
|
+
kaizenEnabled: draft.kaizenEnabled,
|
|
110
113
|
})
|
|
111
114
|
toast.add({ title: 'Settings saved', icon: 'i-lucide-check', color: 'success' })
|
|
112
115
|
} catch (e) {
|
|
@@ -237,6 +240,23 @@ async function saveBudget() {
|
|
|
237
240
|
</label>
|
|
238
241
|
</section>
|
|
239
242
|
|
|
243
|
+
<!-- Kaizen agent -->
|
|
244
|
+
<section class="space-y-2">
|
|
245
|
+
<h3 class="text-sm font-semibold text-slate-200">Kaizen agent</h3>
|
|
246
|
+
<p class="text-[11px] text-slate-400">
|
|
247
|
+
After each run completes, the Kaizen agent grades how every agent step went — smooth
|
|
248
|
+
and efficient vs confused and chaotic — and recommends prompt/model improvements. A
|
|
249
|
+
prompt + agent + model combination that grades highly with no recommendations five
|
|
250
|
+
times in a row is marked verified and is no longer graded. Grading runs in the
|
|
251
|
+
background and is shown inside run details and the Kaizen screen. Set the grader's
|
|
252
|
+
model in Model Configuration (the “Kaizen” agent).
|
|
253
|
+
</p>
|
|
254
|
+
<label class="flex items-center gap-2">
|
|
255
|
+
<USwitch v-model="draft.kaizenEnabled" size="sm" />
|
|
256
|
+
<span class="text-sm text-slate-200">Grade agent runs with Kaizen</span>
|
|
257
|
+
</label>
|
|
258
|
+
</section>
|
|
259
|
+
|
|
240
260
|
<div class="flex justify-end">
|
|
241
261
|
<UButton
|
|
242
262
|
color="primary"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { KaizenGrading, KaizenOverview } from '~/types/domain'
|
|
2
|
+
import type { ApiContext } from './context'
|
|
3
|
+
|
|
4
|
+
/**
 * Read-only Kaizen (post-run grading) endpoints.
 *
 * @param ctx The shared API context; `http` performs the request and `ws` builds the
 *   workspace-scoped base path.
 * @returns The overview fetcher for the Kaizen screen and the per-run gradings fetcher.
 */
export function kaizenApi({ http, ws }: ApiContext) {
  // The Kaizen screen payload: recent grading history + the verified-combo library.
  const getKaizenOverview = (workspaceId: string) =>
    http<KaizenOverview>(`${ws(workspaceId)}/kaizen`)

  // The gradings recorded for a single run (the run-window status surface). The run id
  // is percent-encoded before it is placed in the path.
  const getKaizenForExecution = (workspaceId: string, executionId: string) => {
    const base = ws(workspaceId)
    const run = encodeURIComponent(executionId)
    return http<{ gradings: KaizenGrading[] }>(`${base}/executions/${run}/kaizen`)
  }

  return { getKaizenOverview, getKaizenForExecution }
}
|
|
@@ -9,6 +9,7 @@ import { executionApi } from './api/execution'
|
|
|
9
9
|
import { fragmentsApi } from './api/fragments'
|
|
10
10
|
import { githubApi } from './api/github'
|
|
11
11
|
import { humanTestApi } from './api/humanTest'
|
|
12
|
+
import { kaizenApi } from './api/kaizen'
|
|
12
13
|
import { modelsApi } from './api/models'
|
|
13
14
|
import { notificationsApi } from './api/notifications'
|
|
14
15
|
import { presetsApi } from './api/presets'
|
|
@@ -85,6 +86,7 @@ export function useApi() {
|
|
|
85
86
|
...tasksApi(ctx),
|
|
86
87
|
...reviewsApi(ctx),
|
|
87
88
|
...humanTestApi(ctx),
|
|
89
|
+
...kaizenApi(ctx),
|
|
88
90
|
...specApi(ctx),
|
|
89
91
|
...notificationsApi(ctx),
|
|
90
92
|
...presetsApi(ctx),
|
|
@@ -23,6 +23,7 @@ export function useWorkspaceStream() {
|
|
|
23
23
|
const requirements = useRequirementsStore()
|
|
24
24
|
const consensus = useConsensusStore()
|
|
25
25
|
const clarity = useClarityStore()
|
|
26
|
+
const kaizen = useKaizenStore()
|
|
26
27
|
const api = useApi()
|
|
27
28
|
const apiBase = useRuntimeConfig().public.apiBase
|
|
28
29
|
|
|
@@ -87,6 +88,11 @@ export function useWorkspaceStream() {
|
|
|
87
88
|
// cache so an open review window / inspector reflects it live ("incorporating…" → the
|
|
88
89
|
// next cycle / converged). The summons back, when needed, arrives as a `notification`.
|
|
89
90
|
clarity.upsert(event.review)
|
|
91
|
+
} else if (event.type === 'kaizen') {
|
|
92
|
+
// A post-run Kaizen grading was scheduled, started or completed — fold it into the
|
|
93
|
+
// run cache (so an open run window shows scheduled→running→complete live) and the
|
|
94
|
+
// Kaizen screen history. Never surfaced on the board.
|
|
95
|
+
kaizen.upsert(event.grading)
|
|
90
96
|
}
|
|
91
97
|
}
|
|
92
98
|
|
package/app/pages/index.vue
CHANGED
|
@@ -11,6 +11,7 @@ import DecisionModal from '~/components/panels/DecisionModal.vue'
|
|
|
11
11
|
import AgentStepDetail from '~/components/panels/AgentStepDetail.vue'
|
|
12
12
|
import StepResultViewHost from '~/components/panels/StepResultViewHost.vue'
|
|
13
13
|
import ObservabilityPanel from '~/components/panels/ObservabilityPanel.vue'
|
|
14
|
+
import KaizenPanel from '~/components/kaizen/KaizenPanel.vue'
|
|
14
15
|
import BlockFocusView from '~/components/focus/BlockFocusView.vue'
|
|
15
16
|
import DocumentSourceConnectModal from '~/components/documents/DocumentSourceConnectModal.vue'
|
|
16
17
|
import DocumentImportModal from '~/components/documents/DocumentImportModal.vue'
|
|
@@ -171,6 +172,7 @@ watch(
|
|
|
171
172
|
<AgentStepDetail />
|
|
172
173
|
<StepResultViewHost />
|
|
173
174
|
<ObservabilityPanel />
|
|
175
|
+
<KaizenPanel />
|
|
174
176
|
<DocumentSourceConnectModal />
|
|
175
177
|
<DocumentImportModal />
|
|
176
178
|
<SpawnPreviewModal />
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { defineStore } from 'pinia'
|
|
2
|
+
import { computed, ref } from 'vue'
|
|
3
|
+
import type { KaizenGrading, KaizenVerifiedCombo } from '~/types/domain'
|
|
4
|
+
import { useWorkspaceStore } from '~/stores/workspace'
|
|
5
|
+
|
|
6
|
+
/**
 * Kaizen state: per-run gradings (for the run-window status surface) and the
 * workspace-wide history + verified-combo library (for the Kaizen screen). Gradings
 * arrive both via lazy loads and live over the workspace stream (`upsert`). Never
 * surfaced on the board — only inside run details + the dedicated screen.
 */
export const useKaizenStore = defineStore('kaizen', () => {
  const api = useApi()

  /** Gradings keyed by run (execution) id, for the run window. */
  const byExecution = ref<Record<string, KaizenGrading[]>>({})
  /** Recent grading history for the Kaizen screen. */
  const history = ref<KaizenGrading[]>([])
  /** The verified-combo library for the Kaizen screen. */
  const verified = ref<KaizenVerifiedCombo[]>([])
  /** True while an overview request is in flight. */
  const loadingOverview = ref(false)
  /** Ids of the runs whose gradings are currently being fetched. */
  const loadingExecution = ref<Set<string>>(new Set())
  /**
   * Whether the Kaizen feature is usable: `null` until a request has settled, `true`
   * after a successful load, `false` after a 503 (not configured on this deployment).
   */
  const available = ref<boolean | null>(null)

  /**
   * True when a failed request reports HTTP 503. Both `statusCode` and `status` are
   * inspected, so the check does not depend on which of the two the error exposes.
   */
  function isNotConfigured(e: unknown): boolean {
    if (typeof e !== 'object' || e === null) return false
    const { statusCode, status } = e as { statusCode?: unknown; status?: unknown }
    return statusCode === 503 || status === 503
  }

  /** The cached gradings of a run; an empty array when nothing is cached for it. */
  function gradingsFor(executionId: string): KaizenGrading[] {
    return byExecution.value[executionId] ?? []
  }

  /** The grading for a specific step of a run, if any. */
  function gradingForStep(executionId: string, stepIndex: number): KaizenGrading | null {
    return gradingsFor(executionId).find((g) => g.stepIndex === stepIndex) ?? null
  }

  /**
   * Fetch the Kaizen screen payload and replace `history` and `verified` with it.
   * A 503 marks the feature unavailable; any other failure is rethrown to the caller.
   */
  async function loadOverview() {
    const ws = useWorkspaceStore()
    loadingOverview.value = true
    try {
      const overview = await api.getKaizenOverview(ws.requireId())
      history.value = overview.gradings
      verified.value = overview.verified
      available.value = true
    } catch (e) {
      if (!isNotConfigured(e)) throw e
      available.value = false
    } finally {
      loadingOverview.value = false
    }
  }

  /**
   * Fetch one run's gradings and replace that run's cache entry with the response.
   * A 503 marks the feature unavailable; any other failure is rethrown to the caller.
   */
  async function loadForExecution(executionId: string) {
    const ws = useWorkspaceStore()
    // A fresh Set is assigned (rather than mutating in place) on every change.
    loadingExecution.value = new Set(loadingExecution.value).add(executionId)
    try {
      const { gradings } = await api.getKaizenForExecution(ws.requireId(), executionId)
      byExecution.value = { ...byExecution.value, [executionId]: gradings }
      available.value = true
    } catch (e) {
      if (!isNotConfigured(e)) throw e
      available.value = false
    } finally {
      const next = new Set(loadingExecution.value)
      next.delete(executionId)
      loadingExecution.value = next
    }
  }

  /** Fold a grading pushed over the stream into both the run cache and the screen history. */
  function upsert(grading: KaizenGrading) {
    const current = byExecution.value[grading.executionId] ?? []
    const replaced = current.some((g) => g.id === grading.id)
    const nextRun = replaced
      ? current.map((g) => (g.id === grading.id ? grading : g))
      : [...current, grading]
    byExecution.value = { ...byExecution.value, [grading.executionId]: nextRun }
    // Keep the screen history live too (newest first). This happens whether or not the
    // overview has been loaded; `loadOverview()` replaces the list wholesale.
    const inHistory = history.value.some((g) => g.id === grading.id)
    if (inHistory) history.value = history.value.map((g) => (g.id === grading.id ? grading : g))
    else history.value = [grading, ...history.value]
  }

  /** Whether a gradings fetch for the given run is currently in flight. */
  const isLoadingExecution = (executionId: string) => loadingExecution.value.has(executionId)
  /** Number of library entries that are flagged as verified. */
  const verifiedCount = computed(() => verified.value.filter((c) => c.verified).length)

  return {
    byExecution,
    history,
    verified,
    available,
    loadingOverview,
    verifiedCount,
    gradingsFor,
    gradingForStep,
    loadOverview,
    loadForExecution,
    upsert,
    isLoadingExecution,
  }
})
|
package/app/stores/ui.ts
CHANGED
|
@@ -149,6 +149,10 @@ export const useUiStore = defineStore('ui', () => {
|
|
|
149
149
|
// per-call detail from the observability store on open.
|
|
150
150
|
const observabilityInstanceId = ref<string | null>(null)
|
|
151
151
|
|
|
152
|
+
// The Kaizen screen (grading history + verified-combo library), a full-panel overlay
|
|
153
|
+
// opened from the sidebar. Distinct from the per-run grading status shown in run details.
|
|
154
|
+
const kaizenScreenOpen = ref(false)
|
|
155
|
+
|
|
152
156
|
/** Current canvas zoom (driven by Vue Flow viewport). */
|
|
153
157
|
const zoom = ref(1)
|
|
154
158
|
|
|
@@ -473,6 +477,12 @@ export const useUiStore = defineStore('ui', () => {
|
|
|
473
477
|
function closeObservability() {
|
|
474
478
|
observabilityInstanceId.value = null
|
|
475
479
|
}
|
|
480
|
+
function openKaizen() {
|
|
481
|
+
kaizenScreenOpen.value = true
|
|
482
|
+
}
|
|
483
|
+
function closeKaizen() {
|
|
484
|
+
kaizenScreenOpen.value = false
|
|
485
|
+
}
|
|
476
486
|
|
|
477
487
|
return {
|
|
478
488
|
selectedBlockId,
|
|
@@ -513,6 +523,7 @@ export const useUiStore = defineStore('ui', () => {
|
|
|
513
523
|
closeResultView,
|
|
514
524
|
stepDetail,
|
|
515
525
|
observabilityInstanceId,
|
|
526
|
+
kaizenScreenOpen,
|
|
516
527
|
zoom,
|
|
517
528
|
lod,
|
|
518
529
|
expandedFrames,
|
|
@@ -589,5 +600,7 @@ export const useUiStore = defineStore('ui', () => {
|
|
|
589
600
|
closeStepDetail,
|
|
590
601
|
openObservability,
|
|
591
602
|
closeObservability,
|
|
603
|
+
openKaizen,
|
|
604
|
+
closeKaizen,
|
|
592
605
|
}
|
|
593
606
|
})
|
package/app/types/domain.ts
CHANGED
|
@@ -309,6 +309,9 @@ export type AgentKind =
|
|
|
309
309
|
// validate the change in a live URL, dispatching the Tester's `fixer` (from findings) or
|
|
310
310
|
// the `conflict-resolver` (on a conflicting pull-main) on demand. Opens its own window.
|
|
311
311
|
| 'human-test'
|
|
312
|
+
// The Kaizen agent: post-run grader (NOT a pipeline step / palette archetype). Surfaced
|
|
313
|
+
// only in Model Configuration (its model is pinnable like any agent) and run details.
|
|
314
|
+
| 'kaizen'
|
|
312
315
|
|
|
313
316
|
/** A draggable agent definition shown in the agent palette. */
|
|
314
317
|
/** Palette grouping for the agent archetypes (collapsible sections in the builder). */
|
|
@@ -495,6 +498,8 @@ export interface WorkspaceSettings {
|
|
|
495
498
|
taskLimitPerType: Partial<Record<CreateTaskType, number>> | null
|
|
496
499
|
/** Whether to store the complete provided-context snapshot for each container agent. */
|
|
497
500
|
storeAgentContext: boolean
|
|
501
|
+
/** Whether the Kaizen agent grades agent steps after each run. On by default. */
|
|
502
|
+
kaizenEnabled: boolean
|
|
498
503
|
/** Spend budget currency (ISO 4217). Null ⇒ the built-in default (`EUR`). */
|
|
499
504
|
spendCurrency: string | null
|
|
500
505
|
/** Monthly spend budget in `spendCurrency`. Null ⇒ the built-in default. */
|
|
@@ -508,6 +513,7 @@ export interface UpdateWorkspaceSettingsInput {
|
|
|
508
513
|
taskLimitShared?: number | null
|
|
509
514
|
taskLimitPerType?: Partial<Record<CreateTaskType, number>> | null
|
|
510
515
|
storeAgentContext?: boolean
|
|
516
|
+
kaizenEnabled?: boolean
|
|
511
517
|
spendCurrency?: string | null
|
|
512
518
|
spendMonthlyLimit?: number | null
|
|
513
519
|
}
|
|
@@ -520,6 +526,50 @@ export interface ServiceFragmentDefaults {
|
|
|
520
526
|
fragmentIds: string[]
|
|
521
527
|
}
|
|
522
528
|
|
|
529
|
+
/** Lifecycle of a Kaizen grading. Mirrors `@cat-factory/contracts`. */
|
|
530
|
+
export type KaizenGradingStatus = 'scheduled' | 'running' | 'complete' | 'failed'
|
|
531
|
+
|
|
532
|
+
/**
|
|
533
|
+
* A Kaizen grading of one completed agent step (how smooth/efficient the interaction
|
|
534
|
+
* was, 1..5, plus recommendations). Mirrors `@cat-factory/contracts`.
|
|
535
|
+
*/
|
|
536
|
+
export interface KaizenGrading {
|
|
537
|
+
id: string
|
|
538
|
+
executionId: string
|
|
539
|
+
blockId: string
|
|
540
|
+
stepIndex: number
|
|
541
|
+
agentKind: string
|
|
542
|
+
model: string
|
|
543
|
+
promptVersion: number
|
|
544
|
+
comboKey: string
|
|
545
|
+
status: KaizenGradingStatus
|
|
546
|
+
grade: number | null
|
|
547
|
+
summary: string
|
|
548
|
+
recommendations: string[]
|
|
549
|
+
graderModel: string | null
|
|
550
|
+
error: string | null
|
|
551
|
+
createdAt: number
|
|
552
|
+
updatedAt: number
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
/** A `(promptVersion, agentKind, model)` combo's verification progress. */
|
|
556
|
+
export interface KaizenVerifiedCombo {
|
|
557
|
+
comboKey: string
|
|
558
|
+
agentKind: string
|
|
559
|
+
model: string
|
|
560
|
+
promptVersion: number
|
|
561
|
+
consecutiveHighGrades: number
|
|
562
|
+
verified: boolean
|
|
563
|
+
verifiedAt: number | null
|
|
564
|
+
updatedAt: number
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
/** The Kaizen screen payload: recent grading history + the verified-combo library. */
|
|
568
|
+
export interface KaizenOverview {
|
|
569
|
+
gradings: KaizenGrading[]
|
|
570
|
+
verified: KaizenVerifiedCombo[]
|
|
571
|
+
}
|
|
572
|
+
|
|
523
573
|
/**
|
|
524
574
|
* Real-time events pushed over the workspace WebSocket stream (see
|
|
525
575
|
* `useWorkspaceStream`). Mirrors `WorkspaceEvent` in `@cat-factory/contracts`.
|
|
@@ -533,6 +583,7 @@ export type WorkspaceEvent =
|
|
|
533
583
|
| { type: 'requirements'; review: RequirementReview; at: number }
|
|
534
584
|
| { type: 'consensus'; session: ConsensusSession; at: number }
|
|
535
585
|
| { type: 'clarity'; review: ClarityReview; at: number }
|
|
586
|
+
| { type: 'kaizen'; grading: KaizenGrading; at: number }
|
|
536
587
|
|
|
537
588
|
/** Level-of-detail buckets driven by the canvas zoom level. Shallow → deep:
|
|
538
589
|
* `far`/`mid`/`close` govern a service frame (chip → card → opened with tasks);
|
package/app/utils/catalog.ts
CHANGED
|
@@ -330,6 +330,17 @@ export const SYSTEM_AGENT_META: Record<string, AgentArchetype> = {
|
|
|
330
330
|
color: '#a3e635',
|
|
331
331
|
description: 'Scores the PR and auto-merges within the task thresholds, or asks for review.',
|
|
332
332
|
},
|
|
333
|
+
// The Kaizen agent grades agent steps AFTER a run completes (continuous improvement).
|
|
334
|
+
// It is NOT a pipeline step (never in the palette — no `category`), but it runs an LLM,
|
|
335
|
+
// so it needs display metadata here and a per-workspace model in Model Configuration.
|
|
336
|
+
kaizen: {
|
|
337
|
+
kind: 'kaizen',
|
|
338
|
+
label: 'Kaizen',
|
|
339
|
+
icon: 'i-lucide-sparkles',
|
|
340
|
+
color: '#2dd4bf',
|
|
341
|
+
description:
|
|
342
|
+
'Grades each completed agent step (smooth vs chaotic) after a run and recommends prompt/model improvements.',
|
|
343
|
+
},
|
|
333
344
|
// A polling gate (no model of its own) that watches the released PR's observability
|
|
334
345
|
// signals after merge and escalates to the on-call agent on a regression. NOT in any
|
|
335
346
|
// default pipeline and NOT a standing palette archetype — the palette surfaces it
|
|
@@ -365,9 +376,15 @@ export const OBSERVABILITY_GATE_ARCHETYPE: AgentArchetype =
|
|
|
365
376
|
* default model would do nothing for them.
|
|
366
377
|
*/
|
|
367
378
|
export const MODEL_CONFIGURABLE_SYSTEM_KINDS: AgentArchetype[] = [
|
|
368
|
-
...[
|
|
369
|
-
|
|
370
|
-
|
|
379
|
+
...[
|
|
380
|
+
'spec-writer',
|
|
381
|
+
'blueprints',
|
|
382
|
+
'conflict-resolver',
|
|
383
|
+
'ci-fixer',
|
|
384
|
+
'fixer',
|
|
385
|
+
'merger',
|
|
386
|
+
'kaizen',
|
|
387
|
+
].map((kind) => SYSTEM_AGENT_META[kind]!),
|
|
371
388
|
// Companions run LLMs but aren't palette-addable (they're producer toggles), so include
|
|
372
389
|
// them here to keep their per-workspace default model pinnable in the Model Defaults panel.
|
|
373
390
|
...COMPANION_ARCHETYPES,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cat-factory/app",
|
|
3
|
-
"version": "0.30.6",
|
|
3
|
+
"version": "0.31.0",
|
|
4
4
|
"description": "Reusable Nuxt layer for the Agent Architecture Board SPA (components, stores, composables, pages). Consume it from a thin deployment app via `extends: ['@cat-factory/app']` and point it at your backend with NUXT_PUBLIC_API_BASE. See deploy/frontend for an example.",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|