npm - @cat-factory/app - Versions diffs - 0.30.5 → 0.31.0 - Mend

@cat-factory/app 0.30.5 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/app/components/board/BoardCanvas.vue +6 -0
package/app/components/kaizen/KaizenPanel.vue +208 -0
package/app/components/kaizen/KaizenStepStatus.vue +94 -0
package/app/components/layout/SideBar.vue +12 -0
package/app/components/panels/AgentStepDetail.vue +6 -0
package/app/components/settings/WorkspaceSettingsPanel.vue +20 -0
package/app/composables/api/kaizen.ts +16 -0
package/app/composables/useApi.ts +2 -0
package/app/composables/useWorkspaceStream.ts +6 -0
package/app/pages/index.vue +2 -0
package/app/stores/kaizen.ts +101 -0
package/app/stores/ui.ts +13 -0
package/app/stores/workspaceSettings.ts +1 -0
package/app/types/domain.ts +51 -0
package/app/utils/catalog.ts +20 -3
package/package.json +1 -1

package/app/components/board/BoardCanvas.vue CHANGED Viewed

@@ -44,6 +44,11 @@ useTaskExpansion(boardEl)
 // moving one never shifts another. The frame being dragged is lifted to the top,
 // then the hovered frame (the un-obscured one under the pointer), so overlapping
 // services can always be reached and reordered. See useFrameStacking.
+//
+// `elevate-nodes-on-select` is turned OFF on <VueFlow> for this to work: Vue Flow's
+// default adds +1000 to a selected node's z-index, so a clicked frame would stay pinned
+// on top and no amount of hovering could surface a different frame. Stacking is driven
+// purely by hover/drag here; the selection highlight is the ring, not z.
 function frameZIndex(id: string) {
   if (draggingId.value === id) return 1000
   if (hoveredFrameId.value === id) return 100
@@ -163,6 +168,7 @@ async function onDrop(event: DragEvent) {
       :max-zoom="3"
       :default-viewport="{ x: 40, y: 20, zoom: 0.85 }"
       :pan-on-drag="[0, 2]"
+      :elevate-nodes-on-select="false"
       fit-view-on-init
       @node-click="onNodeClick"
       @node-double-click="onNodeDoubleClick"

package/app/components/kaizen/KaizenPanel.vue ADDED Viewed

@@ -0,0 +1,208 @@
+<script setup lang="ts">
+import { computed, watch } from 'vue'
+import { onKeyStroke } from '@vueuse/core'
+import type { KaizenGrading } from '~/types/domain'
+import { agentKindMeta } from '~/utils/catalog'
+// The Kaizen screen: a full-panel overlay listing the workspace's grading history and
+// its verified-combo library. Opened via `ui.openKaizen()` from the sidebar. Read-only —
+// grading is scheduled by the engine and run by the background sweep, never from here.
+const ui = useUiStore()
+const kaizen = useKaizenStore()
+const open = computed(() => ui.kaizenScreenOpen)
+watch(open, (isOpen) => {
+  if (isOpen) void kaizen.loadOverview()
+})
+function close() {
+  ui.closeKaizen()
+}
+onKeyStroke('Escape', () => {
+  if (open.value) close()
+})
+function meta(kind: string) {
+  return agentKindMeta(kind)
+}
+function when(ms: number): string {
+  return new Date(ms).toLocaleString()
+}
+function gradeTone(g: KaizenGrading): string {
+  if (g.status === 'failed') return 'text-slate-500'
+  if (g.grade == null) return 'text-slate-400'
+  if (g.grade >= 5) return 'text-emerald-400'
+  if (g.grade >= 4) return 'text-lime-400'
+  if (g.grade === 3) return 'text-amber-400'
+  return 'text-rose-400'
+}
+function statusLabel(g: KaizenGrading): string {
+  if (g.status === 'scheduled') return 'Scheduled'
+  if (g.status === 'running') return 'Grading…'
+  if (g.status === 'failed') return 'Failed'
+  return g.grade != null ? `${g.grade}/5` : 'Graded'
+}
+</script>
+<template>
+  <Teleport to="body">
+    <Transition name="kz-fade">
+      <div
+        v-if="open"
+        class="fixed inset-0 z-[60] flex flex-col bg-slate-950/96 backdrop-blur-sm"
+        role="dialog"
+        aria-modal="true"
+      >
+        <header class="flex items-center gap-3 border-b border-slate-800 px-6 py-4">
+          <div class="flex h-9 w-9 shrink-0 items-center justify-center rounded-lg bg-teal-500/15">
+            <UIcon name="i-lucide-sparkles" class="h-5 w-5 text-teal-400" />
+          </div>
+          <div class="min-w-0">
+            <h1 class="truncate text-base font-semibold text-white">Kaizen</h1>
+            <p class="truncate text-xs text-slate-500">
+              Continuous-improvement grading of agent runs
+            </p>
+          </div>
+          <div class="ml-auto flex items-center gap-2">
+            <UButton
+              icon="i-lucide-refresh-cw"
+              size="xs"
+              color="neutral"
+              variant="ghost"
+              :loading="kaizen.loadingOverview"
+              @click="kaizen.loadOverview()"
+            >
+              Refresh
+            </UButton>
+            <UButton icon="i-lucide-x" size="xs" color="neutral" variant="ghost" @click="close">
+              Close
+            </UButton>
+          </div>
+        </header>
+        <div
+          v-if="kaizen.available === false"
+          class="flex flex-1 items-center justify-center text-sm text-slate-500"
+        >
+          Kaizen is not configured on this deployment.
+        </div>
+        <div v-else class="grid flex-1 grid-cols-1 gap-6 overflow-auto p-6 lg:grid-cols-3">
+          <!-- Verified combos -->
+          <section class="lg:col-span-1">
+            <h2 class="mb-2 flex items-center gap-2 text-sm font-semibold text-slate-200">
+              <UIcon name="i-lucide-badge-check" class="h-4 w-4 text-emerald-400" />
+              Verified combos
+              <span class="text-xs font-normal text-slate-500">({{ kaizen.verifiedCount }})</span>
+            </h2>
+            <p class="mb-3 text-[11px] text-slate-500">
+              A prompt + agent + model combination that graded 4 or 5 with no recommendations
+              five times in a row. These are no longer graded.
+            </p>
+            <ul class="space-y-2">
+              <li
+                v-for="c in kaizen.verified"
+                :key="c.comboKey"
+                class="rounded-lg border border-slate-800 bg-slate-900/40 p-2.5"
+              >
+                <div class="flex items-center gap-2">
+                  <UIcon
+                    :name="meta(c.agentKind).icon"
+                    class="h-3.5 w-3.5 shrink-0"
+                    :style="{ color: meta(c.agentKind).color }"
+                  />
+                  <span class="text-xs font-medium text-slate-200">{{
+                    meta(c.agentKind).label
+                  }}</span>
+                  <UIcon
+                    v-if="c.verified"
+                    name="i-lucide-badge-check"
+                    class="ml-auto h-3.5 w-3.5 text-emerald-400"
+                  />
+                  <span v-else class="ml-auto text-[11px] text-slate-500">
+                    {{ c.consecutiveHighGrades }}/5
+                  </span>
+                </div>
+                <div class="mt-1 truncate text-[11px] text-slate-500" :title="c.model">
+                  {{ c.model }} · prompt v{{ c.promptVersion }}
+                </div>
+              </li>
+              <li v-if="kaizen.verified.length === 0" class="text-xs text-slate-600">
+                No combos yet.
+              </li>
+            </ul>
+          </section>
+          <!-- Grading history -->
+          <section class="lg:col-span-2">
+            <h2 class="mb-2 flex items-center gap-2 text-sm font-semibold text-slate-200">
+              <UIcon name="i-lucide-history" class="h-4 w-4 text-teal-400" />
+              Grading history
+            </h2>
+            <div class="overflow-hidden rounded-lg border border-slate-800">
+              <table class="w-full text-left text-xs">
+                <thead class="bg-slate-900/60 text-[11px] uppercase tracking-wide text-slate-500">
+                  <tr>
+                    <th class="px-3 py-2 font-medium">When</th>
+                    <th class="px-3 py-2 font-medium">Agent</th>
+                    <th class="px-3 py-2 font-medium">Model</th>
+                    <th class="px-3 py-2 font-medium">Grade</th>
+                    <th class="px-3 py-2 font-medium">Recommendations</th>
+                  </tr>
+                </thead>
+                <tbody class="divide-y divide-slate-800/70">
+                  <tr v-for="g in kaizen.history" :key="g.id" class="align-top">
+                    <td class="whitespace-nowrap px-3 py-2 text-slate-500">
+                      {{ when(g.createdAt) }}
+                    </td>
+                    <td class="px-3 py-2">
+                      <span class="flex items-center gap-1.5">
+                        <UIcon
+                          :name="meta(g.agentKind).icon"
+                          class="h-3.5 w-3.5"
+                          :style="{ color: meta(g.agentKind).color }"
+                        />
+                        <span class="text-slate-200">{{ meta(g.agentKind).label }}</span>
+                        <span class="text-slate-600">v{{ g.promptVersion }}</span>
+                      </span>
+                    </td>
+                    <td class="max-w-[12rem] truncate px-3 py-2 text-slate-400" :title="g.model">
+                      {{ g.model }}
+                    </td>
+                    <td class="whitespace-nowrap px-3 py-2 font-semibold" :class="gradeTone(g)">
+                      {{ statusLabel(g) }}
+                    </td>
+                    <td class="px-3 py-2 text-slate-400">
+                      <ul v-if="g.recommendations.length" class="list-disc space-y-0.5 pl-4">
+                        <li v-for="(r, i) in g.recommendations" :key="i">{{ r }}</li>
+                      </ul>
+                      <span v-else-if="g.status === 'complete'" class="text-slate-600">—</span>
+                      <span v-else-if="g.error" class="text-rose-400/80">{{ g.error }}</span>
+                    </td>
+                  </tr>
+                  <tr v-if="kaizen.history.length === 0">
+                    <td colspan="5" class="px-3 py-6 text-center text-slate-600">
+                      No gradings yet.
+                    </td>
+                  </tr>
+                </tbody>
+              </table>
+            </div>
+          </section>
+        </div>
+      </div>
+    </Transition>
+  </Teleport>
+</template>
+<style scoped>
+.kz-fade-enter-active,
+.kz-fade-leave-active {
+  transition: opacity 0.15s ease;
+}
+.kz-fade-enter-from,
+.kz-fade-leave-to {
+  opacity: 0;
+}
+</style>

package/app/components/kaizen/KaizenStepStatus.vue ADDED Viewed

@@ -0,0 +1,94 @@
+<script setup lang="ts">
+import { computed, watch } from 'vue'
+// Per-step Kaizen grading status, shown inside the run window (NOT on the board). Reads
+// the grading for this run's step from the kaizen store, lazily loading the run's
+// gradings on first mount, and renders the scheduled→running→complete status plus the
+// grade, summary and recommendations once available.
+const props = defineProps<{
+  /** The run (execution) id. */
+  instanceId: string | null | undefined
+  /** The step's index within the run. */
+  stepIndex: number | null | undefined
+}>()
+const kaizen = useKaizenStore()
+const grading = computed(() => {
+  if (!props.instanceId || props.stepIndex == null) return null
+  return kaizen.gradingForStep(props.instanceId, props.stepIndex)
+})
+// Load the run's gradings whenever the id is set and its cache is empty (so a run that has
+// no gradings yet is re-fetched on every mount). The stream keeps them live afterwards.
+watch(
+  () => props.instanceId,
+  (id) => {
+    if (id && kaizen.gradingsFor(id).length === 0 && kaizen.available !== false) {
+      void kaizen.loadForExecution(id)
+    }
+  },
+  { immediate: true },
+)
+const tone = computed(() => {
+  const g = grading.value
+  if (!g || g.grade == null) return 'text-slate-400'
+  if (g.grade >= 5) return 'text-emerald-400'
+  if (g.grade >= 4) return 'text-lime-400'
+  if (g.grade === 3) return 'text-amber-400'
+  return 'text-rose-400'
+})
+</script>
+<template>
+  <section v-if="grading" class="rounded-xl border border-slate-800 bg-slate-900/50 p-4">
+    <div class="flex items-center gap-2">
+      <UIcon name="i-lucide-sparkles" class="h-4 w-4 text-teal-400" />
+      <h3 class="text-sm font-semibold text-slate-200">Kaizen grading</h3>
+      <span class="ml-auto flex items-center gap-1.5 text-xs">
+        <template v-if="grading.status === 'scheduled'">
+          <UIcon name="i-lucide-clock" class="h-3.5 w-3.5 text-slate-500" />
+          <span class="text-slate-400">Scheduled</span>
+        </template>
+        <template v-else-if="grading.status === 'running'">
+          <UIcon name="i-lucide-loader-circle" class="h-3.5 w-3.5 animate-spin text-teal-400" />
+          <span class="text-teal-300">Grading…</span>
+        </template>
+        <template v-else-if="grading.status === 'failed'">
+          <UIcon name="i-lucide-circle-alert" class="h-3.5 w-3.5 text-rose-400" />
+          <span class="text-rose-400">Failed</span>
+        </template>
+        <template v-else>
+          <span class="font-semibold" :class="tone">{{ grading.grade }}/5</span>
+        </template>
+      </span>
+    </div>
+    <p v-if="grading.status === 'scheduled'" class="mt-2 text-[11px] text-slate-500">
+      A Kaizen grading is queued for this step. It runs in the background after the run.
+    </p>
+    <template v-else-if="grading.status === 'complete'">
+      <p v-if="grading.summary" class="mt-2 text-xs text-slate-300">{{ grading.summary }}</p>
+      <div v-if="grading.recommendations.length" class="mt-2">
+        <p class="text-[11px] font-medium uppercase tracking-wide text-slate-500">
+          Recommendations
+        </p>
+        <ul class="mt-1 list-disc space-y-0.5 pl-4 text-xs text-slate-300">
+          <li v-for="(r, i) in grading.recommendations" :key="i">{{ r }}</li>
+        </ul>
+      </div>
+      <p v-else class="mt-2 text-[11px] text-emerald-400/80">
+        Smooth interaction — nothing to improve.
+      </p>
+      <p v-if="grading.graderModel" class="mt-2 text-[10px] text-slate-600">
+        Graded by {{ grading.graderModel }}
+      </p>
+    </template>
+    <p v-else-if="grading.status === 'failed'" class="mt-2 text-[11px] text-rose-400/80">
+      {{ grading.error ?? 'The grading could not be completed.' }}
+    </p>
+  </section>
+</template>

package/app/components/layout/SideBar.vue CHANGED Viewed

@@ -138,6 +138,18 @@ watch(
         >
           Sandbox
         </UButton>
+        <!-- The Kaizen screen: grading history + verified prompt/agent/model combos. -->
+        <UButton
+          block
+          color="primary"
+          variant="soft"
+          size="sm"
+          icon="i-lucide-sparkles"
+          class="justify-start"
+          @click="ui.openKaizen()"
+        >
+          Kaizen
+        </UButton>
       </div>
     </section>

package/app/components/panels/AgentStepDetail.vue CHANGED Viewed

@@ -313,6 +313,12 @@ async function copyOutput() {
                 />
               </section>
+              <!-- post-run Kaizen grading status + results for this step (run-details only) -->
+              <KaizenStepStatus
+                :instance-id="ctx?.instanceId ?? null"
+                :step-index="ctx?.stepIndex ?? null"
+              />
               <!-- companion rework budget spent: the shared iteration-cap decision
                    (one more round / proceed with the current output / stop & reset) -->
               <IterationCapPrompt

package/app/components/settings/WorkspaceSettingsPanel.vue CHANGED Viewed

@@ -68,6 +68,7 @@ const draft = reactive({
   taskLimitShared: 5 as number,
   perType: {} as Record<CreateTaskType, number>,
   storeAgentContext: true,
+  kaizenEnabled: true,
   // Budget: empty string ⇒ "use the built-in default" (null on the wire).
   spendCurrency: '',
   spendMonthlyLimit: '',
@@ -81,6 +82,7 @@ function hydrate() {
   const pt = s.taskLimitPerType ?? {}
   for (const t of TASK_TYPES) draft.perType[t] = pt[t] ?? 3
   draft.storeAgentContext = s.storeAgentContext
+  draft.kaizenEnabled = s.kaizenEnabled
   draft.spendCurrency = s.spendCurrency ?? ''
   draft.spendMonthlyLimit = s.spendMonthlyLimit == null ? '' : String(s.spendMonthlyLimit)
 }
@@ -107,6 +109,7 @@ async function save() {
             )
           : null,
       storeAgentContext: draft.storeAgentContext,
+      kaizenEnabled: draft.kaizenEnabled,
     })
     toast.add({ title: 'Settings saved', icon: 'i-lucide-check', color: 'success' })
   } catch (e) {
@@ -237,6 +240,23 @@ async function saveBudget() {
               </label>
             </section>
+            <!-- Kaizen agent -->
+            <section class="space-y-2">
+              <h3 class="text-sm font-semibold text-slate-200">Kaizen agent</h3>
+              <p class="text-[11px] text-slate-400">
+                After each run completes, the Kaizen agent grades how every agent step went — smooth
+                and efficient vs confused and chaotic — and recommends prompt/model improvements. A
+                prompt + agent + model combination that grades highly with no recommendations five
+                times in a row is marked verified and is no longer graded. Grading runs in the
+                background and is shown inside run details and the Kaizen screen. Set the grader's
+                model in Model Configuration (the “Kaizen” agent).
+              </p>
+              <label class="flex items-center gap-2">
+                <USwitch v-model="draft.kaizenEnabled" size="sm" />
+                <span class="text-sm text-slate-200">Grade agent runs with Kaizen</span>
+              </label>
+            </section>
             <div class="flex justify-end">
               <UButton
                 color="primary"

package/app/composables/api/kaizen.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import type { KaizenGrading, KaizenOverview } from '~/types/domain'
+import type { ApiContext } from './context'
+/** Kaizen (post-run grading) read endpoints: the screen overview + a run's gradings. */
+export function kaizenApi({ http, ws }: ApiContext) {
+  return {
+    // The Kaizen screen: recent grading history + the verified-combo library.
+    getKaizenOverview: (workspaceId: string) => http<KaizenOverview>(`${ws(workspaceId)}/kaizen`),
+    // The gradings recorded for one run (the run-window status surface).
+    getKaizenForExecution: (workspaceId: string, executionId: string) =>
+      http<{ gradings: KaizenGrading[] }>(
+        `${ws(workspaceId)}/executions/${encodeURIComponent(executionId)}/kaizen`,
+      ),
+  }
+}

package/app/composables/useApi.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import { executionApi } from './api/execution'
 import { fragmentsApi } from './api/fragments'
 import { githubApi } from './api/github'
 import { humanTestApi } from './api/humanTest'
+import { kaizenApi } from './api/kaizen'
 import { modelsApi } from './api/models'
 import { notificationsApi } from './api/notifications'
 import { presetsApi } from './api/presets'
@@ -85,6 +86,7 @@ export function useApi() {
     ...tasksApi(ctx),
     ...reviewsApi(ctx),
     ...humanTestApi(ctx),
+    ...kaizenApi(ctx),
     ...specApi(ctx),
     ...notificationsApi(ctx),
     ...presetsApi(ctx),

package/app/composables/useWorkspaceStream.ts CHANGED Viewed

@@ -23,6 +23,7 @@ export function useWorkspaceStream() {
   const requirements = useRequirementsStore()
   const consensus = useConsensusStore()
   const clarity = useClarityStore()
+  const kaizen = useKaizenStore()
   const api = useApi()
   const apiBase = useRuntimeConfig().public.apiBase
@@ -87,6 +88,11 @@ export function useWorkspaceStream() {
       // cache so an open review window / inspector reflects it live ("incorporating…" → the
       // next cycle / converged). The summons back, when needed, arrives as a `notification`.
       clarity.upsert(event.review)
+    } else if (event.type === 'kaizen') {
+      // A post-run Kaizen grading was scheduled, started or completed — fold it into the
+      // run cache (so an open run window shows scheduled→running→complete live) and the
+      // Kaizen screen history. Never surfaced on the board.
+      kaizen.upsert(event.grading)
     }
   }

package/app/pages/index.vue CHANGED Viewed

@@ -11,6 +11,7 @@ import DecisionModal from '~/components/panels/DecisionModal.vue'
 import AgentStepDetail from '~/components/panels/AgentStepDetail.vue'
 import StepResultViewHost from '~/components/panels/StepResultViewHost.vue'
 import ObservabilityPanel from '~/components/panels/ObservabilityPanel.vue'
+import KaizenPanel from '~/components/kaizen/KaizenPanel.vue'
 import BlockFocusView from '~/components/focus/BlockFocusView.vue'
 import DocumentSourceConnectModal from '~/components/documents/DocumentSourceConnectModal.vue'
 import DocumentImportModal from '~/components/documents/DocumentImportModal.vue'
@@ -171,6 +172,7 @@ watch(
       <AgentStepDetail />
       <StepResultViewHost />
       <ObservabilityPanel />
+      <KaizenPanel />
       <DocumentSourceConnectModal />
       <DocumentImportModal />
       <SpawnPreviewModal />

package/app/stores/kaizen.ts ADDED Viewed

@@ -0,0 +1,101 @@
+import { defineStore } from 'pinia'
+import { computed, ref } from 'vue'
+import type { KaizenGrading, KaizenVerifiedCombo } from '~/types/domain'
+import { useWorkspaceStore } from '~/stores/workspace'
+/**
+ * Kaizen state: per-run gradings (for the run-window status surface) and the
+ * workspace-wide history + verified-combo library (for the Kaizen screen). Gradings
+ * arrive both via lazy loads and live over the workspace stream (`upsert`). Never
+ * surfaced on the board — only inside run details + the dedicated screen.
+ */
+export const useKaizenStore = defineStore('kaizen', () => {
+  const api = useApi()
+  /** Gradings keyed by run (execution) id, for the run window. */
+  const byExecution = ref<Record<string, KaizenGrading[]>>({})
+  /** Recent grading history for the Kaizen screen. */
+  const history = ref<KaizenGrading[]>([])
+  /** The verified-combo library for the Kaizen screen. */
+  const verified = ref<KaizenVerifiedCombo[]>([])
+  const loadingOverview = ref(false)
+  const loadingExecution = ref<Set<string>>(new Set())
+  /** null ⇒ unknown, true ⇒ a load succeeded, false ⇒ 503 (Kaizen isn't configured on this deployment). */
+  const available = ref<boolean | null>(null)
+  function gradingsFor(executionId: string): KaizenGrading[] {
+    return byExecution.value[executionId] ?? []
+  }
+  /** The grading for a specific step of a run, if any. */
+  function gradingForStep(executionId: string, stepIndex: number): KaizenGrading | null {
+    return gradingsFor(executionId).find((g) => g.stepIndex === stepIndex) ?? null
+  }
+  async function loadOverview() {
+    const ws = useWorkspaceStore()
+    loadingOverview.value = true
+    try {
+      const overview = await api.getKaizenOverview(ws.requireId())
+      history.value = overview.gradings
+      verified.value = overview.verified
+      available.value = true
+    } catch (e) {
+      if ((e as { statusCode?: number; status?: number })?.statusCode === 503)
+        available.value = false
+      else throw e
+    } finally {
+      loadingOverview.value = false
+    }
+  }
+  async function loadForExecution(executionId: string) {
+    const ws = useWorkspaceStore()
+    loadingExecution.value = new Set(loadingExecution.value).add(executionId)
+    try {
+      const { gradings } = await api.getKaizenForExecution(ws.requireId(), executionId)
+      byExecution.value = { ...byExecution.value, [executionId]: gradings }
+      available.value = true
+    } catch (e) {
+      if ((e as { statusCode?: number; status?: number })?.statusCode === 503)
+        available.value = false
+      else throw e
+    } finally {
+      const next = new Set(loadingExecution.value)
+      next.delete(executionId)
+      loadingExecution.value = next
+    }
+  }
+  /** Fold a grading pushed over the stream into both the run cache and the screen history. */
+  function upsert(grading: KaizenGrading) {
+    const current = byExecution.value[grading.executionId] ?? []
+    const replaced = current.some((g) => g.id === grading.id)
+    const nextRun = replaced
+      ? current.map((g) => (g.id === grading.id ? grading : g))
+      : [...current, grading]
+    byExecution.value = { ...byExecution.value, [grading.executionId]: nextRun }
+    // Keep the screen history live too: replace in place, else prepend (newest first).
+    const inHistory = history.value.some((g) => g.id === grading.id)
+    if (inHistory) history.value = history.value.map((g) => (g.id === grading.id ? grading : g))
+    else history.value = [grading, ...history.value]
+  }
+  const isLoadingExecution = (executionId: string) => loadingExecution.value.has(executionId)
+  const verifiedCount = computed(() => verified.value.filter((c) => c.verified).length)
+  return {
+    byExecution,
+    history,
+    verified,
+    available,
+    loadingOverview,
+    verifiedCount,
+    gradingsFor,
+    gradingForStep,
+    loadOverview,
+    loadForExecution,
+    upsert,
+    isLoadingExecution,
+  }
+})

package/app/stores/ui.ts CHANGED Viewed

@@ -149,6 +149,10 @@ export const useUiStore = defineStore('ui', () => {
   // per-call detail from the observability store on open.
   const observabilityInstanceId = ref<string | null>(null)
+  // The Kaizen screen (grading history + verified-combo library), a full-panel overlay
+  // opened from the sidebar. Distinct from the per-run grading status shown in run details.
+  const kaizenScreenOpen = ref(false)
   /** Current canvas zoom (driven by Vue Flow viewport). */
   const zoom = ref(1)
@@ -473,6 +477,12 @@ export const useUiStore = defineStore('ui', () => {
   function closeObservability() {
     observabilityInstanceId.value = null
   }
+  function openKaizen() {
+    kaizenScreenOpen.value = true
+  }
+  function closeKaizen() {
+    kaizenScreenOpen.value = false
+  }
   return {
     selectedBlockId,
@@ -513,6 +523,7 @@ export const useUiStore = defineStore('ui', () => {
     closeResultView,
     stepDetail,
     observabilityInstanceId,
+    kaizenScreenOpen,
     zoom,
     lod,
     expandedFrames,
@@ -589,5 +600,7 @@ export const useUiStore = defineStore('ui', () => {
     closeStepDetail,
     openObservability,
     closeObservability,
+    openKaizen,
+    closeKaizen,
   }
 })

package/app/stores/workspaceSettings.ts CHANGED Viewed

@@ -10,6 +10,7 @@ const DEFAULTS: WorkspaceSettings = {
   taskLimitShared: null,
   taskLimitPerType: null,
   storeAgentContext: true,
+  kaizenEnabled: true,
   spendCurrency: null,
   spendMonthlyLimit: null,
 }

package/app/types/domain.ts CHANGED Viewed

@@ -309,6 +309,9 @@ export type AgentKind =
   // validate the change in a live URL, dispatching the Tester's `fixer` (from findings) or
   // the `conflict-resolver` (on a conflicting pull-main) on demand. Opens its own window.
   | 'human-test'
+  // The Kaizen agent: post-run grader (NOT a pipeline step / palette archetype). Surfaced
+  // only in Model Configuration (its model is pinnable like any agent) and run details.
+  | 'kaizen'
 /** A draggable agent definition shown in the agent palette. */
 /** Palette grouping for the agent archetypes (collapsible sections in the builder). */
@@ -495,6 +498,8 @@ export interface WorkspaceSettings {
   taskLimitPerType: Partial<Record<CreateTaskType, number>> | null
   /** Whether to store the complete provided-context snapshot for each container agent. */
   storeAgentContext: boolean
+  /** Whether the Kaizen agent grades agent steps after each run. On by default. */
+  kaizenEnabled: boolean
   /** Spend budget currency (ISO 4217). Null ⇒ the built-in default (`EUR`). */
   spendCurrency: string | null
   /** Monthly spend budget in `spendCurrency`. Null ⇒ the built-in default. */
@@ -508,6 +513,7 @@ export interface UpdateWorkspaceSettingsInput {
   taskLimitShared?: number | null
   taskLimitPerType?: Partial<Record<CreateTaskType, number>> | null
   storeAgentContext?: boolean
+  kaizenEnabled?: boolean
   spendCurrency?: string | null
   spendMonthlyLimit?: number | null
 }
@@ -520,6 +526,50 @@ export interface ServiceFragmentDefaults {
   fragmentIds: string[]
 }
+/** Lifecycle of a Kaizen grading. Mirrors `@cat-factory/contracts`. */
+export type KaizenGradingStatus = 'scheduled' | 'running' | 'complete' | 'failed'
+/**
+ * A Kaizen grading of one completed agent step (how smooth/efficient the interaction
+ * was, 1..5, plus recommendations). Mirrors `@cat-factory/contracts`.
+ */
+export interface KaizenGrading {
+  id: string
+  executionId: string
+  blockId: string
+  stepIndex: number
+  agentKind: string
+  model: string
+  promptVersion: number
+  comboKey: string
+  status: KaizenGradingStatus
+  grade: number | null
+  summary: string
+  recommendations: string[]
+  graderModel: string | null
+  error: string | null
+  createdAt: number
+  updatedAt: number
+}
+/** A `(promptVersion, agentKind, model)` combo's verification progress. */
+export interface KaizenVerifiedCombo {
+  comboKey: string
+  agentKind: string
+  model: string
+  promptVersion: number
+  consecutiveHighGrades: number
+  verified: boolean
+  verifiedAt: number | null
+  updatedAt: number
+}
+/** The Kaizen screen payload: recent grading history + the verified-combo library. */
+export interface KaizenOverview {
+  gradings: KaizenGrading[]
+  verified: KaizenVerifiedCombo[]
+}
 /**
  * Real-time events pushed over the workspace WebSocket stream (see
  * `useWorkspaceStream`). Mirrors `WorkspaceEvent` in `@cat-factory/contracts`.
@@ -533,6 +583,7 @@ export type WorkspaceEvent =
   | { type: 'requirements'; review: RequirementReview; at: number }
   | { type: 'consensus'; session: ConsensusSession; at: number }
   | { type: 'clarity'; review: ClarityReview; at: number }
+  | { type: 'kaizen'; grading: KaizenGrading; at: number }
 /** Level-of-detail buckets driven by the canvas zoom level. Shallow → deep:
  * `far`/`mid`/`close` govern a service frame (chip → card → opened with tasks);

package/app/utils/catalog.ts CHANGED Viewed

@@ -330,6 +330,17 @@ export const SYSTEM_AGENT_META: Record<string, AgentArchetype> = {
     color: '#a3e635',
     description: 'Scores the PR and auto-merges within the task thresholds, or asks for review.',
   },
+  // The Kaizen agent grades agent steps AFTER a run completes (continuous improvement).
+  // It is NOT a pipeline step (never in the palette — no `category`), but it runs an LLM,
+  // so it needs display metadata here and a per-workspace model in Model Configuration.
+  kaizen: {
+    kind: 'kaizen',
+    label: 'Kaizen',
+    icon: 'i-lucide-sparkles',
+    color: '#2dd4bf',
+    description:
+      'Grades each completed agent step (smooth vs chaotic) after a run and recommends prompt/model improvements.',
+  },
   // A polling gate (no model of its own) that watches the released PR's observability
   // signals after merge and escalates to the on-call agent on a regression. NOT in any
   // default pipeline and NOT a standing palette archetype — the palette surfaces it
@@ -365,9 +376,15 @@ export const OBSERVABILITY_GATE_ARCHETYPE: AgentArchetype =
  * default model would do nothing for them.
  */
 export const MODEL_CONFIGURABLE_SYSTEM_KINDS: AgentArchetype[] = [
-  ...['spec-writer', 'blueprints', 'conflict-resolver', 'ci-fixer', 'fixer', 'merger'].map(
-    (kind) => SYSTEM_AGENT_META[kind]!,
-  ),
+  ...[
+    'spec-writer',
+    'blueprints',
+    'conflict-resolver',
+    'ci-fixer',
+    'fixer',
+    'merger',
+    'kaizen',
+  ].map((kind) => SYSTEM_AGENT_META[kind]!),
   // Companions run LLMs but aren't palette-addable (they're producer toggles), so include
   // them here to keep their per-workspace default model pinnable in the Model Defaults panel.
   ...COMPANION_ARCHETYPES,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cat-factory/app",
-  "version": "0.30.5",
+  "version": "0.31.0",
   "description": "Reusable Nuxt layer for the Agent Architecture Board SPA (components, stores, composables, pages). Consume it from a thin deployment app via `extends: ['@cat-factory/app']` and point it at your backend with NUXT_PUBLIC_API_BASE. See deploy/frontend for an example.",
   "repository": {
     "type": "git",