npm - @swarmclawai/swarmclaw - Versions diffs - 1.0.5 → 1.0.6 - Mend

@swarmclawai/swarmclaw 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/package.json +1 -1
package/src/app/api/autonomy/incidents/route.ts +19 -0
package/src/app/api/autonomy/reflections/route.ts +19 -0
package/src/app/api/settings/route.ts +3 -0
package/src/app/settings/page.tsx +9 -0
package/src/cli/index.js +8 -0
package/src/cli/spec.js +7 -0
package/src/lib/autonomy/supervisor-settings.ts +80 -0
package/src/lib/server/agents/main-agent-loop-advanced.test.ts +35 -0
package/src/lib/server/agents/main-agent-loop.ts +45 -8
package/src/lib/server/autonomy/supervisor-reflection.test.ts +279 -0
package/src/lib/server/autonomy/supervisor-reflection.ts +817 -0
package/src/lib/server/memory/temporal-decay.ts +6 -0
package/src/lib/server/runtime/queue.ts +118 -12
package/src/lib/server/runtime/session-run-manager.ts +51 -1
package/src/lib/server/storage.ts +27 -1
package/src/types/index.ts +57 -0
package/src/views/settings/section-supervisor-reflection.tsx +148 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@swarmclawai/swarmclaw",
-  "version": "1.0.5",
+  "version": "1.0.6",
   "description": "Self-hosted AI orchestration control plane for OpenClaw, multi-agent workflows, runtime skills, crypto wallets, and chat platform connectors.",
   "license": "MIT",
   "publishConfig": {

package/src/app/api/autonomy/incidents/route.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import { NextResponse } from 'next/server'
+import { listSupervisorIncidents } from '@/lib/server/autonomy/supervisor-reflection'
+export const dynamic = 'force-dynamic'
+/**
+ * Parses the optional `limit` query parameter.
+ *
+ * @param value Raw query-string value, or `null` when the parameter is absent.
+ * @returns The parsed positive integer, or `undefined` when the value is missing,
+ *   not numeric, or not greater than zero, so the caller passes no explicit limit.
+ */
+function parseLimit(value: string | null): number | undefined {
+  if (!value) return undefined
+  const parsed = Number.parseInt(value, 10)
+  // A zero or negative limit is never a meaningful request; treat it like a missing value
+  // instead of forwarding it to the listing function.
+  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined
+}
+/**
+ * GET handler: reads the optional `sessionId`, `taskId`, and `limit` query parameters
+ * and returns the result of `listSupervisorIncidents` as a JSON response.
+ */
+export async function GET(req: Request) {
+  const { searchParams } = new URL(req.url)
+  const filters = {
+    // Absent or empty parameters are normalised to undefined.
+    sessionId: searchParams.get('sessionId') || undefined,
+    taskId: searchParams.get('taskId') || undefined,
+    limit: parseLimit(searchParams.get('limit')),
+  }
+  return NextResponse.json(listSupervisorIncidents(filters))
+}

package/src/app/api/autonomy/reflections/route.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import { NextResponse } from 'next/server'
+import { listRunReflections } from '@/lib/server/autonomy/supervisor-reflection'
+export const dynamic = 'force-dynamic'
+/**
+ * Parses the optional `limit` query parameter.
+ *
+ * @param value Raw query-string value, or `null` when the parameter is absent.
+ * @returns The parsed positive integer, or `undefined` when the value is missing,
+ *   not numeric, or not greater than zero, so the caller passes no explicit limit.
+ */
+function parseLimit(value: string | null): number | undefined {
+  if (!value) return undefined
+  const parsed = Number.parseInt(value, 10)
+  // A zero or negative limit is never a meaningful request; treat it like a missing value
+  // instead of forwarding it to the listing function.
+  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined
+}
+/**
+ * GET handler: reads the optional `sessionId`, `taskId`, and `limit` query parameters
+ * and returns the result of `listRunReflections` as a JSON response.
+ */
+export async function GET(req: Request) {
+  const { searchParams } = new URL(req.url)
+  const filters = {
+    // Absent or empty parameters are normalised to undefined.
+    sessionId: searchParams.get('sessionId') || undefined,
+    taskId: searchParams.get('taskId') || undefined,
+    limit: parseLimit(searchParams.get('limit')),
+  }
+  return NextResponse.json(listRunReflections(filters))
+}

package/src/app/api/settings/route.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { normalizeHeartbeatSettingFields } from '@/lib/runtime/heartbeat-default
 import { normalizeWhatsAppApprovedContacts } from '@/lib/server/connectors/pairing'
 import { loadPublicSettings, loadSettings, saveSettings } from '@/lib/server/storage'
 import { normalizeRuntimeSettingFields } from '@/lib/runtime/runtime-loop'
+import { normalizeSupervisorSettings } from '@/lib/autonomy/supervisor-settings'
 export const dynamic = 'force-dynamic'
@@ -85,6 +86,7 @@ export async function PUT(req: Request) {
   )
   const normalizedRuntime = normalizeRuntimeSettingFields(settings)
   const normalizedHeartbeat = normalizeHeartbeatSettingFields(settings)
+  const normalizedSupervisor = normalizeSupervisorSettings(settings)
   const nextResponseCacheTtlSec = parseIntSetting(
     settings.responseCacheTtlSec,
     15 * 60,
@@ -118,6 +120,7 @@ export async function PUT(req: Request) {
   settings.maxLinkedMemoriesExpanded = nextLinked
   Object.assign(settings, normalizedRuntime)
   Object.assign(settings, normalizedHeartbeat)
+  Object.assign(settings, normalizedSupervisor)
   settings.responseCacheTtlSec = nextResponseCacheTtlSec
   settings.responseCacheMaxEntries = nextResponseCacheMaxEntries
   settings.responseCacheEnabled = parseBoolSetting(settings.responseCacheEnabled, true)

package/src/app/settings/page.tsx CHANGED Viewed

@@ -10,6 +10,7 @@ import { UserPreferencesSection } from '@/views/settings/section-user-preference
 import { ThemeSection } from '@/views/settings/section-theme'
 import { OrchestratorSection } from '@/views/settings/section-orchestrator'
 import { RuntimeLoopSection } from '@/views/settings/section-runtime-loop'
+import { SupervisorReflectionSection } from '@/views/settings/section-supervisor-reflection'
 import { CapabilityPolicySection } from '@/views/settings/section-capability-policy'
 import { WalletsSection } from '@/views/settings/section-wallets'
 import { StorageSection } from '@/views/settings/section-storage'
@@ -189,6 +190,14 @@ export default function SettingsRoute() {
       keywords: ['heartbeat', 'follow up', 'interval', 'ongoing'],
       render: () => <HeartbeatSection {...sectionProps} />,
     },
+    {
+      id: 'supervisor-reflection',
+      tabId: 'agents',
+      title: 'Supervisor & Reflection',
+      description: 'Automatic recovery from bad loops plus post-run reflection memory.',
+      keywords: ['supervisor', 'reflection', 'autonomy', 'memory', 'self-learning', 'replan'],
+      render: () => <SupervisorReflectionSection {...sectionProps} />,
+    },
     {
       id: 'embedding',
       tabId: 'memory',

package/src/cli/index.js CHANGED Viewed

@@ -44,6 +44,14 @@ const COMMAND_GROUPS = [
       }),
     ],
   },
+  {
+    name: 'autonomy',
+    description: 'Inspect supervisor incidents and reflection output',
+    commands: [
+      cmd('incidents', 'GET', '/autonomy/incidents', 'List supervisor incidents (use --query sessionId=..., --query taskId=..., --query limit=50)'),
+      cmd('reflections', 'GET', '/autonomy/reflections', 'List run reflections (use --query sessionId=..., --query taskId=..., --query limit=50)'),
+    ],
+  },
   {
     name: 'approvals',
     description: 'List and resolve human-loop approvals',

package/src/cli/spec.js CHANGED Viewed

@@ -25,6 +25,13 @@ const COMMAND_GROUPS = {
       login: { description: 'Validate an access key', method: 'POST', path: '/auth' },
     },
   },
+  autonomy: {
+    description: 'Autonomy supervisor inspection',
+    commands: {
+      incidents: { description: 'List supervisor incidents (supports --query sessionId=..., --query taskId=..., --query limit=50)', method: 'GET', path: '/autonomy/incidents' },
+      reflections: { description: 'List run reflections (supports --query sessionId=..., --query taskId=..., --query limit=50)', method: 'GET', path: '/autonomy/reflections' },
+    },
+  },
   approvals: {
     description: 'List and resolve human-loop approvals',
     commands: {

package/src/lib/autonomy/supervisor-settings.ts ADDED Viewed

@@ -0,0 +1,80 @@
+import type { AppSettings } from '@/types'
+// Accepted values for the `supervisorRuntimeScope` setting.
+export type AutonomyRuntimeScope = 'chat' | 'task' | 'both'
+// Fallback values used by normalizeSupervisorSettings when a setting is missing or cannot be parsed.
+export const DEFAULT_SUPERVISOR_ENABLED = true
+export const DEFAULT_SUPERVISOR_RUNTIME_SCOPE: AutonomyRuntimeScope = 'both'
+export const DEFAULT_SUPERVISOR_NO_PROGRESS_LIMIT = 2
+export const DEFAULT_SUPERVISOR_REPEATED_TOOL_LIMIT = 3
+export const DEFAULT_REFLECTION_ENABLED = true
+export const DEFAULT_REFLECTION_AUTO_WRITE_MEMORY = true
+// Inclusive bounds that the two numeric limits are clamped to during normalization.
+export const SUPERVISOR_NO_PROGRESS_LIMIT_MIN = 1
+export const SUPERVISOR_NO_PROGRESS_LIMIT_MAX = 8
+export const SUPERVISOR_REPEATED_TOOL_LIMIT_MIN = 2
+export const SUPERVISOR_REPEATED_TOOL_LIMIT_MAX = 8
+/**
+ * Coerces a loosely typed setting into an integer inside `[min, max]`.
+ *
+ * Numbers are used directly, strings are parsed as base-10 integers, and every other
+ * input (or a non-finite result) yields `fallback` unchanged.
+ *
+ * @param value Raw setting value of unknown type.
+ * @param fallback Returned as-is when `value` cannot be interpreted as a finite number.
+ * @param min Inclusive lower bound applied to the truncated value.
+ * @param max Inclusive upper bound applied to the truncated value.
+ * @returns The truncated, clamped integer, or `fallback`.
+ */
+function parseIntSetting(value: unknown, fallback: number, min: number, max: number): number {
+  let candidate = Number.NaN
+  if (typeof value === 'number') {
+    candidate = value
+  } else if (typeof value === 'string') {
+    candidate = Number.parseInt(value, 10)
+  }
+  if (!Number.isFinite(candidate)) return fallback
+  // Drop any fractional part first, then cap from above before flooring from below.
+  const whole = Math.trunc(candidate)
+  const capped = Math.min(max, whole)
+  return Math.max(min, capped)
+}
+/**
+ * Coerces a loosely typed setting into a boolean.
+ *
+ * Booleans pass through. Strings are trimmed and lower-cased, then matched against the
+ * truthy words `1/true/yes/on` and the falsy words `0/false/no/off`. Anything else
+ * yields `fallback`.
+ *
+ * @param value Raw setting value of unknown type.
+ * @param fallback Result for unrecognised input.
+ * @returns The interpreted boolean, or `fallback`.
+ */
+function parseBoolSetting(value: unknown, fallback: boolean): boolean {
+  if (typeof value === 'boolean') return value
+  if (typeof value !== 'string') return fallback
+  switch (value.trim().toLowerCase()) {
+    case '1':
+    case 'true':
+    case 'yes':
+    case 'on':
+      return true
+    case '0':
+    case 'false':
+    case 'no':
+    case 'off':
+      return false
+    default:
+      return fallback
+  }
+}
+/** Fully populated supervisor/reflection settings, as returned by normalizeSupervisorSettings. */
+export interface NormalizedSupervisorSettings {
+  /** Boolean flag; defaults to DEFAULT_SUPERVISOR_ENABLED when unset or unparseable. */
+  supervisorEnabled: boolean
+  /** One of 'chat', 'task', or 'both'; defaults to DEFAULT_SUPERVISOR_RUNTIME_SCOPE. */
+  supervisorRuntimeScope: AutonomyRuntimeScope
+  /** Integer clamped to [SUPERVISOR_NO_PROGRESS_LIMIT_MIN, SUPERVISOR_NO_PROGRESS_LIMIT_MAX]. */
+  supervisorNoProgressLimit: number
+  /** Integer clamped to [SUPERVISOR_REPEATED_TOOL_LIMIT_MIN, SUPERVISOR_REPEATED_TOOL_LIMIT_MAX]. */
+  supervisorRepeatedToolLimit: number
+  /** Boolean flag; defaults to DEFAULT_REFLECTION_ENABLED when unset or unparseable. */
+  reflectionEnabled: boolean
+  /** Boolean flag; defaults to DEFAULT_REFLECTION_AUTO_WRITE_MEMORY when unset or unparseable. */
+  reflectionAutoWriteMemory: boolean
+}
+/**
+ * Builds a complete, validated set of supervisor and reflection settings.
+ *
+ * Each field is read from `settings` and coerced on its own: booleans through
+ * `parseBoolSetting`, numeric limits through `parseIntSetting` with their min/max bounds,
+ * and the runtime scope by exact match against the three accepted literals. Missing or
+ * invalid fields fall back to the module defaults.
+ *
+ * @param settings Any settings-like object, or `null`/`undefined` to get all defaults.
+ * @returns A new object holding only the six normalized supervisor/reflection fields.
+ */
+export function normalizeSupervisorSettings(
+  settings: Partial<AppSettings> | NormalizedSupervisorSettings | Record<string, unknown> | null | undefined,
+): NormalizedSupervisorSettings {
+  const source = settings || {}
+  const requestedScope = source.supervisorRuntimeScope
+  // Only the three known literals are accepted; anything else uses the default scope.
+  const supervisorRuntimeScope = requestedScope === 'chat' || requestedScope === 'task' || requestedScope === 'both'
+    ? requestedScope
+    : DEFAULT_SUPERVISOR_RUNTIME_SCOPE
+  const supervisorEnabled = parseBoolSetting(source.supervisorEnabled, DEFAULT_SUPERVISOR_ENABLED)
+  const supervisorNoProgressLimit = parseIntSetting(
+    source.supervisorNoProgressLimit,
+    DEFAULT_SUPERVISOR_NO_PROGRESS_LIMIT,
+    SUPERVISOR_NO_PROGRESS_LIMIT_MIN,
+    SUPERVISOR_NO_PROGRESS_LIMIT_MAX,
+  )
+  const supervisorRepeatedToolLimit = parseIntSetting(
+    source.supervisorRepeatedToolLimit,
+    DEFAULT_SUPERVISOR_REPEATED_TOOL_LIMIT,
+    SUPERVISOR_REPEATED_TOOL_LIMIT_MIN,
+    SUPERVISOR_REPEATED_TOOL_LIMIT_MAX,
+  )
+  const reflectionEnabled = parseBoolSetting(source.reflectionEnabled, DEFAULT_REFLECTION_ENABLED)
+  const reflectionAutoWriteMemory = parseBoolSetting(
+    source.reflectionAutoWriteMemory,
+    DEFAULT_REFLECTION_AUTO_WRITE_MEMORY,
+  )
+  return {
+    supervisorEnabled,
+    supervisorRuntimeScope,
+    supervisorNoProgressLimit,
+    supervisorRepeatedToolLimit,
+    reflectionEnabled,
+    reflectionAutoWriteMemory,
+  }
+}
+/**
+ * Reports whether a configured runtime scope covers the given surface.
+ *
+ * @param runtimeScope Configured scope: 'chat', 'task', or 'both'.
+ * @param surface The surface being checked.
+ * @returns `true` when the scope is 'both' or equals `surface`.
+ */
+export function runtimeScopeIncludes(
+  runtimeScope: AutonomyRuntimeScope,
+  surface: 'chat' | 'task',
+): boolean {
+  if (runtimeScope === 'both') return true
+  return runtimeScope === surface
+}

package/src/lib/server/agents/main-agent-loop-advanced.test.ts CHANGED Viewed

@@ -230,6 +230,41 @@ describe('main-agent-loop advanced', () => {
     assert.match(String(output.followupMessage || ''), /Resume from this next action/)
   })
+  // Feeds handleMainLoopRunResult a chat-sourced run containing three identical `shell` tool events
+  // and checks that a follow-up is returned, the chain count becomes 1, the follow-up message is the
+  // supervisor intervention text, and the timeline records the supervisor source and trigger note.
+  it('uses the supervisor followup prompt when chat runs start thrashing on the same tool', () => {
+    const output = runWithTempDataDir(`
+      ${sessionSetupScript()}
+      const followup = mainLoop.handleMainLoopRunResult({
+        runId: 'run-supervisor',
+        sessionId: 'main',
+        message: 'Fix the broken deployment pipeline.',
+        internal: false,
+        source: 'chat',
+        resultText: 'Retried the same shell path several times and got the same failure.',
+        toolEvents: [
+          { name: 'shell', input: '{"cmd":"npm test"}' },
+          { name: 'shell', input: '{"cmd":"npm test"}' },
+          { name: 'shell', input: '{"cmd":"npm test"}' },
+        ],
+      })
+      const state = mainLoop.getMainLoopStateForSession('main')
+      console.log(JSON.stringify({
+        hasFollowup: followup !== null,
+        followupMessage: followup?.message ?? null,
+        chain: state?.followupChainCount ?? -1,
+        timelineSources: (state?.timeline || []).map((entry) => entry.source),
+        timelineNotes: (state?.timeline || []).map((entry) => entry.note),
+      }))
+    `)
+    assert.equal(output.hasFollowup, true, 'supervisor should queue a recovery followup')
+    assert.equal(output.chain, 1, 'supervisor followup increments the chain')
+    assert.match(String(output.followupMessage || ''), /Supervisor intervention: stop repeating shell/i)
+    assert.ok((output.timelineSources as string[]).includes('supervisor'), 'supervisor interventions should be visible in timeline')
+    assert.ok((output.timelineNotes as string[]).some((note) => /Repeated tool use detected/i.test(String(note))), 'timeline should explain the supervisor trigger')
+  })
   it('persists and upgrades a skill blocker across recommend/install steps', () => {
     const output = runWithTempDataDir(`
       ${sessionSetupScript()}

package/src/lib/server/agents/main-agent-loop.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { hmrSingleton } from '@/lib/shared-utils'
 import type { GoalContract, Message, MessageToolEvent, Session } from '@/types'
 import { mergeGoalContracts, parseGoalContractFromText, parseMainLoopPlan, parseMainLoopReview } from '@/lib/server/agents/autonomy-contract'
+import { assessAutonomyRun } from '@/lib/server/autonomy/supervisor-reflection'
 import { enqueueSystemEvent } from '@/lib/server/runtime/system-events'
 import { loadSessions, loadSettings } from '@/lib/server/storage'
@@ -73,6 +74,7 @@ export interface PushMainLoopEventInput {
 }
 export interface HandleMainLoopRunResultInput {
+  runId?: string
   sessionId: string
   message: string
   internal: boolean
@@ -817,6 +819,8 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
   const state = getOrCreateState(input.sessionId)
   if (!state) return null
+  const sessions = loadSessions()
+  const session = sessions[input.sessionId] as Session | undefined
   const resultText = input.resultText || ''
   const persistedText = stripMainLoopMetaForPersistence(resultText)
   const toolEvents = Array.isArray(input.toolEvents) ? input.toolEvents : []
@@ -892,6 +896,36 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
     state.pendingEvents = []
   }
+  const assessment = assessAutonomyRun({
+    runId: input.runId || `main-loop-${input.sessionId}-${nowTs}`,
+    sessionId: input.sessionId,
+    source: input.source,
+    status: input.error ? 'failed' : 'completed',
+    resultText,
+    error: input.error,
+    toolEvents,
+    mainLoopState: state,
+    session: session || null,
+    settings: loadSettings(),
+  })
+  for (const incident of assessment.incidents) {
+    appendTimeline(
+      state,
+      'supervisor',
+      `Supervisor: ${incident.summary}`,
+      incident.autoAction === 'block' ? 'blocked' : 'reflection',
+    )
+  }
+  const supervisorPrompt = assessment.shouldBlock ? null : assessment.interventionPrompt
+  if (assessment.shouldBlock) {
+    state.status = 'blocked'
+    state.paused = true
+    state.followupChainCount = 0
+    appendTimeline(state, 'supervisor', 'Supervisor paused the run after detecting a hard blocker.', 'blocked')
+  } else if (supervisorPrompt) {
+    state.paused = false
+  }
   const needsReplan = review?.needs_replan === true || ((review?.confidence ?? 1) < 0.45)
   const limit = followupLimit()
   const allowChatOriginFollowup = !input.internal
@@ -900,7 +934,9 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
     && !waitingForExternal
     && !gotTerminalAck
     && (
-      needsReplan
+      !!supervisorPrompt
+      || assessment.shouldBlock
+      || needsReplan
       || heartbeat?.status === 'progress'
       || !!heartbeat?.nextAction
       || (!!plan?.current_step && toolNames.length > 0)
@@ -913,18 +949,19 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
     state.followupChainCount = 0
     if (gotTerminalAck && state.status !== 'blocked') state.status = 'ok'
   } else {
-    const shouldContinue = needsReplan || state.status === 'progress' || (!!state.nextAction && toolNames.length > 0)
+    const shouldContinue = !!supervisorPrompt || needsReplan || state.status === 'progress' || (!!state.nextAction && toolNames.length > 0)
     if (shouldContinue && state.followupChainCount < limit) {
       state.followupChainCount += 1
-      const message = needsReplan
-        ? 'Replan from the latest outcome, then execute only the highest-value remaining step. Do not repeat completed work.'
-        : state.nextAction
-          ? `Continue the objective. Resume from this next action: ${state.nextAction}`
-          : 'Continue the objective and finish the next highest-value remaining step.'
+      const message = supervisorPrompt
+        || (needsReplan
+          ? 'Replan from the latest outcome, then execute only the highest-value remaining step. Do not repeat completed work.'
+          : state.nextAction
+            ? `Continue the objective. Resume from this next action: ${state.nextAction}`
+            : 'Continue the objective and finish the next highest-value remaining step.')
       followup = {
         message,
         delayMs: DEFAULT_FOLLOWUP_DELAY_MS,
-        dedupeKey: `main-loop:${input.sessionId}:${state.followupChainCount}:${state.currentPlanStep || state.nextAction || 'continue'}`,
+        dedupeKey: `main-loop:${input.sessionId}:${state.followupChainCount}:${supervisorPrompt ? 'supervisor' : (state.currentPlanStep || state.nextAction || 'continue')}`,
       }
       appendTimeline(state, 'followup', message, 'progress')
     } else {

package/src/lib/server/autonomy/supervisor-reflection.test.ts ADDED Viewed

@@ -0,0 +1,279 @@
+import assert from 'node:assert/strict'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import { spawnSync } from 'node:child_process'
+import { describe, it } from 'node:test'
+import { fileURLToPath } from 'node:url'
+import { assessAutonomyRun } from '@/lib/server/autonomy/supervisor-reflection'
+// Repository root, resolved four directories above this test file's directory.
+// fileURLToPath is used instead of URL.pathname because the latter stays percent-encoded
+// (e.g. spaces become %20) and is not a valid filesystem path on Windows.
+const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../../../..')
+/**
+ * Runs `script` in a child Node process (ES module input loaded through `tsx`) with
+ * DATA_DIR and WORKSPACE_DIR pointing into a fresh temporary directory.
+ *
+ * The child must exit with status 0. The last stdout line that starts with `{` is parsed
+ * as JSON and returned; an empty object is returned when no such line exists. The
+ * temporary directory is removed whether or not the run succeeds.
+ *
+ * @param script Source text passed to the child via `--eval`.
+ * @returns The parsed JSON object printed by the script.
+ */
+function runWithTempDataDir(script: string) {
+  const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-supervisor-reflection-'))
+  try {
+    const childEnv = {
+      ...process.env,
+      DATA_DIR: dataDir,
+      WORKSPACE_DIR: path.join(dataDir, 'workspace'),
+      SWARMCLAW_BUILD_MODE: '1',
+    }
+    const child = spawnSync(
+      process.execPath,
+      ['--import', 'tsx', '--input-type=module', '--eval', script],
+      { cwd: repoRoot, env: childEnv, encoding: 'utf-8', timeout: 20000 },
+    )
+    assert.equal(child.status, 0, child.stderr || child.stdout || 'subprocess failed')
+    // Walk stdout from the end so the most recent JSON-looking line is used.
+    let payload = '{}'
+    const reversedLines = (child.stdout || '').trim().split('\n').reverse()
+    for (const rawLine of reversedLines) {
+      const candidate = rawLine.trim()
+      if (candidate.startsWith('{')) {
+        payload = candidate
+        break
+      }
+    }
+    return JSON.parse(payload) as Record<string, unknown>
+  } finally {
+    fs.rmSync(dataDir, { recursive: true, force: true })
+  }
+}
+describe('supervisor-reflection', () => {
+  // Calls assessAutonomyRun in-process with three identical `shell` tool events and a repeated-tool
+  // limit of 3, then expects a `repeated_tool` incident, an intervention prompt, and no block.
+  it('recommends an automatic supervisor recovery step for repeated tool thrash', () => {
+    const assessment = assessAutonomyRun({
+      runId: 'run-1',
+      sessionId: 'session-1',
+      source: 'chat',
+      status: 'completed',
+      resultText: 'Retried the same shell command and got the same output.',
+      toolEvents: [
+        { name: 'shell', input: '{"cmd":"npm test"}' },
+        { name: 'shell', input: '{"cmd":"npm test"}' },
+        { name: 'shell', input: '{"cmd":"npm test"}' },
+      ],
+      mainLoopState: {
+        followupChainCount: 1,
+        summary: 'Retried the same shell command and got the same output.',
+      },
+      settings: {
+        supervisorEnabled: true,
+        supervisorRuntimeScope: 'both',
+        supervisorRepeatedToolLimit: 3,
+        supervisorNoProgressLimit: 2,
+        reflectionEnabled: true,
+        reflectionAutoWriteMemory: true,
+      },
+      session: {
+        id: 'session-1',
+        name: 'Autonomy Test',
+        cwd: process.cwd(),
+        user: 'tester',
+        provider: 'openai',
+        model: 'gpt-test',
+        claudeSessionId: null,
+        messages: [],
+        createdAt: Date.now(),
+        lastActiveAt: Date.now(),
+      } as any,
+    })
+    assert.ok(assessment.incidents.some((incident) => incident.kind === 'repeated_tool'))
+    assert.match(String(assessment.interventionPrompt || ''), /stop repeating shell/i)
+    assert.equal(assessment.shouldBlock, false)
+  })
+  // Runs in a subprocess with a temporary data dir: seeds one agent, one session, and settings, then
+  // calls observeAutonomyRunOutcome with a stubbed generateText that returns one note in each of ten
+  // categories. Expects both incident kinds, one stored reflection, and ten auto-written memories
+  // (one per `reflection/*` category).
+  it('persists reflections and auto-written reflection memory', () => {
+    const output = runWithTempDataDir(`
+      const storageMod = await import('@/lib/server/storage')
+      const storage = storageMod.default || storageMod['module.exports'] || storageMod
+      const reflectionMod = await import('@/lib/server/autonomy/supervisor-reflection')
+      const mod = reflectionMod.default || reflectionMod['module.exports'] || reflectionMod
+      const memoryDbMod = await import('@/lib/server/memory/memory-db')
+      const memoryMod = memoryDbMod.default || memoryDbMod['module.exports'] || memoryDbMod
+      storage.saveAgents({
+        'agent-a': {
+          id: 'agent-a',
+          name: 'Agent A',
+          provider: 'openai',
+          model: 'gpt-test',
+        },
+      })
+      storage.saveSessions({
+        s1: {
+          id: 's1',
+          name: 'Autonomy Session',
+          cwd: process.cwd(),
+          user: 'tester',
+          provider: 'openai',
+          model: 'gpt-test',
+          claudeSessionId: null,
+          messages: [
+            { role: 'user', text: 'Repair the deployment workflow and keep notes for later.', time: 1 },
+            { role: 'assistant', text: 'I retried the same shell path and nothing changed.', time: 2 },
+          ],
+          createdAt: 1,
+          lastActiveAt: 2,
+          sessionType: 'human',
+          agentId: 'agent-a',
+        },
+      })
+      storage.saveSettings({
+        supervisorEnabled: true,
+        supervisorRuntimeScope: 'both',
+        supervisorNoProgressLimit: 2,
+        supervisorRepeatedToolLimit: 3,
+        reflectionEnabled: true,
+        reflectionAutoWriteMemory: true,
+      })
+      const result = await mod.observeAutonomyRunOutcome({
+        runId: 'run-1',
+        sessionId: 's1',
+        agentId: 'agent-a',
+        source: 'chat',
+        status: 'completed',
+        resultText: 'I retried the same shell path and nothing changed.',
+        toolEvents: [
+          { name: 'shell', input: '{"cmd":"npm test"}' },
+          { name: 'shell', input: '{"cmd":"npm test"}' },
+          { name: 'shell', input: '{"cmd":"npm test"}' },
+        ],
+        mainLoopState: {
+          followupChainCount: 2,
+          summary: 'I retried the same shell path and nothing changed.',
+        },
+        sourceMessage: 'Repair the deployment workflow and keep notes for later.',
+      }, {
+        generateText: async () => JSON.stringify({
+          summary: 'Deployment repair reflection',
+          invariants: ['Verify changed files and command output before marking the task complete.'],
+          derived: ['Switch recovery strategy after two identical shell failures in a row.'],
+          failures: ['Repeated shell retries without changing inputs waste budget.'],
+          lessons: ['Capture a short recovery brief before continuing a stuck run.'],
+          communication: ['Keep execution updates concise when reporting repair progress.'],
+          relationship: ['Treat the user as wanting decisive recovery rather than repeated status chatter.'],
+          significant_events: ['The deployment workflow is currently broken and needs a confirmed repair path.'],
+          profile: ['The user is directly responsible for the deployment workflow.'],
+          boundaries: ['Do not claim the repair is complete without concrete verification evidence.'],
+          open_loops: ['Follow up with the final verification result once the repair path succeeds.'],
+        }),
+      })
+      const memories = memoryMod.getMemoryDb().list(undefined, 50)
+        .filter((entry) => entry.metadata && entry.metadata.origin === 'autonomy-reflection')
+      console.log(JSON.stringify({
+        incidentKinds: result.incidents.map((incident) => incident.kind).sort(),
+        reflectionSummary: result.reflection?.summary ?? null,
+        reflectionCount: mod.listRunReflections({ sessionId: 's1' }).length,
+        autoMemoryCount: result.reflection?.autoMemoryIds?.length ?? 0,
+        memoryCategories: memories.map((entry) => entry.category).sort(),
+        profileNotes: result.reflection?.profileNotes ?? [],
+        boundaryNotes: result.reflection?.boundaryNotes ?? [],
+        openLoopNotes: result.reflection?.openLoopNotes ?? [],
+      }))
+    `)
+    assert.deepEqual(output.incidentKinds, ['no_progress', 'repeated_tool'])
+    assert.equal(output.reflectionSummary, 'Deployment repair reflection')
+    assert.equal(output.reflectionCount, 1)
+    assert.equal(output.autoMemoryCount, 10)
+    assert.deepEqual(output.profileNotes, ['The user is directly responsible for the deployment workflow.'])
+    assert.deepEqual(output.boundaryNotes, ['Do not claim the repair is complete without concrete verification evidence.'])
+    assert.deepEqual(output.openLoopNotes, ['Follow up with the final verification result once the repair path succeeds.'])
+    assert.deepEqual(output.memoryCategories, [
+      'reflection/boundary',
+      'reflection/communication',
+      'reflection/derived',
+      'reflection/failure',
+      'reflection/invariant',
+      'reflection/lesson',
+      'reflection/open_loop',
+      'reflection/profile',
+      'reflection/relationship',
+      'reflection/significant_event',
+    ])
+  })
+  // Runs in a subprocess with a temporary data dir: seeds a two-message human session and calls
+  // observeAutonomyRunOutcome without tool events, using a stubbed generateText. Expects the returned
+  // reflection to carry the stubbed summary plus communication, significant-event, and open-loop notes.
+  it('reflects short human chats when they contain durable personal context', () => {
+    const output = runWithTempDataDir(`
+      const storageMod = await import('@/lib/server/storage')
+      const storage = storageMod.default || storageMod['module.exports'] || storageMod
+      const reflectionMod = await import('@/lib/server/autonomy/supervisor-reflection')
+      const mod = reflectionMod.default || reflectionMod['module.exports'] || reflectionMod
+      storage.saveAgents({
+        'agent-a': {
+          id: 'agent-a',
+          name: 'Agent A',
+          provider: 'openai',
+          model: 'gpt-test',
+        },
+      })
+      storage.saveSessions({
+        s2: {
+          id: 's2',
+          name: 'Human Context Session',
+          cwd: process.cwd(),
+          user: 'tester',
+          provider: 'openai',
+          model: 'gpt-test',
+          claudeSessionId: null,
+          messages: [
+            { role: 'user', text: 'I am moving to Lisbon next month and prefer short check-ins while I am juggling the move.', time: 1 },
+            { role: 'assistant', text: 'Understood. I will keep updates tight and remember the move timing.', time: 2 },
+          ],
+          createdAt: 1,
+          lastActiveAt: 2,
+          sessionType: 'human',
+          agentId: 'agent-a',
+        },
+      })
+      storage.saveSettings({
+        supervisorEnabled: true,
+        supervisorRuntimeScope: 'both',
+        supervisorNoProgressLimit: 2,
+        supervisorRepeatedToolLimit: 3,
+        reflectionEnabled: true,
+        reflectionAutoWriteMemory: true,
+      })
+      const result = await mod.observeAutonomyRunOutcome({
+        runId: 'run-human',
+        sessionId: 's2',
+        agentId: 'agent-a',
+        source: 'chat',
+        status: 'completed',
+        resultText: 'I will keep updates tight and remember the move timing.',
+        sourceMessage: 'I am moving to Lisbon next month and prefer short check-ins while I am juggling the move.',
+      }, {
+        generateText: async () => JSON.stringify({
+          summary: 'Human context reflection',
+          communication: ['Prefer short check-ins while the move is in progress.'],
+          significant_events: ['Moving to Lisbon next month.'],
+          open_loops: ['Check in again once the move is complete.'],
+          profile: ['Currently planning a move to Lisbon.'],
+        }),
+      })
+      console.log(JSON.stringify({
+        reflectionSummary: result.reflection?.summary ?? null,
+        communicationNotes: result.reflection?.communicationNotes ?? [],
+        significantEventNotes: result.reflection?.significantEventNotes ?? [],
+        openLoopNotes: result.reflection?.openLoopNotes ?? [],
+      }))
+    `)
+    assert.equal(output.reflectionSummary, 'Human context reflection')
+    assert.deepEqual(output.communicationNotes, ['Prefer short check-ins while the move is in progress.'])
+    assert.deepEqual(output.significantEventNotes, ['Moving to Lisbon next month.'])
+    assert.deepEqual(output.openLoopNotes, ['Check in again once the move is complete.'])
+  })
+})