@swarmclawai/swarmclaw 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@swarmclawai/swarmclaw",
3
- "version": "1.0.5",
3
+ "version": "1.0.6",
4
4
  "description": "Self-hosted AI orchestration control plane for OpenClaw, multi-agent workflows, runtime skills, crypto wallets, and chat platform connectors.",
5
5
  "license": "MIT",
6
6
  "publishConfig": {
@@ -0,0 +1,19 @@
1
+ import { NextResponse } from 'next/server'
2
+
3
+ import { listSupervisorIncidents } from '@/lib/server/autonomy/supervisor-reflection'
4
+
5
+ export const dynamic = 'force-dynamic'
6
+
7
/**
 * Parse the optional `limit` query parameter.
 *
 * Only positive safe integers are accepted. Zero, negative and absurdly large
 * values are not meaningful page sizes, so they are dropped instead of being
 * forwarded unchanged to the incident listing.
 *
 * @param value Raw query-string value, or `null` when the parameter is absent.
 * @returns The parsed limit, or `undefined` when it is missing or invalid (the
 *   limit is then simply omitted from the listing query).
 */
function parseLimit(value: string | null): number | undefined {
  if (!value) return undefined
  // parseInt is deliberately lenient: "7.9" and "7abc" both parse as 7.
  const parsed = Number.parseInt(value, 10)
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined
}
12
+
13
/**
 * GET handler: responds with the JSON result of `listSupervisorIncidents`,
 * filtered by the optional `sessionId`, `taskId` and `limit` query parameters.
 */
export async function GET(req: Request) {
  const { searchParams } = new URL(req.url)
  // Empty-string parameters are treated the same as absent ones.
  const filters = {
    sessionId: searchParams.get('sessionId') || undefined,
    taskId: searchParams.get('taskId') || undefined,
    limit: parseLimit(searchParams.get('limit')),
  }
  return NextResponse.json(listSupervisorIncidents(filters))
}
@@ -0,0 +1,19 @@
1
+ import { NextResponse } from 'next/server'
2
+
3
+ import { listRunReflections } from '@/lib/server/autonomy/supervisor-reflection'
4
+
5
+ export const dynamic = 'force-dynamic'
6
+
7
/**
 * Parse the optional `limit` query parameter.
 *
 * Only positive safe integers are accepted. Zero, negative and absurdly large
 * values are not meaningful page sizes, so they are dropped instead of being
 * forwarded unchanged to the reflection listing.
 *
 * @param value Raw query-string value, or `null` when the parameter is absent.
 * @returns The parsed limit, or `undefined` when it is missing or invalid (the
 *   limit is then simply omitted from the listing query).
 */
function parseLimit(value: string | null): number | undefined {
  if (!value) return undefined
  // parseInt is deliberately lenient: "7.9" and "7abc" both parse as 7.
  const parsed = Number.parseInt(value, 10)
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined
}
12
+
13
/**
 * GET handler: responds with the JSON result of `listRunReflections`,
 * filtered by the optional `sessionId`, `taskId` and `limit` query parameters.
 */
export async function GET(req: Request) {
  const { searchParams } = new URL(req.url)
  // Empty-string parameters are treated the same as absent ones.
  const filters = {
    sessionId: searchParams.get('sessionId') || undefined,
    taskId: searchParams.get('taskId') || undefined,
    limit: parseLimit(searchParams.get('limit')),
  }
  return NextResponse.json(listRunReflections(filters))
}
@@ -3,6 +3,7 @@ import { normalizeHeartbeatSettingFields } from '@/lib/runtime/heartbeat-default
3
3
  import { normalizeWhatsAppApprovedContacts } from '@/lib/server/connectors/pairing'
4
4
  import { loadPublicSettings, loadSettings, saveSettings } from '@/lib/server/storage'
5
5
  import { normalizeRuntimeSettingFields } from '@/lib/runtime/runtime-loop'
6
+ import { normalizeSupervisorSettings } from '@/lib/autonomy/supervisor-settings'
6
7
  export const dynamic = 'force-dynamic'
7
8
 
8
9
 
@@ -85,6 +86,7 @@ export async function PUT(req: Request) {
85
86
  )
86
87
  const normalizedRuntime = normalizeRuntimeSettingFields(settings)
87
88
  const normalizedHeartbeat = normalizeHeartbeatSettingFields(settings)
89
+ const normalizedSupervisor = normalizeSupervisorSettings(settings)
88
90
  const nextResponseCacheTtlSec = parseIntSetting(
89
91
  settings.responseCacheTtlSec,
90
92
  15 * 60,
@@ -118,6 +120,7 @@ export async function PUT(req: Request) {
118
120
  settings.maxLinkedMemoriesExpanded = nextLinked
119
121
  Object.assign(settings, normalizedRuntime)
120
122
  Object.assign(settings, normalizedHeartbeat)
123
+ Object.assign(settings, normalizedSupervisor)
121
124
  settings.responseCacheTtlSec = nextResponseCacheTtlSec
122
125
  settings.responseCacheMaxEntries = nextResponseCacheMaxEntries
123
126
  settings.responseCacheEnabled = parseBoolSetting(settings.responseCacheEnabled, true)
@@ -10,6 +10,7 @@ import { UserPreferencesSection } from '@/views/settings/section-user-preference
10
10
  import { ThemeSection } from '@/views/settings/section-theme'
11
11
  import { OrchestratorSection } from '@/views/settings/section-orchestrator'
12
12
  import { RuntimeLoopSection } from '@/views/settings/section-runtime-loop'
13
+ import { SupervisorReflectionSection } from '@/views/settings/section-supervisor-reflection'
13
14
  import { CapabilityPolicySection } from '@/views/settings/section-capability-policy'
14
15
  import { WalletsSection } from '@/views/settings/section-wallets'
15
16
  import { StorageSection } from '@/views/settings/section-storage'
@@ -189,6 +190,14 @@ export default function SettingsRoute() {
189
190
  keywords: ['heartbeat', 'follow up', 'interval', 'ongoing'],
190
191
  render: () => <HeartbeatSection {...sectionProps} />,
191
192
  },
193
+ {
194
+ id: 'supervisor-reflection',
195
+ tabId: 'agents',
196
+ title: 'Supervisor & Reflection',
197
+ description: 'Automatic recovery from bad loops plus post-run reflection memory.',
198
+ keywords: ['supervisor', 'reflection', 'autonomy', 'memory', 'self-learning', 'replan'],
199
+ render: () => <SupervisorReflectionSection {...sectionProps} />,
200
+ },
192
201
  {
193
202
  id: 'embedding',
194
203
  tabId: 'memory',
package/src/cli/index.js CHANGED
@@ -44,6 +44,14 @@ const COMMAND_GROUPS = [
44
44
  }),
45
45
  ],
46
46
  },
47
+ {
48
+ name: 'autonomy',
49
+ description: 'Inspect supervisor incidents and reflection output',
50
+ commands: [
51
+ cmd('incidents', 'GET', '/autonomy/incidents', 'List supervisor incidents (use --query sessionId=..., --query taskId=..., --query limit=50)'),
52
+ cmd('reflections', 'GET', '/autonomy/reflections', 'List run reflections (use --query sessionId=..., --query taskId=..., --query limit=50)'),
53
+ ],
54
+ },
47
55
  {
48
56
  name: 'approvals',
49
57
  description: 'List and resolve human-loop approvals',
package/src/cli/spec.js CHANGED
@@ -25,6 +25,13 @@ const COMMAND_GROUPS = {
25
25
  login: { description: 'Validate an access key', method: 'POST', path: '/auth' },
26
26
  },
27
27
  },
28
+ autonomy: {
29
+ description: 'Autonomy supervisor inspection',
30
+ commands: {
31
+ incidents: { description: 'List supervisor incidents (supports --query sessionId=..., --query taskId=..., --query limit=50)', method: 'GET', path: '/autonomy/incidents' },
32
+ reflections: { description: 'List run reflections (supports --query sessionId=..., --query taskId=..., --query limit=50)', method: 'GET', path: '/autonomy/reflections' },
33
+ },
34
+ },
28
35
  approvals: {
29
36
  description: 'List and resolve human-loop approvals',
30
37
  commands: {
@@ -0,0 +1,80 @@
1
+ import type { AppSettings } from '@/types'
2
+
3
+ export type AutonomyRuntimeScope = 'chat' | 'task' | 'both' // 'both' matches either surface in runtimeScopeIncludes
4
+ // Defaults substituted by normalizeSupervisorSettings when a field is missing or cannot be parsed.
5
+ export const DEFAULT_SUPERVISOR_ENABLED = true
6
+ export const DEFAULT_SUPERVISOR_RUNTIME_SCOPE: AutonomyRuntimeScope = 'both'
7
+ export const DEFAULT_SUPERVISOR_NO_PROGRESS_LIMIT = 2
8
+ export const DEFAULT_SUPERVISOR_REPEATED_TOOL_LIMIT = 3
9
+ export const DEFAULT_REFLECTION_ENABLED = true
10
+ export const DEFAULT_REFLECTION_AUTO_WRITE_MEMORY = true
11
+ // Inclusive clamp bounds that normalizeSupervisorSettings applies to the two limit fields.
12
+ export const SUPERVISOR_NO_PROGRESS_LIMIT_MIN = 1
13
+ export const SUPERVISOR_NO_PROGRESS_LIMIT_MAX = 8
14
+ export const SUPERVISOR_REPEATED_TOOL_LIMIT_MIN = 2
15
+ export const SUPERVISOR_REPEATED_TOOL_LIMIT_MAX = 8
16
+
17
/**
 * Coerce a loosely-typed setting into an integer within `[min, max]`.
 *
 * Numbers are used as-is and strings are read with base-10 `parseInt`; every
 * other type counts as unparseable. A non-finite candidate (NaN, ±Infinity)
 * yields `fallback` unchanged — the fallback itself is not clamped.
 *
 * @param value Raw setting value of unknown type.
 * @param fallback Returned when `value` cannot be read as a finite number.
 * @param min Inclusive lower bound.
 * @param max Inclusive upper bound.
 * @returns The truncated, clamped integer, or `fallback`.
 */
function parseIntSetting(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = Number.NaN
  if (typeof value === 'number') {
    candidate = value
  } else if (typeof value === 'string') {
    candidate = Number.parseInt(value, 10)
  }
  if (!Number.isFinite(candidate)) return fallback

  // Drop any fractional part first, then cap from above and below.
  const whole = Math.trunc(candidate)
  const capped = Math.min(max, whole)
  return Math.max(min, capped)
}
26
+
27
/**
 * Coerce a loosely-typed setting into a boolean.
 *
 * Accepts real booleans, the numbers `1` / `0`, and the case-insensitive,
 * whitespace-trimmed strings `1/true/yes/on` and `0/false/no/off`. Anything
 * else yields `fallback`.
 *
 * The numeric forms mirror the `'1'` / `'0'` strings. Without them a client
 * sending `0` to disable a feature would silently get the fallback, which is
 * `true` for every setting normalized in this module.
 *
 * @param value Raw setting value of unknown type.
 * @param fallback Returned when `value` is not a recognized boolean form.
 * @returns The interpreted boolean, or `fallback`.
 */
function parseBoolSetting(value: unknown, fallback: boolean): boolean {
  if (typeof value === 'boolean') return value
  if (value === 1) return true
  if (value === 0) return false
  if (typeof value === 'string') {
    const normalized = value.trim().toLowerCase()
    if (['1', 'true', 'yes', 'on'].includes(normalized)) return true
    if (['0', 'false', 'no', 'off'].includes(normalized)) return false
  }
  return fallback
}
36
+
37
+ export interface NormalizedSupervisorSettings { // fully-populated result shape of normalizeSupervisorSettings; no field is optional
38
+ supervisorEnabled: boolean // falls back to DEFAULT_SUPERVISOR_ENABLED
39
+ supervisorRuntimeScope: AutonomyRuntimeScope // any value other than 'chat' | 'task' | 'both' becomes DEFAULT_SUPERVISOR_RUNTIME_SCOPE
40
+ supervisorNoProgressLimit: number // integer clamped to SUPERVISOR_NO_PROGRESS_LIMIT_MIN..SUPERVISOR_NO_PROGRESS_LIMIT_MAX
41
+ supervisorRepeatedToolLimit: number // integer clamped to SUPERVISOR_REPEATED_TOOL_LIMIT_MIN..SUPERVISOR_REPEATED_TOOL_LIMIT_MAX
42
+ reflectionEnabled: boolean // falls back to DEFAULT_REFLECTION_ENABLED
43
+ reflectionAutoWriteMemory: boolean // falls back to DEFAULT_REFLECTION_AUTO_WRITE_MEMORY
44
+ }
45
+
46
/**
 * Build a complete, validated set of supervisor/reflection settings from a
 * possibly partial or loosely-typed settings object.
 *
 * Each field is read independently: booleans go through `parseBoolSetting`,
 * the two limits go through `parseIntSetting` with their MIN/MAX bounds, and
 * the runtime scope must be exactly `'chat'`, `'task'` or `'both'`. Anything
 * missing or unrecognized is replaced by the matching `DEFAULT_*` constant.
 *
 * @param settings Raw settings; `null` and `undefined` are treated as `{}`.
 * @returns A new object with every supervisor/reflection field populated.
 */
export function normalizeSupervisorSettings(
  settings: Partial<AppSettings> | NormalizedSupervisorSettings | Record<string, unknown> | null | undefined,
): NormalizedSupervisorSettings {
  const source = settings || {}

  // Only the three literal scope values are accepted; everything else is defaulted.
  const rawScope = source.supervisorRuntimeScope
  let supervisorRuntimeScope: AutonomyRuntimeScope = DEFAULT_SUPERVISOR_RUNTIME_SCOPE
  if (rawScope === 'chat' || rawScope === 'task' || rawScope === 'both') {
    supervisorRuntimeScope = rawScope
  }

  const supervisorEnabled = parseBoolSetting(source.supervisorEnabled, DEFAULT_SUPERVISOR_ENABLED)
  const supervisorNoProgressLimit = parseIntSetting(
    source.supervisorNoProgressLimit,
    DEFAULT_SUPERVISOR_NO_PROGRESS_LIMIT,
    SUPERVISOR_NO_PROGRESS_LIMIT_MIN,
    SUPERVISOR_NO_PROGRESS_LIMIT_MAX,
  )
  const supervisorRepeatedToolLimit = parseIntSetting(
    source.supervisorRepeatedToolLimit,
    DEFAULT_SUPERVISOR_REPEATED_TOOL_LIMIT,
    SUPERVISOR_REPEATED_TOOL_LIMIT_MIN,
    SUPERVISOR_REPEATED_TOOL_LIMIT_MAX,
  )
  const reflectionEnabled = parseBoolSetting(source.reflectionEnabled, DEFAULT_REFLECTION_ENABLED)
  const reflectionAutoWriteMemory = parseBoolSetting(
    source.reflectionAutoWriteMemory,
    DEFAULT_REFLECTION_AUTO_WRITE_MEMORY,
  )

  return {
    supervisorEnabled,
    supervisorRuntimeScope,
    supervisorNoProgressLimit,
    supervisorRepeatedToolLimit,
    reflectionEnabled,
    reflectionAutoWriteMemory,
  }
}
74
+
75
/**
 * Report whether a configured runtime scope covers the given surface.
 *
 * @param runtimeScope Configured scope: `'chat'`, `'task'` or `'both'`.
 * @param surface The surface being checked.
 * @returns `true` when the scope is `'both'` or equals `surface`.
 */
export function runtimeScopeIncludes(
  runtimeScope: AutonomyRuntimeScope,
  surface: 'chat' | 'task',
): boolean {
  if (runtimeScope === 'both') return true
  return runtimeScope === surface
}
@@ -230,6 +230,41 @@ describe('main-agent-loop advanced', () => {
230
230
  assert.match(String(output.followupMessage || ''), /Resume from this next action/)
231
231
  })
232
232
 
233
+ it('uses the supervisor followup prompt when chat runs start thrashing on the same tool', () => { // feeds handleMainLoopRunResult three identical shell tool events and inspects the queued followup
234
+ const output = runWithTempDataDir(`
235
+ ${sessionSetupScript()}
236
+
237
+ const followup = mainLoop.handleMainLoopRunResult({
238
+ runId: 'run-supervisor',
239
+ sessionId: 'main',
240
+ message: 'Fix the broken deployment pipeline.',
241
+ internal: false,
242
+ source: 'chat',
243
+ resultText: 'Retried the same shell path several times and got the same failure.',
244
+ toolEvents: [
245
+ { name: 'shell', input: '{"cmd":"npm test"}' },
246
+ { name: 'shell', input: '{"cmd":"npm test"}' },
247
+ { name: 'shell', input: '{"cmd":"npm test"}' },
248
+ ],
249
+ })
250
+ const state = mainLoop.getMainLoopStateForSession('main')
251
+
252
+ console.log(JSON.stringify({
253
+ hasFollowup: followup !== null,
254
+ followupMessage: followup?.message ?? null,
255
+ chain: state?.followupChainCount ?? -1,
256
+ timelineSources: (state?.timeline || []).map((entry) => entry.source),
257
+ timelineNotes: (state?.timeline || []).map((entry) => entry.note),
258
+ }))
259
+ `) // runWithTempDataDir and sessionSetupScript are defined outside this hunk; `output` is the JSON object logged by the script above
260
+ // The followup must exist, bump the chain to 1, carry the supervisor wording, and leave a 'supervisor' entry in the timeline.
261
+ assert.equal(output.hasFollowup, true, 'supervisor should queue a recovery followup')
262
+ assert.equal(output.chain, 1, 'supervisor followup increments the chain')
263
+ assert.match(String(output.followupMessage || ''), /Supervisor intervention: stop repeating shell/i)
264
+ assert.ok((output.timelineSources as string[]).includes('supervisor'), 'supervisor interventions should be visible in timeline')
265
+ assert.ok((output.timelineNotes as string[]).some((note) => /Repeated tool use detected/i.test(String(note))), 'timeline should explain the supervisor trigger')
266
+ })
267
+
233
268
  it('persists and upgrades a skill blocker across recommend/install steps', () => {
234
269
  const output = runWithTempDataDir(`
235
270
  ${sessionSetupScript()}
@@ -1,6 +1,7 @@
1
1
  import { hmrSingleton } from '@/lib/shared-utils'
2
2
  import type { GoalContract, Message, MessageToolEvent, Session } from '@/types'
3
3
  import { mergeGoalContracts, parseGoalContractFromText, parseMainLoopPlan, parseMainLoopReview } from '@/lib/server/agents/autonomy-contract'
4
+ import { assessAutonomyRun } from '@/lib/server/autonomy/supervisor-reflection'
4
5
  import { enqueueSystemEvent } from '@/lib/server/runtime/system-events'
5
6
  import { loadSessions, loadSettings } from '@/lib/server/storage'
6
7
 
@@ -73,6 +74,7 @@ export interface PushMainLoopEventInput {
73
74
  }
74
75
 
75
76
  export interface HandleMainLoopRunResultInput {
77
+ runId?: string
76
78
  sessionId: string
77
79
  message: string
78
80
  internal: boolean
@@ -817,6 +819,8 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
817
819
  const state = getOrCreateState(input.sessionId)
818
820
  if (!state) return null
819
821
 
822
+ const sessions = loadSessions()
823
+ const session = sessions[input.sessionId] as Session | undefined
820
824
  const resultText = input.resultText || ''
821
825
  const persistedText = stripMainLoopMetaForPersistence(resultText)
822
826
  const toolEvents = Array.isArray(input.toolEvents) ? input.toolEvents : []
@@ -892,6 +896,36 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
892
896
  state.pendingEvents = []
893
897
  }
894
898
 
899
+ const assessment = assessAutonomyRun({
900
+ runId: input.runId || `main-loop-${input.sessionId}-${nowTs}`,
901
+ sessionId: input.sessionId,
902
+ source: input.source,
903
+ status: input.error ? 'failed' : 'completed',
904
+ resultText,
905
+ error: input.error,
906
+ toolEvents,
907
+ mainLoopState: state,
908
+ session: session || null,
909
+ settings: loadSettings(),
910
+ })
911
+ for (const incident of assessment.incidents) {
912
+ appendTimeline(
913
+ state,
914
+ 'supervisor',
915
+ `Supervisor: ${incident.summary}`,
916
+ incident.autoAction === 'block' ? 'blocked' : 'reflection',
917
+ )
918
+ }
919
+ const supervisorPrompt = assessment.shouldBlock ? null : assessment.interventionPrompt
920
+ if (assessment.shouldBlock) {
921
+ state.status = 'blocked'
922
+ state.paused = true
923
+ state.followupChainCount = 0
924
+ appendTimeline(state, 'supervisor', 'Supervisor paused the run after detecting a hard blocker.', 'blocked')
925
+ } else if (supervisorPrompt) {
926
+ state.paused = false
927
+ }
928
+
895
929
  const needsReplan = review?.needs_replan === true || ((review?.confidence ?? 1) < 0.45)
896
930
  const limit = followupLimit()
897
931
  const allowChatOriginFollowup = !input.internal
@@ -900,7 +934,9 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
900
934
  && !waitingForExternal
901
935
  && !gotTerminalAck
902
936
  && (
903
- needsReplan
937
+ !!supervisorPrompt
938
+ || assessment.shouldBlock
939
+ || needsReplan
904
940
  || heartbeat?.status === 'progress'
905
941
  || !!heartbeat?.nextAction
906
942
  || (!!plan?.current_step && toolNames.length > 0)
@@ -913,18 +949,19 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
913
949
  state.followupChainCount = 0
914
950
  if (gotTerminalAck && state.status !== 'blocked') state.status = 'ok'
915
951
  } else {
916
- const shouldContinue = needsReplan || state.status === 'progress' || (!!state.nextAction && toolNames.length > 0)
952
+ const shouldContinue = !!supervisorPrompt || needsReplan || state.status === 'progress' || (!!state.nextAction && toolNames.length > 0)
917
953
  if (shouldContinue && state.followupChainCount < limit) {
918
954
  state.followupChainCount += 1
919
- const message = needsReplan
920
- ? 'Replan from the latest outcome, then execute only the highest-value remaining step. Do not repeat completed work.'
921
- : state.nextAction
922
- ? `Continue the objective. Resume from this next action: ${state.nextAction}`
923
- : 'Continue the objective and finish the next highest-value remaining step.'
955
+ const message = supervisorPrompt
956
+ || (needsReplan
957
+ ? 'Replan from the latest outcome, then execute only the highest-value remaining step. Do not repeat completed work.'
958
+ : state.nextAction
959
+ ? `Continue the objective. Resume from this next action: ${state.nextAction}`
960
+ : 'Continue the objective and finish the next highest-value remaining step.')
924
961
  followup = {
925
962
  message,
926
963
  delayMs: DEFAULT_FOLLOWUP_DELAY_MS,
927
- dedupeKey: `main-loop:${input.sessionId}:${state.followupChainCount}:${state.currentPlanStep || state.nextAction || 'continue'}`,
964
+ dedupeKey: `main-loop:${input.sessionId}:${state.followupChainCount}:${supervisorPrompt ? 'supervisor' : (state.currentPlanStep || state.nextAction || 'continue')}`,
928
965
  }
929
966
  appendTimeline(state, 'followup', message, 'progress')
930
967
  } else {
@@ -0,0 +1,279 @@
1
import assert from 'node:assert/strict'
import { spawnSync } from 'node:child_process'
import fs from 'node:fs'
import os from 'node:os'
import path from 'node:path'
import { describe, it } from 'node:test'
import { fileURLToPath } from 'node:url'

import { assessAutonomyRun } from '@/lib/server/autonomy/supervisor-reflection'
9
+
10
// Repository root, four directories above this test file. fileURLToPath is used
// instead of URL#pathname because the latter stays percent-encoded (a checkout
// path containing a space becomes "%20") and yields "/C:/..." on Windows; both
// would make the subprocess `cwd` point at a directory that does not exist.
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../../../..')
11
+
12
/**
 * Run `script` as an ES module in a child Node process (loaded through `tsx`)
 * with `DATA_DIR` and `WORKSPACE_DIR` pointed at a fresh temporary directory,
 * and return the last stdout line that starts with `{`, parsed as JSON.
 *
 * The temporary directory is removed afterwards whether or not the run passed.
 *
 * @param script Module source passed to `node --eval`.
 * @returns The parsed JSON object, or `{}` when no line starts with `{`.
 * @throws AssertionError when the child does not exit with status 0. The
 *   message includes the spawn error (launch failure or timeout) when present.
 */
function runWithTempDataDir(script: string) {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-supervisor-reflection-'))
  try {
    const result = spawnSync(
      process.execPath,
      ['--import', 'tsx', '--input-type=module', '--eval', script],
      {
        cwd: repoRoot,
        env: {
          ...process.env,
          DATA_DIR: tempDir,
          WORKSPACE_DIR: path.join(tempDir, 'workspace'),
          SWARMCLAW_BUILD_MODE: '1',
        },
        encoding: 'utf-8',
        timeout: 20000,
      },
    )
    // A launch failure or timeout leaves `status` null and is described only
    // by `result.error`; include it so the assertion message is actionable.
    const failureDetail = [result.error?.message, result.stderr || result.stdout]
      .filter(Boolean)
      .join('\n') || 'subprocess failed'
    assert.equal(result.status, 0, failureDetail)
    const lines = (result.stdout || '')
      .trim()
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean)
    // The script may log other output first; the payload is the last JSON-looking line.
    const jsonLine = [...lines].reverse().find((line) => line.startsWith('{'))
    return JSON.parse(jsonLine || '{}') as Record<string, unknown>
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true })
  }
}
42
+
43
+ describe('supervisor-reflection', () => {
44
+ it('recommends an automatic supervisor recovery step for repeated tool thrash', () => { // in-process check: calls assessAutonomyRun directly, no subprocess
45
+ const assessment = assessAutonomyRun({
46
+ runId: 'run-1',
47
+ sessionId: 'session-1',
48
+ source: 'chat',
49
+ status: 'completed',
50
+ resultText: 'Retried the same shell command and got the same output.',
51
+ toolEvents: [ // three identical shell calls, the same count as supervisorRepeatedToolLimit below
52
+ { name: 'shell', input: '{"cmd":"npm test"}' },
53
+ { name: 'shell', input: '{"cmd":"npm test"}' },
54
+ { name: 'shell', input: '{"cmd":"npm test"}' },
55
+ ],
56
+ mainLoopState: {
57
+ followupChainCount: 1,
58
+ summary: 'Retried the same shell command and got the same output.',
59
+ },
60
+ settings: {
61
+ supervisorEnabled: true,
62
+ supervisorRuntimeScope: 'both',
63
+ supervisorRepeatedToolLimit: 3,
64
+ supervisorNoProgressLimit: 2,
65
+ reflectionEnabled: true,
66
+ reflectionAutoWriteMemory: true,
67
+ },
68
+ session: {
69
+ id: 'session-1',
70
+ name: 'Autonomy Test',
71
+ cwd: process.cwd(),
72
+ user: 'tester',
73
+ provider: 'openai',
74
+ model: 'gpt-test',
75
+ claudeSessionId: null,
76
+ messages: [],
77
+ createdAt: Date.now(),
78
+ lastActiveAt: Date.now(),
79
+ } as any, // NOTE(review): presumably a partial Session fixture, hence the cast — confirm against the Session type
80
+ })
81
+ // Expect a repeated_tool incident and a recovery prompt naming the tool, without a hard block.
82
+ assert.ok(assessment.incidents.some((incident) => incident.kind === 'repeated_tool'))
83
+ assert.match(String(assessment.interventionPrompt || ''), /stop repeating shell/i)
84
+ assert.equal(assessment.shouldBlock, false)
85
+ })
86
+
87
+ it('persists reflections and auto-written reflection memory', () => { // runs observeAutonomyRunOutcome in a subprocess with a stubbed generateText, then checks the stored reflection and memories
88
+ const output = runWithTempDataDir(`
89
+ const storageMod = await import('@/lib/server/storage')
90
+ const storage = storageMod.default || storageMod['module.exports'] || storageMod
91
+ const reflectionMod = await import('@/lib/server/autonomy/supervisor-reflection')
92
+ const mod = reflectionMod.default || reflectionMod['module.exports'] || reflectionMod
93
+ const memoryDbMod = await import('@/lib/server/memory/memory-db')
94
+ const memoryMod = memoryDbMod.default || memoryDbMod['module.exports'] || memoryDbMod
95
+
96
+ storage.saveAgents({
97
+ 'agent-a': {
98
+ id: 'agent-a',
99
+ name: 'Agent A',
100
+ provider: 'openai',
101
+ model: 'gpt-test',
102
+ },
103
+ })
104
+
105
+ storage.saveSessions({
106
+ s1: {
107
+ id: 's1',
108
+ name: 'Autonomy Session',
109
+ cwd: process.cwd(),
110
+ user: 'tester',
111
+ provider: 'openai',
112
+ model: 'gpt-test',
113
+ claudeSessionId: null,
114
+ messages: [
115
+ { role: 'user', text: 'Repair the deployment workflow and keep notes for later.', time: 1 },
116
+ { role: 'assistant', text: 'I retried the same shell path and nothing changed.', time: 2 },
117
+ ],
118
+ createdAt: 1,
119
+ lastActiveAt: 2,
120
+ sessionType: 'human',
121
+ agentId: 'agent-a',
122
+ },
123
+ })
124
+
125
+ storage.saveSettings({
126
+ supervisorEnabled: true,
127
+ supervisorRuntimeScope: 'both',
128
+ supervisorNoProgressLimit: 2,
129
+ supervisorRepeatedToolLimit: 3,
130
+ reflectionEnabled: true,
131
+ reflectionAutoWriteMemory: true,
132
+ })
133
+
134
+ const result = await mod.observeAutonomyRunOutcome({
135
+ runId: 'run-1',
136
+ sessionId: 's1',
137
+ agentId: 'agent-a',
138
+ source: 'chat',
139
+ status: 'completed',
140
+ resultText: 'I retried the same shell path and nothing changed.',
141
+ toolEvents: [
142
+ { name: 'shell', input: '{"cmd":"npm test"}' },
143
+ { name: 'shell', input: '{"cmd":"npm test"}' },
144
+ { name: 'shell', input: '{"cmd":"npm test"}' },
145
+ ],
146
+ mainLoopState: {
147
+ followupChainCount: 2,
148
+ summary: 'I retried the same shell path and nothing changed.',
149
+ },
150
+ sourceMessage: 'Repair the deployment workflow and keep notes for later.',
151
+ }, {
152
+ generateText: async () => JSON.stringify({
153
+ summary: 'Deployment repair reflection',
154
+ invariants: ['Verify changed files and command output before marking the task complete.'],
155
+ derived: ['Switch recovery strategy after two identical shell failures in a row.'],
156
+ failures: ['Repeated shell retries without changing inputs waste budget.'],
157
+ lessons: ['Capture a short recovery brief before continuing a stuck run.'],
158
+ communication: ['Keep execution updates concise when reporting repair progress.'],
159
+ relationship: ['Treat the user as wanting decisive recovery rather than repeated status chatter.'],
160
+ significant_events: ['The deployment workflow is currently broken and needs a confirmed repair path.'],
161
+ profile: ['The user is directly responsible for the deployment workflow.'],
162
+ boundaries: ['Do not claim the repair is complete without concrete verification evidence.'],
163
+ open_loops: ['Follow up with the final verification result once the repair path succeeds.'],
164
+ }),
165
+ })
166
+
167
+ const memories = memoryMod.getMemoryDb().list(undefined, 50)
168
+ .filter((entry) => entry.metadata && entry.metadata.origin === 'autonomy-reflection')
169
+
170
+ console.log(JSON.stringify({
171
+ incidentKinds: result.incidents.map((incident) => incident.kind).sort(),
172
+ reflectionSummary: result.reflection?.summary ?? null,
173
+ reflectionCount: mod.listRunReflections({ sessionId: 's1' }).length,
174
+ autoMemoryCount: result.reflection?.autoMemoryIds?.length ?? 0,
175
+ memoryCategories: memories.map((entry) => entry.category).sort(),
176
+ profileNotes: result.reflection?.profileNotes ?? [],
177
+ boundaryNotes: result.reflection?.boundaryNotes ?? [],
178
+ openLoopNotes: result.reflection?.openLoopNotes ?? [],
179
+ }))
180
+ `) // `output` is the JSON object logged by the script above
181
+ // Incident kinds and memory categories were sorted inside the script, so the expected arrays below are in sorted order.
182
+ assert.deepEqual(output.incidentKinds, ['no_progress', 'repeated_tool'])
183
+ assert.equal(output.reflectionSummary, 'Deployment repair reflection')
184
+ assert.equal(output.reflectionCount, 1)
185
+ assert.equal(output.autoMemoryCount, 10) // the stubbed reflection has ten one-item note lists; presumably one memory is written per note
186
+ assert.deepEqual(output.profileNotes, ['The user is directly responsible for the deployment workflow.'])
187
+ assert.deepEqual(output.boundaryNotes, ['Do not claim the repair is complete without concrete verification evidence.'])
188
+ assert.deepEqual(output.openLoopNotes, ['Follow up with the final verification result once the repair path succeeds.'])
189
+ assert.deepEqual(output.memoryCategories, [
190
+ 'reflection/boundary',
191
+ 'reflection/communication',
192
+ 'reflection/derived',
193
+ 'reflection/failure',
194
+ 'reflection/invariant',
195
+ 'reflection/lesson',
196
+ 'reflection/open_loop',
197
+ 'reflection/profile',
198
+ 'reflection/relationship',
199
+ 'reflection/significant_event',
200
+ ])
201
+ })
202
+
203
+ it('reflects short human chats when they contain durable personal context', () => { // same subprocess setup, but the run has no toolEvents or mainLoopState — only a user message and a reply
204
+ const output = runWithTempDataDir(`
205
+ const storageMod = await import('@/lib/server/storage')
206
+ const storage = storageMod.default || storageMod['module.exports'] || storageMod
207
+ const reflectionMod = await import('@/lib/server/autonomy/supervisor-reflection')
208
+ const mod = reflectionMod.default || reflectionMod['module.exports'] || reflectionMod
209
+
210
+ storage.saveAgents({
211
+ 'agent-a': {
212
+ id: 'agent-a',
213
+ name: 'Agent A',
214
+ provider: 'openai',
215
+ model: 'gpt-test',
216
+ },
217
+ })
218
+
219
+ storage.saveSessions({
220
+ s2: {
221
+ id: 's2',
222
+ name: 'Human Context Session',
223
+ cwd: process.cwd(),
224
+ user: 'tester',
225
+ provider: 'openai',
226
+ model: 'gpt-test',
227
+ claudeSessionId: null,
228
+ messages: [
229
+ { role: 'user', text: 'I am moving to Lisbon next month and prefer short check-ins while I am juggling the move.', time: 1 },
230
+ { role: 'assistant', text: 'Understood. I will keep updates tight and remember the move timing.', time: 2 },
231
+ ],
232
+ createdAt: 1,
233
+ lastActiveAt: 2,
234
+ sessionType: 'human',
235
+ agentId: 'agent-a',
236
+ },
237
+ })
238
+
239
+ storage.saveSettings({
240
+ supervisorEnabled: true,
241
+ supervisorRuntimeScope: 'both',
242
+ supervisorNoProgressLimit: 2,
243
+ supervisorRepeatedToolLimit: 3,
244
+ reflectionEnabled: true,
245
+ reflectionAutoWriteMemory: true,
246
+ })
247
+
248
+ const result = await mod.observeAutonomyRunOutcome({
249
+ runId: 'run-human',
250
+ sessionId: 's2',
251
+ agentId: 'agent-a',
252
+ source: 'chat',
253
+ status: 'completed',
254
+ resultText: 'I will keep updates tight and remember the move timing.',
255
+ sourceMessage: 'I am moving to Lisbon next month and prefer short check-ins while I am juggling the move.',
256
+ }, {
257
+ generateText: async () => JSON.stringify({
258
+ summary: 'Human context reflection',
259
+ communication: ['Prefer short check-ins while the move is in progress.'],
260
+ significant_events: ['Moving to Lisbon next month.'],
261
+ open_loops: ['Check in again once the move is complete.'],
262
+ profile: ['Currently planning a move to Lisbon.'],
263
+ }),
264
+ })
265
+
266
+ console.log(JSON.stringify({
267
+ reflectionSummary: result.reflection?.summary ?? null,
268
+ communicationNotes: result.reflection?.communicationNotes ?? [],
269
+ significantEventNotes: result.reflection?.significantEventNotes ?? [],
270
+ openLoopNotes: result.reflection?.openLoopNotes ?? [],
271
+ }))
272
+ `) // `output` is the JSON object logged by the script above
273
+ // The stubbed notes must come back verbatim; the stub's `profile` note is not asserted here.
274
+ assert.equal(output.reflectionSummary, 'Human context reflection')
275
+ assert.deepEqual(output.communicationNotes, ['Prefer short check-ins while the move is in progress.'])
276
+ assert.deepEqual(output.significantEventNotes, ['Moving to Lisbon next month.'])
277
+ assert.deepEqual(output.openLoopNotes, ['Check in again once the move is complete.'])
278
+ })
279
+ })