@swarmclawai/swarmclaw 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/app/api/autonomy/incidents/route.ts +19 -0
- package/src/app/api/autonomy/reflections/route.ts +19 -0
- package/src/app/api/settings/route.ts +3 -0
- package/src/app/settings/page.tsx +9 -0
- package/src/cli/index.js +8 -0
- package/src/cli/spec.js +7 -0
- package/src/lib/autonomy/supervisor-settings.ts +80 -0
- package/src/lib/server/agents/main-agent-loop-advanced.test.ts +35 -0
- package/src/lib/server/agents/main-agent-loop.ts +45 -8
- package/src/lib/server/autonomy/supervisor-reflection.test.ts +279 -0
- package/src/lib/server/autonomy/supervisor-reflection.ts +817 -0
- package/src/lib/server/memory/temporal-decay.ts +6 -0
- package/src/lib/server/runtime/queue.ts +118 -12
- package/src/lib/server/runtime/session-run-manager.ts +51 -1
- package/src/lib/server/storage.ts +27 -1
- package/src/types/index.ts +57 -0
- package/src/views/settings/section-supervisor-reflection.tsx +148 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@swarmclawai/swarmclaw",
|
|
3
|
-
"version": "1.0.5",
|
|
3
|
+
"version": "1.0.6",
|
|
4
4
|
"description": "Self-hosted AI orchestration control plane for OpenClaw, multi-agent workflows, runtime skills, crypto wallets, and chat platform connectors.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"publishConfig": {
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { NextResponse } from 'next/server'
|
|
2
|
+
|
|
3
|
+
import { listSupervisorIncidents } from '@/lib/server/autonomy/supervisor-reflection'
|
|
4
|
+
|
|
5
|
+
export const dynamic = 'force-dynamic'
|
|
6
|
+
|
|
7
|
+
/**
 * Parses the optional `limit` query parameter.
 *
 * @param value Raw query-string value, or `null` when the parameter is absent.
 * @returns The base-10 integer parsed from `value`, or `undefined` when the
 *   value is missing, empty, or does not parse to a finite number.
 */
function parseLimit(value: string | null): number | undefined {
  if (value === null || value === '') {
    return undefined
  }

  const limit = Number.parseInt(value, 10)
  // parseInt yields NaN for non-numeric text and Infinity for absurdly long digit runs.
  if (!Number.isFinite(limit)) {
    return undefined
  }
  return limit
}
|
|
12
|
+
|
|
13
|
+
/**
 * GET handler for the supervisor incidents endpoint.
 *
 * Reads the optional `sessionId`, `taskId` and `limit` query parameters
 * (missing or empty values become `undefined`) and responds with the JSON
 * serialisation of whatever `listSupervisorIncidents` returns for them.
 */
export async function GET(req: Request) {
  const { searchParams } = new URL(req.url)

  const filters = {
    sessionId: searchParams.get('sessionId') || undefined,
    taskId: searchParams.get('taskId') || undefined,
    limit: parseLimit(searchParams.get('limit')),
  }

  return NextResponse.json(listSupervisorIncidents(filters))
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { NextResponse } from 'next/server'
|
|
2
|
+
|
|
3
|
+
import { listRunReflections } from '@/lib/server/autonomy/supervisor-reflection'
|
|
4
|
+
|
|
5
|
+
export const dynamic = 'force-dynamic'
|
|
6
|
+
|
|
7
|
+
/**
 * Parses the optional `limit` query parameter.
 *
 * @param value Raw query-string value, or `null` when the parameter is absent.
 * @returns The base-10 integer parsed from `value`, or `undefined` when the
 *   value is missing, empty, or does not parse to a finite number.
 */
function parseLimit(value: string | null): number | undefined {
  if (value === null || value === '') {
    return undefined
  }

  const limit = Number.parseInt(value, 10)
  // parseInt yields NaN for non-numeric text and Infinity for absurdly long digit runs.
  if (!Number.isFinite(limit)) {
    return undefined
  }
  return limit
}
|
|
12
|
+
|
|
13
|
+
/**
 * GET handler for the run reflections endpoint.
 *
 * Reads the optional `sessionId`, `taskId` and `limit` query parameters
 * (missing or empty values become `undefined`) and responds with the JSON
 * serialisation of whatever `listRunReflections` returns for them.
 */
export async function GET(req: Request) {
  const { searchParams } = new URL(req.url)

  const filters = {
    sessionId: searchParams.get('sessionId') || undefined,
    taskId: searchParams.get('taskId') || undefined,
    limit: parseLimit(searchParams.get('limit')),
  }

  return NextResponse.json(listRunReflections(filters))
}
|
|
@@ -3,6 +3,7 @@ import { normalizeHeartbeatSettingFields } from '@/lib/runtime/heartbeat-default
|
|
|
3
3
|
import { normalizeWhatsAppApprovedContacts } from '@/lib/server/connectors/pairing'
|
|
4
4
|
import { loadPublicSettings, loadSettings, saveSettings } from '@/lib/server/storage'
|
|
5
5
|
import { normalizeRuntimeSettingFields } from '@/lib/runtime/runtime-loop'
|
|
6
|
+
import { normalizeSupervisorSettings } from '@/lib/autonomy/supervisor-settings'
|
|
6
7
|
export const dynamic = 'force-dynamic'
|
|
7
8
|
|
|
8
9
|
|
|
@@ -85,6 +86,7 @@ export async function PUT(req: Request) {
|
|
|
85
86
|
)
|
|
86
87
|
const normalizedRuntime = normalizeRuntimeSettingFields(settings)
|
|
87
88
|
const normalizedHeartbeat = normalizeHeartbeatSettingFields(settings)
|
|
89
|
+
const normalizedSupervisor = normalizeSupervisorSettings(settings)
|
|
88
90
|
const nextResponseCacheTtlSec = parseIntSetting(
|
|
89
91
|
settings.responseCacheTtlSec,
|
|
90
92
|
15 * 60,
|
|
@@ -118,6 +120,7 @@ export async function PUT(req: Request) {
|
|
|
118
120
|
settings.maxLinkedMemoriesExpanded = nextLinked
|
|
119
121
|
Object.assign(settings, normalizedRuntime)
|
|
120
122
|
Object.assign(settings, normalizedHeartbeat)
|
|
123
|
+
Object.assign(settings, normalizedSupervisor)
|
|
121
124
|
settings.responseCacheTtlSec = nextResponseCacheTtlSec
|
|
122
125
|
settings.responseCacheMaxEntries = nextResponseCacheMaxEntries
|
|
123
126
|
settings.responseCacheEnabled = parseBoolSetting(settings.responseCacheEnabled, true)
|
|
@@ -10,6 +10,7 @@ import { UserPreferencesSection } from '@/views/settings/section-user-preference
|
|
|
10
10
|
import { ThemeSection } from '@/views/settings/section-theme'
|
|
11
11
|
import { OrchestratorSection } from '@/views/settings/section-orchestrator'
|
|
12
12
|
import { RuntimeLoopSection } from '@/views/settings/section-runtime-loop'
|
|
13
|
+
import { SupervisorReflectionSection } from '@/views/settings/section-supervisor-reflection'
|
|
13
14
|
import { CapabilityPolicySection } from '@/views/settings/section-capability-policy'
|
|
14
15
|
import { WalletsSection } from '@/views/settings/section-wallets'
|
|
15
16
|
import { StorageSection } from '@/views/settings/section-storage'
|
|
@@ -189,6 +190,14 @@ export default function SettingsRoute() {
|
|
|
189
190
|
keywords: ['heartbeat', 'follow up', 'interval', 'ongoing'],
|
|
190
191
|
render: () => <HeartbeatSection {...sectionProps} />,
|
|
191
192
|
},
|
|
193
|
+
{
|
|
194
|
+
id: 'supervisor-reflection',
|
|
195
|
+
tabId: 'agents',
|
|
196
|
+
title: 'Supervisor & Reflection',
|
|
197
|
+
description: 'Automatic recovery from bad loops plus post-run reflection memory.',
|
|
198
|
+
keywords: ['supervisor', 'reflection', 'autonomy', 'memory', 'self-learning', 'replan'],
|
|
199
|
+
render: () => <SupervisorReflectionSection {...sectionProps} />,
|
|
200
|
+
},
|
|
192
201
|
{
|
|
193
202
|
id: 'embedding',
|
|
194
203
|
tabId: 'memory',
|
package/src/cli/index.js
CHANGED
|
@@ -44,6 +44,14 @@ const COMMAND_GROUPS = [
|
|
|
44
44
|
}),
|
|
45
45
|
],
|
|
46
46
|
},
|
|
47
|
+
{
|
|
48
|
+
name: 'autonomy',
|
|
49
|
+
description: 'Inspect supervisor incidents and reflection output',
|
|
50
|
+
commands: [
|
|
51
|
+
cmd('incidents', 'GET', '/autonomy/incidents', 'List supervisor incidents (use --query sessionId=..., --query taskId=..., --query limit=50)'),
|
|
52
|
+
cmd('reflections', 'GET', '/autonomy/reflections', 'List run reflections (use --query sessionId=..., --query taskId=..., --query limit=50)'),
|
|
53
|
+
],
|
|
54
|
+
},
|
|
47
55
|
{
|
|
48
56
|
name: 'approvals',
|
|
49
57
|
description: 'List and resolve human-loop approvals',
|
package/src/cli/spec.js
CHANGED
|
@@ -25,6 +25,13 @@ const COMMAND_GROUPS = {
|
|
|
25
25
|
login: { description: 'Validate an access key', method: 'POST', path: '/auth' },
|
|
26
26
|
},
|
|
27
27
|
},
|
|
28
|
+
autonomy: {
|
|
29
|
+
description: 'Autonomy supervisor inspection',
|
|
30
|
+
commands: {
|
|
31
|
+
incidents: { description: 'List supervisor incidents (supports --query sessionId=..., --query taskId=..., --query limit=50)', method: 'GET', path: '/autonomy/incidents' },
|
|
32
|
+
reflections: { description: 'List run reflections (supports --query sessionId=..., --query taskId=..., --query limit=50)', method: 'GET', path: '/autonomy/reflections' },
|
|
33
|
+
},
|
|
34
|
+
},
|
|
28
35
|
approvals: {
|
|
29
36
|
description: 'List and resolve human-loop approvals',
|
|
30
37
|
commands: {
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import type { AppSettings } from '@/types'
|
|
2
|
+
|
|
3
|
+
/** Execution surfaces a supervisor setting can target: chat runs, task runs, or both. */
export type AutonomyRuntimeScope = 'chat' | 'task' | 'both'

// Fallback values used by normalizeSupervisorSettings when a field is missing or invalid.
export const DEFAULT_SUPERVISOR_ENABLED = true
export const DEFAULT_SUPERVISOR_RUNTIME_SCOPE: AutonomyRuntimeScope = 'both'
export const DEFAULT_SUPERVISOR_NO_PROGRESS_LIMIT = 2
export const DEFAULT_SUPERVISOR_REPEATED_TOOL_LIMIT = 3
export const DEFAULT_REFLECTION_ENABLED = true
export const DEFAULT_REFLECTION_AUTO_WRITE_MEMORY = true

// Inclusive bounds that the two numeric supervisor limits are clamped to.
export const SUPERVISOR_NO_PROGRESS_LIMIT_MIN = 1
export const SUPERVISOR_NO_PROGRESS_LIMIT_MAX = 8
export const SUPERVISOR_REPEATED_TOOL_LIMIT_MIN = 2
export const SUPERVISOR_REPEATED_TOOL_LIMIT_MAX = 8
|
|
16
|
+
|
|
17
|
+
/**
 * Coerces an untyped setting to an integer within `[min, max]`.
 *
 * Numbers are used directly and strings are parsed as base-10 integers; any
 * other input, or a non-finite result, yields `fallback` unchanged (the
 * fallback itself is not clamped).
 *
 * @param value Raw setting value of unknown type.
 * @param fallback Value returned when `value` cannot be interpreted.
 * @param min Lower bound applied to a parsed value.
 * @param max Upper bound applied to a parsed value.
 * @returns The truncated, clamped integer, or `fallback`.
 */
function parseIntSetting(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = Number.NaN
  if (typeof value === 'number') {
    candidate = value
  } else if (typeof value === 'string') {
    candidate = Number.parseInt(value, 10)
  }

  if (!Number.isFinite(candidate)) {
    return fallback
  }

  // Drop any fractional part before clamping so the result is always an integer.
  const whole = Math.trunc(candidate)
  const capped = Math.min(max, whole)
  return Math.max(min, capped)
}
|
|
26
|
+
|
|
27
|
+
/**
 * Coerces an untyped setting to a boolean.
 *
 * Booleans pass through. Strings are trimmed and lower-cased, then matched
 * against the accepted truthy (`1`, `true`, `yes`, `on`) and falsy
 * (`0`, `false`, `no`, `off`) spellings. Everything else yields `fallback`.
 *
 * @param value Raw setting value of unknown type.
 * @param fallback Value returned when `value` is not a recognised boolean form.
 * @returns The interpreted boolean, or `fallback`.
 */
function parseBoolSetting(value: unknown, fallback: boolean): boolean {
  if (typeof value === 'boolean') {
    return value
  }
  if (typeof value !== 'string') {
    return fallback
  }

  switch (value.trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
      return true
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false
    default:
      return fallback
  }
}
|
|
36
|
+
|
|
37
|
+
/** Fully populated supervisor and reflection settings, as produced by normalizeSupervisorSettings. */
export interface NormalizedSupervisorSettings {
  supervisorEnabled: boolean
  supervisorRuntimeScope: AutonomyRuntimeScope
  supervisorNoProgressLimit: number
  supervisorRepeatedToolLimit: number
  reflectionEnabled: boolean
  reflectionAutoWriteMemory: boolean
}

/**
 * Builds a complete supervisor/reflection settings object from partial or
 * untyped input.
 *
 * Each field is coerced independently: booleans via `parseBoolSetting`,
 * numeric limits via `parseIntSetting` (clamped to their MIN/MAX constants),
 * and the runtime scope is kept only when it is exactly `'chat'`, `'task'`
 * or `'both'`. Missing or invalid fields fall back to the DEFAULT_* constants.
 *
 * @param settings Stored or submitted settings; `null`/`undefined` is treated as empty.
 * @returns A new object containing only the six normalized fields.
 */
export function normalizeSupervisorSettings(
  settings: Partial<AppSettings> | NormalizedSupervisorSettings | Record<string, unknown> | null | undefined,
): NormalizedSupervisorSettings {
  const source = settings || {}

  const requestedScope = source.supervisorRuntimeScope
  const supervisorRuntimeScope = requestedScope === 'chat' || requestedScope === 'task' || requestedScope === 'both'
    ? requestedScope
    : DEFAULT_SUPERVISOR_RUNTIME_SCOPE

  const supervisorEnabled = parseBoolSetting(source.supervisorEnabled, DEFAULT_SUPERVISOR_ENABLED)
  const supervisorNoProgressLimit = parseIntSetting(
    source.supervisorNoProgressLimit,
    DEFAULT_SUPERVISOR_NO_PROGRESS_LIMIT,
    SUPERVISOR_NO_PROGRESS_LIMIT_MIN,
    SUPERVISOR_NO_PROGRESS_LIMIT_MAX,
  )
  const supervisorRepeatedToolLimit = parseIntSetting(
    source.supervisorRepeatedToolLimit,
    DEFAULT_SUPERVISOR_REPEATED_TOOL_LIMIT,
    SUPERVISOR_REPEATED_TOOL_LIMIT_MIN,
    SUPERVISOR_REPEATED_TOOL_LIMIT_MAX,
  )
  const reflectionEnabled = parseBoolSetting(source.reflectionEnabled, DEFAULT_REFLECTION_ENABLED)
  const reflectionAutoWriteMemory = parseBoolSetting(
    source.reflectionAutoWriteMemory,
    DEFAULT_REFLECTION_AUTO_WRITE_MEMORY,
  )

  // Key order mirrors the interface so serialised output stays stable.
  return {
    supervisorEnabled,
    supervisorRuntimeScope,
    supervisorNoProgressLimit,
    supervisorRepeatedToolLimit,
    reflectionEnabled,
    reflectionAutoWriteMemory,
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Reports whether a configured runtime scope covers a given surface.
 *
 * @param runtimeScope Configured scope (`'chat'`, `'task'` or `'both'`).
 * @param surface Surface being checked.
 * @returns `true` when the scope is `'both'` or equals `surface`.
 */
export function runtimeScopeIncludes(
  runtimeScope: AutonomyRuntimeScope,
  surface: 'chat' | 'task',
): boolean {
  if (runtimeScope === 'both') {
    return true
  }
  return runtimeScope === surface
}
|
|
@@ -230,6 +230,41 @@ describe('main-agent-loop advanced', () => {
|
|
|
230
230
|
assert.match(String(output.followupMessage || ''), /Resume from this next action/)
|
|
231
231
|
})
|
|
232
232
|
|
|
233
|
+
it('uses the supervisor followup prompt when chat runs start thrashing on the same tool', () => {
|
|
234
|
+
const output = runWithTempDataDir(`
|
|
235
|
+
${sessionSetupScript()}
|
|
236
|
+
|
|
237
|
+
const followup = mainLoop.handleMainLoopRunResult({
|
|
238
|
+
runId: 'run-supervisor',
|
|
239
|
+
sessionId: 'main',
|
|
240
|
+
message: 'Fix the broken deployment pipeline.',
|
|
241
|
+
internal: false,
|
|
242
|
+
source: 'chat',
|
|
243
|
+
resultText: 'Retried the same shell path several times and got the same failure.',
|
|
244
|
+
toolEvents: [
|
|
245
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
246
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
247
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
248
|
+
],
|
|
249
|
+
})
|
|
250
|
+
const state = mainLoop.getMainLoopStateForSession('main')
|
|
251
|
+
|
|
252
|
+
console.log(JSON.stringify({
|
|
253
|
+
hasFollowup: followup !== null,
|
|
254
|
+
followupMessage: followup?.message ?? null,
|
|
255
|
+
chain: state?.followupChainCount ?? -1,
|
|
256
|
+
timelineSources: (state?.timeline || []).map((entry) => entry.source),
|
|
257
|
+
timelineNotes: (state?.timeline || []).map((entry) => entry.note),
|
|
258
|
+
}))
|
|
259
|
+
`)
|
|
260
|
+
|
|
261
|
+
assert.equal(output.hasFollowup, true, 'supervisor should queue a recovery followup')
|
|
262
|
+
assert.equal(output.chain, 1, 'supervisor followup increments the chain')
|
|
263
|
+
assert.match(String(output.followupMessage || ''), /Supervisor intervention: stop repeating shell/i)
|
|
264
|
+
assert.ok((output.timelineSources as string[]).includes('supervisor'), 'supervisor interventions should be visible in timeline')
|
|
265
|
+
assert.ok((output.timelineNotes as string[]).some((note) => /Repeated tool use detected/i.test(String(note))), 'timeline should explain the supervisor trigger')
|
|
266
|
+
})
|
|
267
|
+
|
|
233
268
|
it('persists and upgrades a skill blocker across recommend/install steps', () => {
|
|
234
269
|
const output = runWithTempDataDir(`
|
|
235
270
|
${sessionSetupScript()}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { hmrSingleton } from '@/lib/shared-utils'
|
|
2
2
|
import type { GoalContract, Message, MessageToolEvent, Session } from '@/types'
|
|
3
3
|
import { mergeGoalContracts, parseGoalContractFromText, parseMainLoopPlan, parseMainLoopReview } from '@/lib/server/agents/autonomy-contract'
|
|
4
|
+
import { assessAutonomyRun } from '@/lib/server/autonomy/supervisor-reflection'
|
|
4
5
|
import { enqueueSystemEvent } from '@/lib/server/runtime/system-events'
|
|
5
6
|
import { loadSessions, loadSettings } from '@/lib/server/storage'
|
|
6
7
|
|
|
@@ -73,6 +74,7 @@ export interface PushMainLoopEventInput {
|
|
|
73
74
|
}
|
|
74
75
|
|
|
75
76
|
export interface HandleMainLoopRunResultInput {
|
|
77
|
+
runId?: string
|
|
76
78
|
sessionId: string
|
|
77
79
|
message: string
|
|
78
80
|
internal: boolean
|
|
@@ -817,6 +819,8 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
|
|
|
817
819
|
const state = getOrCreateState(input.sessionId)
|
|
818
820
|
if (!state) return null
|
|
819
821
|
|
|
822
|
+
const sessions = loadSessions()
|
|
823
|
+
const session = sessions[input.sessionId] as Session | undefined
|
|
820
824
|
const resultText = input.resultText || ''
|
|
821
825
|
const persistedText = stripMainLoopMetaForPersistence(resultText)
|
|
822
826
|
const toolEvents = Array.isArray(input.toolEvents) ? input.toolEvents : []
|
|
@@ -892,6 +896,36 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
|
|
|
892
896
|
state.pendingEvents = []
|
|
893
897
|
}
|
|
894
898
|
|
|
899
|
+
const assessment = assessAutonomyRun({
|
|
900
|
+
runId: input.runId || `main-loop-${input.sessionId}-${nowTs}`,
|
|
901
|
+
sessionId: input.sessionId,
|
|
902
|
+
source: input.source,
|
|
903
|
+
status: input.error ? 'failed' : 'completed',
|
|
904
|
+
resultText,
|
|
905
|
+
error: input.error,
|
|
906
|
+
toolEvents,
|
|
907
|
+
mainLoopState: state,
|
|
908
|
+
session: session || null,
|
|
909
|
+
settings: loadSettings(),
|
|
910
|
+
})
|
|
911
|
+
for (const incident of assessment.incidents) {
|
|
912
|
+
appendTimeline(
|
|
913
|
+
state,
|
|
914
|
+
'supervisor',
|
|
915
|
+
`Supervisor: ${incident.summary}`,
|
|
916
|
+
incident.autoAction === 'block' ? 'blocked' : 'reflection',
|
|
917
|
+
)
|
|
918
|
+
}
|
|
919
|
+
const supervisorPrompt = assessment.shouldBlock ? null : assessment.interventionPrompt
|
|
920
|
+
if (assessment.shouldBlock) {
|
|
921
|
+
state.status = 'blocked'
|
|
922
|
+
state.paused = true
|
|
923
|
+
state.followupChainCount = 0
|
|
924
|
+
appendTimeline(state, 'supervisor', 'Supervisor paused the run after detecting a hard blocker.', 'blocked')
|
|
925
|
+
} else if (supervisorPrompt) {
|
|
926
|
+
state.paused = false
|
|
927
|
+
}
|
|
928
|
+
|
|
895
929
|
const needsReplan = review?.needs_replan === true || ((review?.confidence ?? 1) < 0.45)
|
|
896
930
|
const limit = followupLimit()
|
|
897
931
|
const allowChatOriginFollowup = !input.internal
|
|
@@ -900,7 +934,9 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
|
|
|
900
934
|
&& !waitingForExternal
|
|
901
935
|
&& !gotTerminalAck
|
|
902
936
|
&& (
|
|
903
|
-
needsReplan
|
|
937
|
+
!!supervisorPrompt
|
|
938
|
+
|| assessment.shouldBlock
|
|
939
|
+
|| needsReplan
|
|
904
940
|
|| heartbeat?.status === 'progress'
|
|
905
941
|
|| !!heartbeat?.nextAction
|
|
906
942
|
|| (!!plan?.current_step && toolNames.length > 0)
|
|
@@ -913,18 +949,19 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
|
|
|
913
949
|
state.followupChainCount = 0
|
|
914
950
|
if (gotTerminalAck && state.status !== 'blocked') state.status = 'ok'
|
|
915
951
|
} else {
|
|
916
|
-
const shouldContinue = needsReplan || state.status === 'progress' || (!!state.nextAction && toolNames.length > 0)
|
|
952
|
+
const shouldContinue = !!supervisorPrompt || needsReplan || state.status === 'progress' || (!!state.nextAction && toolNames.length > 0)
|
|
917
953
|
if (shouldContinue && state.followupChainCount < limit) {
|
|
918
954
|
state.followupChainCount += 1
|
|
919
|
-
const message =
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
955
|
+
const message = supervisorPrompt
|
|
956
|
+
|| (needsReplan
|
|
957
|
+
? 'Replan from the latest outcome, then execute only the highest-value remaining step. Do not repeat completed work.'
|
|
958
|
+
: state.nextAction
|
|
959
|
+
? `Continue the objective. Resume from this next action: ${state.nextAction}`
|
|
960
|
+
: 'Continue the objective and finish the next highest-value remaining step.')
|
|
924
961
|
followup = {
|
|
925
962
|
message,
|
|
926
963
|
delayMs: DEFAULT_FOLLOWUP_DELAY_MS,
|
|
927
|
-
dedupeKey: `main-loop:${input.sessionId}:${state.followupChainCount}:${state.currentPlanStep || state.nextAction || 'continue'}`,
|
|
964
|
+
dedupeKey: `main-loop:${input.sessionId}:${state.followupChainCount}:${supervisorPrompt ? 'supervisor' : (state.currentPlanStep || state.nextAction || 'continue')}`,
|
|
928
965
|
}
|
|
929
966
|
appendTimeline(state, 'followup', message, 'progress')
|
|
930
967
|
} else {
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
import fs from 'node:fs'
import os from 'node:os'
import path from 'node:path'
import { spawnSync } from 'node:child_process'
import { describe, it } from 'node:test'
import { fileURLToPath } from 'node:url'

import { assessAutonomyRun } from '@/lib/server/autonomy/supervisor-reflection'
|
|
9
|
+
|
|
10
|
+
// Repository root, four directories above this test file. It is used as the
// working directory for the spawned subprocesses. fileURLToPath is used rather
// than the raw URL pathname because the pathname stays percent-encoded (spaces
// become %20) and carries a leading slash before Windows drive letters, both
// of which yield a directory that does not exist.
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../../../..')
|
|
11
|
+
|
|
12
|
+
/**
 * Runs an ESM script in a child Node process against a throwaway data directory.
 *
 * A fresh temp directory is created and exposed to the child through
 * `DATA_DIR` and `WORKSPACE_DIR` (with `SWARMCLAW_BUILD_MODE` set to `'1'`).
 * The script is evaluated with the `tsx` loader from the repository root.
 * The temp directory is removed afterwards whether or not the run succeeds.
 *
 * @param script Module source passed to `node --eval`.
 * @returns The parsed JSON of the last stdout line beginning with `{`, or an
 *   empty object when no such line was printed.
 * @throws AssertionError when the child exits with a non-zero (or null) status.
 */
function runWithTempDataDir(script: string) {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-supervisor-reflection-'))
  try {
    const nodeArgs = ['--import', 'tsx', '--input-type=module', '--eval', script]
    const child = spawnSync(process.execPath, nodeArgs, {
      cwd: repoRoot,
      env: {
        ...process.env,
        DATA_DIR: tempDir,
        WORKSPACE_DIR: path.join(tempDir, 'workspace'),
        SWARMCLAW_BUILD_MODE: '1',
      },
      encoding: 'utf-8',
      timeout: 20000,
    })
    assert.equal(child.status, 0, child.stderr || child.stdout || 'subprocess failed')

    // Scan forward and keep the final JSON-looking line; earlier output may be log noise.
    let jsonLine: string | undefined
    for (const rawLine of (child.stdout || '').trim().split('\n')) {
      const line = rawLine.trim()
      if (line && line.startsWith('{')) {
        jsonLine = line
      }
    }
    return JSON.parse(jsonLine || '{}') as Record<string, unknown>
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true })
  }
}
|
|
42
|
+
|
|
43
|
+
describe('supervisor-reflection', () => {
|
|
44
|
+
it('recommends an automatic supervisor recovery step for repeated tool thrash', () => {
|
|
45
|
+
const assessment = assessAutonomyRun({
|
|
46
|
+
runId: 'run-1',
|
|
47
|
+
sessionId: 'session-1',
|
|
48
|
+
source: 'chat',
|
|
49
|
+
status: 'completed',
|
|
50
|
+
resultText: 'Retried the same shell command and got the same output.',
|
|
51
|
+
toolEvents: [
|
|
52
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
53
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
54
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
55
|
+
],
|
|
56
|
+
mainLoopState: {
|
|
57
|
+
followupChainCount: 1,
|
|
58
|
+
summary: 'Retried the same shell command and got the same output.',
|
|
59
|
+
},
|
|
60
|
+
settings: {
|
|
61
|
+
supervisorEnabled: true,
|
|
62
|
+
supervisorRuntimeScope: 'both',
|
|
63
|
+
supervisorRepeatedToolLimit: 3,
|
|
64
|
+
supervisorNoProgressLimit: 2,
|
|
65
|
+
reflectionEnabled: true,
|
|
66
|
+
reflectionAutoWriteMemory: true,
|
|
67
|
+
},
|
|
68
|
+
session: {
|
|
69
|
+
id: 'session-1',
|
|
70
|
+
name: 'Autonomy Test',
|
|
71
|
+
cwd: process.cwd(),
|
|
72
|
+
user: 'tester',
|
|
73
|
+
provider: 'openai',
|
|
74
|
+
model: 'gpt-test',
|
|
75
|
+
claudeSessionId: null,
|
|
76
|
+
messages: [],
|
|
77
|
+
createdAt: Date.now(),
|
|
78
|
+
lastActiveAt: Date.now(),
|
|
79
|
+
} as any,
|
|
80
|
+
})
|
|
81
|
+
|
|
82
|
+
assert.ok(assessment.incidents.some((incident) => incident.kind === 'repeated_tool'))
|
|
83
|
+
assert.match(String(assessment.interventionPrompt || ''), /stop repeating shell/i)
|
|
84
|
+
assert.equal(assessment.shouldBlock, false)
|
|
85
|
+
})
|
|
86
|
+
|
|
87
|
+
it('persists reflections and auto-written reflection memory', () => {
|
|
88
|
+
const output = runWithTempDataDir(`
|
|
89
|
+
const storageMod = await import('@/lib/server/storage')
|
|
90
|
+
const storage = storageMod.default || storageMod['module.exports'] || storageMod
|
|
91
|
+
const reflectionMod = await import('@/lib/server/autonomy/supervisor-reflection')
|
|
92
|
+
const mod = reflectionMod.default || reflectionMod['module.exports'] || reflectionMod
|
|
93
|
+
const memoryDbMod = await import('@/lib/server/memory/memory-db')
|
|
94
|
+
const memoryMod = memoryDbMod.default || memoryDbMod['module.exports'] || memoryDbMod
|
|
95
|
+
|
|
96
|
+
storage.saveAgents({
|
|
97
|
+
'agent-a': {
|
|
98
|
+
id: 'agent-a',
|
|
99
|
+
name: 'Agent A',
|
|
100
|
+
provider: 'openai',
|
|
101
|
+
model: 'gpt-test',
|
|
102
|
+
},
|
|
103
|
+
})
|
|
104
|
+
|
|
105
|
+
storage.saveSessions({
|
|
106
|
+
s1: {
|
|
107
|
+
id: 's1',
|
|
108
|
+
name: 'Autonomy Session',
|
|
109
|
+
cwd: process.cwd(),
|
|
110
|
+
user: 'tester',
|
|
111
|
+
provider: 'openai',
|
|
112
|
+
model: 'gpt-test',
|
|
113
|
+
claudeSessionId: null,
|
|
114
|
+
messages: [
|
|
115
|
+
{ role: 'user', text: 'Repair the deployment workflow and keep notes for later.', time: 1 },
|
|
116
|
+
{ role: 'assistant', text: 'I retried the same shell path and nothing changed.', time: 2 },
|
|
117
|
+
],
|
|
118
|
+
createdAt: 1,
|
|
119
|
+
lastActiveAt: 2,
|
|
120
|
+
sessionType: 'human',
|
|
121
|
+
agentId: 'agent-a',
|
|
122
|
+
},
|
|
123
|
+
})
|
|
124
|
+
|
|
125
|
+
storage.saveSettings({
|
|
126
|
+
supervisorEnabled: true,
|
|
127
|
+
supervisorRuntimeScope: 'both',
|
|
128
|
+
supervisorNoProgressLimit: 2,
|
|
129
|
+
supervisorRepeatedToolLimit: 3,
|
|
130
|
+
reflectionEnabled: true,
|
|
131
|
+
reflectionAutoWriteMemory: true,
|
|
132
|
+
})
|
|
133
|
+
|
|
134
|
+
const result = await mod.observeAutonomyRunOutcome({
|
|
135
|
+
runId: 'run-1',
|
|
136
|
+
sessionId: 's1',
|
|
137
|
+
agentId: 'agent-a',
|
|
138
|
+
source: 'chat',
|
|
139
|
+
status: 'completed',
|
|
140
|
+
resultText: 'I retried the same shell path and nothing changed.',
|
|
141
|
+
toolEvents: [
|
|
142
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
143
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
144
|
+
{ name: 'shell', input: '{"cmd":"npm test"}' },
|
|
145
|
+
],
|
|
146
|
+
mainLoopState: {
|
|
147
|
+
followupChainCount: 2,
|
|
148
|
+
summary: 'I retried the same shell path and nothing changed.',
|
|
149
|
+
},
|
|
150
|
+
sourceMessage: 'Repair the deployment workflow and keep notes for later.',
|
|
151
|
+
}, {
|
|
152
|
+
generateText: async () => JSON.stringify({
|
|
153
|
+
summary: 'Deployment repair reflection',
|
|
154
|
+
invariants: ['Verify changed files and command output before marking the task complete.'],
|
|
155
|
+
derived: ['Switch recovery strategy after two identical shell failures in a row.'],
|
|
156
|
+
failures: ['Repeated shell retries without changing inputs waste budget.'],
|
|
157
|
+
lessons: ['Capture a short recovery brief before continuing a stuck run.'],
|
|
158
|
+
communication: ['Keep execution updates concise when reporting repair progress.'],
|
|
159
|
+
relationship: ['Treat the user as wanting decisive recovery rather than repeated status chatter.'],
|
|
160
|
+
significant_events: ['The deployment workflow is currently broken and needs a confirmed repair path.'],
|
|
161
|
+
profile: ['The user is directly responsible for the deployment workflow.'],
|
|
162
|
+
boundaries: ['Do not claim the repair is complete without concrete verification evidence.'],
|
|
163
|
+
open_loops: ['Follow up with the final verification result once the repair path succeeds.'],
|
|
164
|
+
}),
|
|
165
|
+
})
|
|
166
|
+
|
|
167
|
+
const memories = memoryMod.getMemoryDb().list(undefined, 50)
|
|
168
|
+
.filter((entry) => entry.metadata && entry.metadata.origin === 'autonomy-reflection')
|
|
169
|
+
|
|
170
|
+
console.log(JSON.stringify({
|
|
171
|
+
incidentKinds: result.incidents.map((incident) => incident.kind).sort(),
|
|
172
|
+
reflectionSummary: result.reflection?.summary ?? null,
|
|
173
|
+
reflectionCount: mod.listRunReflections({ sessionId: 's1' }).length,
|
|
174
|
+
autoMemoryCount: result.reflection?.autoMemoryIds?.length ?? 0,
|
|
175
|
+
memoryCategories: memories.map((entry) => entry.category).sort(),
|
|
176
|
+
profileNotes: result.reflection?.profileNotes ?? [],
|
|
177
|
+
boundaryNotes: result.reflection?.boundaryNotes ?? [],
|
|
178
|
+
openLoopNotes: result.reflection?.openLoopNotes ?? [],
|
|
179
|
+
}))
|
|
180
|
+
`)
|
|
181
|
+
|
|
182
|
+
assert.deepEqual(output.incidentKinds, ['no_progress', 'repeated_tool'])
|
|
183
|
+
assert.equal(output.reflectionSummary, 'Deployment repair reflection')
|
|
184
|
+
assert.equal(output.reflectionCount, 1)
|
|
185
|
+
assert.equal(output.autoMemoryCount, 10)
|
|
186
|
+
assert.deepEqual(output.profileNotes, ['The user is directly responsible for the deployment workflow.'])
|
|
187
|
+
assert.deepEqual(output.boundaryNotes, ['Do not claim the repair is complete without concrete verification evidence.'])
|
|
188
|
+
assert.deepEqual(output.openLoopNotes, ['Follow up with the final verification result once the repair path succeeds.'])
|
|
189
|
+
assert.deepEqual(output.memoryCategories, [
|
|
190
|
+
'reflection/boundary',
|
|
191
|
+
'reflection/communication',
|
|
192
|
+
'reflection/derived',
|
|
193
|
+
'reflection/failure',
|
|
194
|
+
'reflection/invariant',
|
|
195
|
+
'reflection/lesson',
|
|
196
|
+
'reflection/open_loop',
|
|
197
|
+
'reflection/profile',
|
|
198
|
+
'reflection/relationship',
|
|
199
|
+
'reflection/significant_event',
|
|
200
|
+
])
|
|
201
|
+
})
|
|
202
|
+
|
|
203
|
+
it('reflects short human chats when they contain durable personal context', () => {
|
|
204
|
+
const output = runWithTempDataDir(`
|
|
205
|
+
const storageMod = await import('@/lib/server/storage')
|
|
206
|
+
const storage = storageMod.default || storageMod['module.exports'] || storageMod
|
|
207
|
+
const reflectionMod = await import('@/lib/server/autonomy/supervisor-reflection')
|
|
208
|
+
const mod = reflectionMod.default || reflectionMod['module.exports'] || reflectionMod
|
|
209
|
+
|
|
210
|
+
storage.saveAgents({
|
|
211
|
+
'agent-a': {
|
|
212
|
+
id: 'agent-a',
|
|
213
|
+
name: 'Agent A',
|
|
214
|
+
provider: 'openai',
|
|
215
|
+
model: 'gpt-test',
|
|
216
|
+
},
|
|
217
|
+
})
|
|
218
|
+
|
|
219
|
+
storage.saveSessions({
|
|
220
|
+
s2: {
|
|
221
|
+
id: 's2',
|
|
222
|
+
name: 'Human Context Session',
|
|
223
|
+
cwd: process.cwd(),
|
|
224
|
+
user: 'tester',
|
|
225
|
+
provider: 'openai',
|
|
226
|
+
model: 'gpt-test',
|
|
227
|
+
claudeSessionId: null,
|
|
228
|
+
messages: [
|
|
229
|
+
{ role: 'user', text: 'I am moving to Lisbon next month and prefer short check-ins while I am juggling the move.', time: 1 },
|
|
230
|
+
{ role: 'assistant', text: 'Understood. I will keep updates tight and remember the move timing.', time: 2 },
|
|
231
|
+
],
|
|
232
|
+
createdAt: 1,
|
|
233
|
+
lastActiveAt: 2,
|
|
234
|
+
sessionType: 'human',
|
|
235
|
+
agentId: 'agent-a',
|
|
236
|
+
},
|
|
237
|
+
})
|
|
238
|
+
|
|
239
|
+
storage.saveSettings({
|
|
240
|
+
supervisorEnabled: true,
|
|
241
|
+
supervisorRuntimeScope: 'both',
|
|
242
|
+
supervisorNoProgressLimit: 2,
|
|
243
|
+
supervisorRepeatedToolLimit: 3,
|
|
244
|
+
reflectionEnabled: true,
|
|
245
|
+
reflectionAutoWriteMemory: true,
|
|
246
|
+
})
|
|
247
|
+
|
|
248
|
+
const result = await mod.observeAutonomyRunOutcome({
|
|
249
|
+
runId: 'run-human',
|
|
250
|
+
sessionId: 's2',
|
|
251
|
+
agentId: 'agent-a',
|
|
252
|
+
source: 'chat',
|
|
253
|
+
status: 'completed',
|
|
254
|
+
resultText: 'I will keep updates tight and remember the move timing.',
|
|
255
|
+
sourceMessage: 'I am moving to Lisbon next month and prefer short check-ins while I am juggling the move.',
|
|
256
|
+
}, {
|
|
257
|
+
generateText: async () => JSON.stringify({
|
|
258
|
+
summary: 'Human context reflection',
|
|
259
|
+
communication: ['Prefer short check-ins while the move is in progress.'],
|
|
260
|
+
significant_events: ['Moving to Lisbon next month.'],
|
|
261
|
+
open_loops: ['Check in again once the move is complete.'],
|
|
262
|
+
profile: ['Currently planning a move to Lisbon.'],
|
|
263
|
+
}),
|
|
264
|
+
})
|
|
265
|
+
|
|
266
|
+
console.log(JSON.stringify({
|
|
267
|
+
reflectionSummary: result.reflection?.summary ?? null,
|
|
268
|
+
communicationNotes: result.reflection?.communicationNotes ?? [],
|
|
269
|
+
significantEventNotes: result.reflection?.significantEventNotes ?? [],
|
|
270
|
+
openLoopNotes: result.reflection?.openLoopNotes ?? [],
|
|
271
|
+
}))
|
|
272
|
+
`)
|
|
273
|
+
|
|
274
|
+
assert.equal(output.reflectionSummary, 'Human context reflection')
|
|
275
|
+
assert.deepEqual(output.communicationNotes, ['Prefer short check-ins while the move is in progress.'])
|
|
276
|
+
assert.deepEqual(output.significantEventNotes, ['Moving to Lisbon next month.'])
|
|
277
|
+
assert.deepEqual(output.openLoopNotes, ['Check in again once the move is complete.'])
|
|
278
|
+
})
|
|
279
|
+
})
|