@swarmclawai/swarmclaw 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -7
- package/package.json +2 -2
- package/src/app/api/notifications/route.ts +11 -12
- package/src/app/page.tsx +9 -0
- package/src/components/chat/chat-list.tsx +10 -9
- package/src/components/home/home-view.tsx +13 -2
- package/src/components/layout/app-layout.tsx +1 -0
- package/src/components/shared/command-palette.tsx +4 -1
- package/src/components/shared/notification-center.tsx +7 -1
- package/src/components/shared/search-dialog.tsx +10 -2
- package/src/lib/local-observability.test.ts +73 -0
- package/src/lib/local-observability.ts +47 -0
- package/src/lib/notification-utils.test.ts +72 -0
- package/src/lib/notification-utils.ts +68 -0
- package/src/lib/providers/openclaw.test.ts +21 -1
- package/src/lib/providers/openclaw.ts +22 -0
- package/src/lib/runtime-loop.ts +1 -1
- package/src/lib/server/agent-thread-session.test.ts +41 -0
- package/src/lib/server/agent-thread-session.ts +1 -0
- package/src/lib/server/chat-execution-advanced.test.ts +7 -0
- package/src/lib/server/chat-execution-eval-history.test.ts +111 -0
- package/src/lib/server/chat-execution.ts +22 -5
- package/src/lib/server/create-notification.test.ts +94 -0
- package/src/lib/server/create-notification.ts +31 -25
- package/src/lib/server/daemon-state.test.ts +50 -0
- package/src/lib/server/daemon-state.ts +121 -38
- package/src/lib/server/eval/agent-regression-advanced.test.ts +11 -0
- package/src/lib/server/eval/agent-regression.test.ts +13 -1
- package/src/lib/server/eval/agent-regression.ts +221 -1
- package/src/lib/server/memory-policy.test.ts +32 -0
- package/src/lib/server/memory-policy.ts +25 -0
- package/src/lib/server/plugins-advanced.test.ts +7 -0
- package/src/lib/server/runtime-settings.test.ts +2 -2
- package/src/lib/server/session-tools/crud.test.ts +136 -0
- package/src/lib/server/session-tools/crud.ts +44 -2
- package/src/lib/server/session-tools/delegate-fallback.test.ts +36 -0
- package/src/lib/server/session-tools/delegate.ts +30 -0
- package/src/lib/server/session-tools/discovery-approvals.test.ts +40 -0
- package/src/lib/server/session-tools/discovery.ts +7 -6
- package/src/lib/server/session-tools/memory.ts +156 -6
- package/src/lib/server/session-tools/session-tools-wiring.test.ts +12 -0
- package/src/lib/server/session-tools/subagent.ts +4 -4
- package/src/lib/server/storage.ts +14 -1
- package/src/lib/server/stream-agent-chat.test.ts +78 -1
- package/src/lib/server/stream-agent-chat.ts +225 -22
- package/src/lib/server/tool-aliases.ts +1 -1
- package/src/lib/server/tool-capability-policy.ts +1 -1
- package/src/stores/use-app-store.ts +26 -1
- package/src/types/index.ts +4 -0
|
@@ -5,9 +5,9 @@ import { startScheduler, stopScheduler } from './scheduler'
|
|
|
5
5
|
import { sweepOrphanedBrowsers, getActiveBrowserCount } from './session-tools'
|
|
6
6
|
import {
|
|
7
7
|
autoStartConnectors,
|
|
8
|
-
stopAllConnectors,
|
|
9
8
|
listRunningConnectors,
|
|
10
9
|
sendConnectorMessage,
|
|
10
|
+
stopAllConnectors,
|
|
11
11
|
startConnector,
|
|
12
12
|
getConnectorStatus,
|
|
13
13
|
checkConnectorHealth,
|
|
@@ -25,7 +25,7 @@ import { WORKSPACE_DIR } from './data-dir'
|
|
|
25
25
|
import { DEFAULT_HEARTBEAT_INTERVAL_SEC } from '@/lib/heartbeat-defaults'
|
|
26
26
|
import { genId } from '@/lib/id'
|
|
27
27
|
import path from 'node:path'
|
|
28
|
-
import type { WebhookRetryEntry } from '@/types'
|
|
28
|
+
import type { Session, WebhookRetryEntry } from '@/types'
|
|
29
29
|
import { createNotification } from '@/lib/server/create-notification'
|
|
30
30
|
import { pingProvider, OPENAI_COMPATIBLE_DEFAULTS } from '@/lib/server/provider-health'
|
|
31
31
|
import { runIntegrityMonitor } from '@/lib/server/integrity-monitor'
|
|
@@ -75,17 +75,41 @@ function parseHeartbeatIntervalSec(value: unknown, fallback = DEFAULT_HEARTBEAT_
|
|
|
75
75
|
return Math.max(0, Math.min(3600, Math.trunc(parsed)))
|
|
76
76
|
}
|
|
77
77
|
|
|
78
|
-
function
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
78
|
+
/**
 * Decides whether a failed provider reachability ping should produce a notification.
 *
 * Every provider id is reported except the exact, case-sensitive id `openclaw`.
 * NOTE(review): presumably openclaw is excluded because it is covered by
 * runOpenClawGatewayHealthChecks — confirm.
 *
 * @param provider Provider id as stored on the agent/credential tuple.
 * @returns `false` only for `'openclaw'`, `true` for anything else.
 */
export function shouldNotifyProviderReachabilityIssue(provider: string): boolean {
  const isOpenClaw = provider === 'openclaw'
  return !isOpenClaw
}
|
|
81
|
+
|
|
82
|
+
// Session `user` values (compared after trim + lowercase) for which
// shouldSuppressSessionHeartbeatHealthAlert suppresses heartbeat health alerts.
const SYNTHETIC_HEALTH_SESSION_USERS = new Set(['workbench', 'comparison-bench'])
|
|
83
|
+
// Lowercase prefixes tested by hasSyntheticHealthPrefix against session ids and agent ids.
const SYNTHETIC_HEALTH_SESSION_PREFIXES = ['wb-', 'cmp-']
|
|
84
|
+
|
|
85
|
+
/**
 * Reports whether `value` is a string that, once trimmed and lowercased,
 * begins with one of SYNTHETIC_HEALTH_SESSION_PREFIXES.
 *
 * @param value Any value; non-strings never match.
 * @returns `true` when a synthetic prefix is found, otherwise `false`.
 */
function hasSyntheticHealthPrefix(value: unknown): boolean {
  if (typeof value !== 'string') return false

  const candidate = value.trim().toLowerCase()
  for (const prefix of SYNTHETIC_HEALTH_SESSION_PREFIXES) {
    if (candidate.startsWith(prefix)) return true
  }
  return false
}
|
|
89
|
+
|
|
90
|
+
/**
 * Decides whether heartbeat health alerts should be skipped for a session.
 *
 * A session is suppressed when any of the following holds (checked in order):
 *  1. its `user` (trimmed, lowercased) is in SYNTHETIC_HEALTH_SESSION_USERS;
 *  2. its `id` or `shortcutForAgentId` carries a synthetic prefix;
 *  3. its `name` (trimmed, lowercased) starts with `workbench `,
 *     `assistant benchmark ` or `comparison `.
 *
 * @param session The identifying fields of the session being checked.
 * @returns `true` when alerts for this session should be suppressed.
 */
export function shouldSuppressSessionHeartbeatHealthAlert(
  session: Pick<Session, 'id' | 'name' | 'user' | 'shortcutForAgentId'>,
): boolean {
  // Non-string fields are treated as empty so they can never match.
  const normalize = (raw: unknown): string => (typeof raw === 'string' ? raw.trim().toLowerCase() : '')

  if (SYNTHETIC_HEALTH_SESSION_USERS.has(normalize(session.user))) return true

  const prefixedFields: unknown[] = [session.id, session.shortcutForAgentId]
  if (prefixedFields.some((field) => hasSyntheticHealthPrefix(field))) return true

  const sessionName = normalize(session.name)
  const syntheticNamePrefixes = ['workbench ', 'assistant benchmark ', 'comparison ']
  return syntheticNamePrefixes.some((namePrefix) => sessionName.startsWith(namePrefix))
}
|
|
103
|
+
|
|
104
|
+
/**
 * Decides whether health alerts should be skipped for an agent.
 *
 * @param agentId Agent id to inspect.
 * @returns `true` when the id carries one of the synthetic prefixes
 *   recognised by hasSyntheticHealthPrefix.
 */
export function shouldSuppressSyntheticAgentHealthAlert(agentId: string): boolean {
  const isSyntheticAgent = hasSyntheticHealthPrefix(agentId)
  return isSyntheticAgent
}
|
|
107
|
+
|
|
108
|
+
/**
 * Builds the notification dedup key for a session heartbeat health alert.
 *
 * The key has the form `health-alert:session-heartbeat:<state>:<sessionId>`.
 *
 * @param sessionId Id of the session the alert is about.
 * @param state Which heartbeat condition is being reported.
 * @returns The dedup key string.
 */
export function buildSessionHeartbeatHealthDedupKey(
  sessionId: string,
  state: 'stale' | 'auto-disabled',
): string {
  const keyPrefix = 'health-alert:session-heartbeat:'
  return keyPrefix.concat(state, ':', sessionId)
}
|
|
90
114
|
|
|
91
115
|
// Store daemon state on globalThis to survive HMR reloads
|
|
@@ -268,23 +292,24 @@ function stopQueueProcessor() {
|
|
|
268
292
|
}
|
|
269
293
|
}
|
|
270
294
|
|
|
271
|
-
async function sendHealthAlert(
|
|
295
|
+
/**
 * Reports a health problem: logs it with `console.warn` and records a
 * `warning` notification titled "SwarmClaw health alert".
 *
 * The notification is created with `dispatchExternally: false`.
 * NOTE(review): the result of createNotification is not awaited here — confirm
 * it is synchronous or safe to fire-and-forget.
 *
 * @param input Either the alert text alone, or an object carrying the text plus
 *   an optional dedup key and the entity the alert refers to. When no dedup key
 *   is given (or it is empty), `health-alert:<text>` is used.
 */
async function sendHealthAlert(input: string | {
  text: string
  dedupKey?: string
  entityType?: string
  entityId?: string
}) {
  let text: string
  let dedupKey: string | undefined
  let entityType: string | undefined
  let entityId: string | undefined

  if (typeof input === 'string') {
    text = input
  } else {
    ({ text, dedupKey, entityType, entityId } = input)
  }

  console.warn(`[health] ${text}`)
  createNotification({
    type: 'warning',
    title: 'SwarmClaw health alert',
    message: text,
    dedupKey: dedupKey || `health-alert:${text}`,
    entityType,
    entityId,
    dispatchExternally: false,
  })
}
|
|
289
314
|
|
|
290
315
|
async function runConnectorHealthChecks(now: number) {
|
|
@@ -526,6 +551,7 @@ async function runProviderHealthChecks() {
|
|
|
526
551
|
|
|
527
552
|
for (const agent of Object.values(agents) as Record<string, unknown>[]) {
|
|
528
553
|
if (!agent?.id || typeof agent.id !== 'string') continue
|
|
554
|
+
if (shouldSuppressSyntheticAgentHealthAlert(agent.id)) continue
|
|
529
555
|
const provider = typeof agent.provider === 'string' ? agent.provider : ''
|
|
530
556
|
if (!provider || ['claude-cli', 'codex-cli', 'opencode-cli'].includes(provider)) continue
|
|
531
557
|
|
|
@@ -564,9 +590,11 @@ async function runProviderHealthChecks() {
|
|
|
564
590
|
const result = await pingProvider(tuple.provider, apiKey, endpoint)
|
|
565
591
|
|
|
566
592
|
if (!result.ok) {
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
593
|
+
if (!shouldNotifyProviderReachabilityIssue(tuple.provider)) {
|
|
594
|
+
continue
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
const dedupKey = `provider-down:${tuple.credentialId || tuple.provider}`
|
|
570
598
|
|
|
571
599
|
const entityType = tuple.credentialId ? 'credential' : undefined
|
|
572
600
|
const entityId = tuple.credentialId || undefined
|
|
@@ -596,6 +624,7 @@ async function runOpenClawGatewayHealthChecks() {
|
|
|
596
624
|
|
|
597
625
|
for (const agent of Object.values(agents) as Record<string, unknown>[]) {
|
|
598
626
|
if (!agent?.id || typeof agent.id !== 'string') continue
|
|
627
|
+
if (shouldSuppressSyntheticAgentHealthAlert(agent.id)) continue
|
|
599
628
|
if (agent.provider !== 'openclaw') continue
|
|
600
629
|
|
|
601
630
|
const key = `openclaw:${agent.id}`
|
|
@@ -747,6 +776,11 @@ async function runHealthChecks() {
|
|
|
747
776
|
if (session.heartbeatEnabled !== true) continue
|
|
748
777
|
|
|
749
778
|
const sessionId = session.id
|
|
779
|
+
if (shouldSuppressSessionHeartbeatHealthAlert(session as Pick<Session, 'id' | 'name' | 'user' | 'shortcutForAgentId'>)) {
|
|
780
|
+
ds.staleSessionIds.delete(sessionId)
|
|
781
|
+
continue
|
|
782
|
+
}
|
|
783
|
+
|
|
750
784
|
const sessionLabel = String(session.name || sessionId)
|
|
751
785
|
const intervalSec = parseHeartbeatIntervalSec(session.heartbeatIntervalSec, DEFAULT_HEARTBEAT_INTERVAL_SEC)
|
|
752
786
|
if (intervalSec <= 0) continue
|
|
@@ -762,9 +796,12 @@ async function runHealthChecks() {
|
|
|
762
796
|
session.lastActiveAt = now
|
|
763
797
|
sessionsDirty = true
|
|
764
798
|
ds.staleSessionIds.delete(sessionId)
|
|
765
|
-
await sendHealthAlert(
|
|
766
|
-
`Auto-disabled heartbeat for stale session "${sessionLabel}" after ${Math.round(staleForMs / 60_000)}m of inactivity.`,
|
|
767
|
-
|
|
799
|
+
await sendHealthAlert({
|
|
800
|
+
text: `Auto-disabled heartbeat for stale session "${sessionLabel}" after ${Math.round(staleForMs / 60_000)}m of inactivity.`,
|
|
801
|
+
dedupKey: buildSessionHeartbeatHealthDedupKey(sessionId, 'auto-disabled'),
|
|
802
|
+
entityType: 'session',
|
|
803
|
+
entityId: sessionId,
|
|
804
|
+
})
|
|
768
805
|
continue
|
|
769
806
|
}
|
|
770
807
|
|
|
@@ -772,9 +809,12 @@ async function runHealthChecks() {
|
|
|
772
809
|
// Only alert on transition from healthy → stale (once per stale episode)
|
|
773
810
|
if (!ds.staleSessionIds.has(sessionId)) {
|
|
774
811
|
ds.staleSessionIds.add(sessionId)
|
|
775
|
-
await sendHealthAlert(
|
|
776
|
-
`Session "${sessionLabel}" heartbeat appears stale (last active ${(Math.round(staleForMs / 1000))}s ago, interval ${intervalSec}s).`,
|
|
777
|
-
|
|
812
|
+
await sendHealthAlert({
|
|
813
|
+
text: `Session "${sessionLabel}" heartbeat appears stale (last active ${(Math.round(staleForMs / 1000))}s ago, interval ${intervalSec}s).`,
|
|
814
|
+
dedupKey: buildSessionHeartbeatHealthDedupKey(sessionId, 'stale'),
|
|
815
|
+
entityType: 'session',
|
|
816
|
+
entityId: sessionId,
|
|
817
|
+
})
|
|
778
818
|
}
|
|
779
819
|
}
|
|
780
820
|
}
|
|
@@ -980,6 +1020,49 @@ function stopEvalScheduler() {
|
|
|
980
1020
|
}
|
|
981
1021
|
}
|
|
982
1022
|
|
|
1023
|
+
/**
 * Restarts every daemon timer that is currently registered on `ds`.
 *
 * Does nothing unless `ds.running` is set. For each timer handle that is
 * present, the existing handle is cleared/stopped and the matching start
 * function is called again; timers that were not active are left untouched.
 * Handles are read one at a time, immediately before their own restart.
 */
function refreshDaemonTimersForHotReload() {
  if (!ds.running) return

  const queueTimer = ds.queueIntervalId
  if (queueTimer) {
    clearInterval(queueTimer)
    ds.queueIntervalId = null
    startQueueProcessor()
  }

  const browserSweepTimer = ds.browserSweepId
  if (browserSweepTimer) {
    clearInterval(browserSweepTimer)
    ds.browserSweepId = null
    startBrowserSweep()
  }

  const healthTimer = ds.healthIntervalId
  if (healthTimer) {
    clearInterval(healthTimer)
    ds.healthIntervalId = null
    startHealthMonitor()
  }

  const connectorHealthTimer = ds.connectorHealthIntervalId
  if (connectorHealthTimer) {
    clearInterval(connectorHealthTimer)
    ds.connectorHealthIntervalId = null
    startConnectorHealthMonitor()
  }

  // Memory consolidation tracks two handles; either one means it is active.
  const memoryConsolidationActive = Boolean(ds.memoryConsolidationTimeoutId || ds.memoryConsolidationIntervalId)
  if (memoryConsolidationActive) {
    stopMemoryConsolidation()
    startMemoryConsolidation()
  }

  if (ds.evalSchedulerIntervalId) {
    stopEvalScheduler()
    startEvalScheduler()
  }
}
|
|
1060
|
+
|
|
1061
|
+
// In dev/HMR, the daemon state survives on globalThis while interval callbacks keep
|
|
1062
|
+
// the old module closure alive. Refresh long-lived timers so they always run the
|
|
1063
|
+
// current module's logic instead of stale health-alert code paths.
|
|
1064
|
+
refreshDaemonTimersForHotReload()
|
|
1065
|
+
|
|
983
1066
|
export async function runDaemonHealthCheckNow() {
|
|
984
1067
|
await Promise.all([
|
|
985
1068
|
runHealthChecks(),
|
|
@@ -3,6 +3,7 @@ import { describe, it } from 'node:test'
|
|
|
3
3
|
|
|
4
4
|
import {
|
|
5
5
|
AGENT_REGRESSION_SCENARIOS,
|
|
6
|
+
DEFAULT_AGENT_REGRESSION_SCENARIO_IDS,
|
|
6
7
|
resolveRegressionApprovalSettings,
|
|
7
8
|
resolveRegressionPlugins,
|
|
8
9
|
scoreAssertions,
|
|
@@ -266,6 +267,10 @@ describe('AGENT_REGRESSION_SCENARIOS registry', () => {
|
|
|
266
267
|
'mock-signup-secret-email',
|
|
267
268
|
'human-verified-signup',
|
|
268
269
|
'research-build-deploy',
|
|
270
|
+
'blackboard-orchestrator-fit',
|
|
271
|
+
'tool-call-efficiency',
|
|
272
|
+
'file-creation-followthrough',
|
|
273
|
+
'knowledge-first-file',
|
|
269
274
|
])
|
|
270
275
|
})
|
|
271
276
|
|
|
@@ -282,6 +287,12 @@ describe('AGENT_REGRESSION_SCENARIOS registry', () => {
|
|
|
282
287
|
}
|
|
283
288
|
})
|
|
284
289
|
|
|
290
|
+
it('default suite ids exclude exploratory regressions unless explicitly requested', () => {
|
|
291
|
+
assert.ok(!DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('blackboard-orchestrator-fit'))
|
|
292
|
+
assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('approval-resume'))
|
|
293
|
+
assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('knowledge-first-file'))
|
|
294
|
+
})
|
|
295
|
+
|
|
285
296
|
it('no duplicate scenario IDs', () => {
|
|
286
297
|
const ids = AGENT_REGRESSION_SCENARIOS.map((s) => s.id)
|
|
287
298
|
const unique = new Set(ids)
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
import assert from 'node:assert/strict'
|
|
2
2
|
import { describe, it } from 'node:test'
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
AGENT_REGRESSION_SCENARIOS,
|
|
5
|
+
DEFAULT_AGENT_REGRESSION_SCENARIO_IDS,
|
|
6
|
+
resolveRegressionApprovalSettings,
|
|
7
|
+
resolveRegressionPlugins,
|
|
8
|
+
scoreAssertions,
|
|
9
|
+
} from './agent-regression'
|
|
4
10
|
|
|
5
11
|
describe('agent regression helpers', () => {
|
|
6
12
|
it('maps approval modes onto deterministic platform settings', () => {
|
|
@@ -42,12 +48,18 @@ describe('agent regression helpers', () => {
|
|
|
42
48
|
'mock-signup-secret-email',
|
|
43
49
|
'human-verified-signup',
|
|
44
50
|
'research-build-deploy',
|
|
51
|
+
'blackboard-orchestrator-fit',
|
|
45
52
|
'tool-call-efficiency',
|
|
46
53
|
'file-creation-followthrough',
|
|
47
54
|
'knowledge-first-file',
|
|
48
55
|
])
|
|
49
56
|
})
|
|
50
57
|
|
|
58
|
+
it('keeps exploratory scenarios out of the default suite score path', () => {
|
|
59
|
+
assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('research-build-deploy'))
|
|
60
|
+
assert.ok(!DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('blackboard-orchestrator-fit'))
|
|
61
|
+
})
|
|
62
|
+
|
|
51
63
|
it('can resolve regressions against the agent capability set instead of injected scenario plugins', () => {
|
|
52
64
|
const resolved = resolveRegressionPlugins(
|
|
53
65
|
['delegate', 'browser', 'manage_secrets', 'email'],
|
|
@@ -28,6 +28,7 @@ import {
|
|
|
28
28
|
loadTasks,
|
|
29
29
|
loadWatchJobs,
|
|
30
30
|
saveSchedules,
|
|
31
|
+
saveAgents,
|
|
31
32
|
saveSecrets,
|
|
32
33
|
saveSessions,
|
|
33
34
|
saveSettings,
|
|
@@ -104,6 +105,7 @@ interface AgentRegressionScenarioDefinition {
|
|
|
104
105
|
id: string
|
|
105
106
|
name: string
|
|
106
107
|
plugins: string[]
|
|
108
|
+
defaultInSuite?: boolean
|
|
107
109
|
run: (ctx: ScenarioContext) => Promise<AgentRegressionScenarioResult>
|
|
108
110
|
}
|
|
109
111
|
|
|
@@ -927,6 +929,15 @@ function cleanupScenarioState(ctx: ScenarioContext): void {
|
|
|
927
929
|
deleteApproval(approval.id)
|
|
928
930
|
}
|
|
929
931
|
|
|
932
|
+
const agents = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
|
|
933
|
+
let agentsChanged = false
|
|
934
|
+
for (const [agentId, agent] of Object.entries(agents)) {
|
|
935
|
+
if (agent?.createdInSessionId !== ctx.sessionId) continue
|
|
936
|
+
delete agents[agentId]
|
|
937
|
+
agentsChanged = true
|
|
938
|
+
}
|
|
939
|
+
if (agentsChanged) saveAgents(agents)
|
|
940
|
+
|
|
930
941
|
const watchJobs = loadWatchJobs() as Record<string, Record<string, unknown>>
|
|
931
942
|
for (const [watchJobId, watchJob] of Object.entries(watchJobs)) {
|
|
932
943
|
if (watchJob?.sessionId === ctx.sessionId) deleteWatchJob(watchJobId)
|
|
@@ -1710,6 +1721,201 @@ async function runResearchBuildDeployScenario(ctx: ScenarioContext): Promise<Age
|
|
|
1710
1721
|
}
|
|
1711
1722
|
}
|
|
1712
1723
|
|
|
1724
|
+
/**
 * Exploratory regression scenario: asks the agent to act as a coordinator-only
 * orchestrator that creates five department agents, five backlog tasks (one per
 * new agent) and a fit-gap note at `ops/blackboard-fit.md`.
 *
 * Flow:
 *  1. Temporarily sets the running agent's `platformAssignScope` to `'all'`
 *     (both in the agent store and on `ctx.agent`).
 *  2. Runs the main prompt, then up to two follow-up turns when agents, tasks,
 *     the note or the final summary are missing.
 *  3. Scores assertions over the agents/tasks whose `createdInSessionId`
 *     matches this session, the note contents and the response text.
 *  4. Restores the previous `platformAssignScope` in `finally`, but only when
 *     step 1 actually overrode it.
 *
 * @param ctx Scenario context for the current run.
 * @returns The scored scenario result with evidence attached.
 */
async function runBlackboardOrchestratorScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
  const noteRelativePath = 'ops/blackboard-fit.md'
  const notePath = scenarioFile(ctx, noteRelativePath)
  const prefix = `Eval ${ctx.sessionId.slice(-8)}`
  const departments = [
    { agentName: `${prefix} Research Orchestrator`, taskTitle: `${prefix} research-blackboard` },
    { agentName: `${prefix} Product Orchestrator`, taskTitle: `${prefix} product-blackboard` },
    { agentName: `${prefix} Revenue Orchestrator`, taskTitle: `${prefix} revenue-blackboard` },
    { agentName: `${prefix} Operations Orchestrator`, taskTitle: `${prefix} operations-blackboard` },
    { agentName: `${prefix} Support Orchestrator`, taskTitle: `${prefix} support-blackboard` },
  ]

  // Records produced by this scenario are recognised by their createdInSessionId.
  const loadCreatedAgents = () => Object.values(loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>)
    .filter((agent) => agent?.createdInSessionId === ctx.sessionId)
  const loadCreatedTasks = () => Object.values(loadTasks() as Record<string, Record<string, unknown>>)
    .filter((task) => task?.createdInSessionId === ctx.sessionId)

  const agentsBefore = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
  const currentAgent = agentsBefore[ctx.agentId]
  const previousAssignScope = typeof currentAgent?.platformAssignScope === 'string'
    ? currentAgent.platformAssignScope
    : undefined
  // Only undo the override in `finally` if it was actually applied below.
  let assignScopeOverridden = false
  if (currentAgent) {
    currentAgent.platformAssignScope = 'all'
    currentAgent.updatedAt = Date.now()
    agentsBefore[ctx.agentId] = currentAgent
    saveAgents(agentsBefore)
    ctx.agent.platformAssignScope = 'all'
    assignScopeOverridden = true
  }

  try {
    const prompt = [
      'Evaluate whether SwarmClaw can support a zero-work KING COO orchestrator model.',
      'Do not do any department implementation work yourself.',
      'Use manage_agents to create exactly five full agents with these exact names:',
      ...departments.map((department) => `- ${department.agentName}`),
      'Give each agent a short soul that describes a department orchestrator or execution lead.',
      'Use manage_tasks to create exactly five backlog tasks with these exact titles and assign one task to each new agent:',
      ...departments.map((department) => `- ${department.taskTitle}`),
      `Write "${noteRelativePath}" with sections "Supported Today", "Native Gaps", and "Bridging Plan".`,
      'In that note, mention that SwarmClaw already has native agents, task queues, memory, and chatroom/connector communication primitives.',
      'Also state clearly that SurrealDB would currently be an external integration or custom backing store, not a native built-in blackboard database.',
      'In your final response list the created agent ids, the created task ids, reference the note path, and say explicitly that the orchestrator stayed coordinator-only.',
    ].join('\n')

    await runTurn(ctx, prompt)

    let createdAgents = loadCreatedAgents()
    let createdTasks = loadCreatedTasks()

    // One retry turn when the setup is visibly incomplete.
    if (createdAgents.length < departments.length || createdTasks.length < departments.length || !fs.existsSync(notePath)) {
      await runTurn(
        ctx,
        'Finish the orchestration setup exactly as requested. Create any missing agents, create any missing backlog tasks assigned to those agents, and write the missing architecture note. Do not do department implementation work yourself.',
      )
      createdAgents = loadCreatedAgents()
      createdTasks = loadCreatedTasks()
    }

    const expectedAgentNames = new Set(departments.map((department) => department.agentName))
    const expectedTaskTitles = new Set(departments.map((department) => department.taskTitle))
    const createdAgentIds = new Set(
      createdAgents
        .map((agent) => (typeof agent.id === 'string' ? agent.id : ''))
        .filter(Boolean),
    )
    const createdAgentNames = new Set(
      createdAgents
        .map((agent) => (typeof agent.name === 'string' ? agent.name : ''))
        .filter(Boolean),
    )
    const createdTaskTitles = new Set(
      createdTasks
        .map((task) => (typeof task.title === 'string' ? task.title : ''))
        .filter(Boolean),
    )
    const allTasksAssignedToCreatedAgents = createdTasks.length > 0 && createdTasks.every((task) => (
      typeof task.agentId === 'string' && createdAgentIds.has(task.agentId)
    ))
    const noTasksAssignedToCoordinator = createdTasks.every((task) => task.agentId !== ctx.agentId)
    const statusesAcceptable = createdTasks.every((task) => ['backlog', 'queued'].includes(String(task.status || '')))

    let noteText = readIfExists(notePath)
    let responseBlob = ctx.responseTexts.join('\n').toLowerCase()
    // Both checks read the mutable noteText/responseBlob so they can be re-evaluated after a retry.
    const hasCoordinatorSummary = () => (
      responseBlob.includes(noteRelativePath.toLowerCase())
      && (
        responseBlob.includes('coordinator-only')
        || responseBlob.includes('stayed coordinator')
        || responseBlob.includes('did not do department implementation')
      )
    )
    const hasFitGapNote = () => {
      const noteLower = noteText.toLowerCase()
      return noteText.includes('## Supported Today')
        && noteText.includes('## Native Gaps')
        && noteText.includes('## Bridging Plan')
        && noteLower.includes('surrealdb')
        && (noteLower.includes('external integration') || noteLower.includes('not a native') || noteLower.includes('custom backing store'))
        && noteLower.includes('task')
        && noteLower.includes('agent')
        && (noteLower.includes('chatroom') || noteLower.includes('connector'))
        && noteLower.includes('memory')
    }

    if (!hasFitGapNote() || !hasCoordinatorSummary()) {
      await runTurn(
        ctx,
        [
          `If "${noteRelativePath}" is missing or incomplete, write it now with the required sections and SurrealDB gap explanation.`,
          'Then reply with a concise summary that lists the created agent ids, the created task ids, references the note path exactly, and says the orchestrator stayed coordinator-only.',
        ].join(' '),
      )
      noteText = readIfExists(notePath)
      responseBlob = ctx.responseTexts.join('\n').toLowerCase()
    }

    const assertions: RegressionAssertion[] = [
      {
        name: 'manage_agents used',
        passed: ctx.toolNames.has('manage_agents'),
        weight: 2,
      },
      {
        name: 'manage_tasks used',
        passed: ctx.toolNames.has('manage_tasks'),
        weight: 2,
      },
      {
        name: 'five orchestrator agents created',
        // Require every expected name to be present so five agents sharing one
        // expected name cannot pass (mirrors the task-title check below).
        passed: createdAgents.length === departments.length
          && createdAgents.every((agent) => expectedAgentNames.has(String(agent.name || '')))
          && [...expectedAgentNames].every((name) => createdAgentNames.has(name)),
        details: createdAgents.map((agent) => `${agent.id}:${agent.name}`).join(' | '),
        weight: 3,
      },
      {
        name: 'five backlog tasks assigned to created agents',
        passed: createdTasks.length === departments.length
          && [...expectedTaskTitles].every((title) => createdTaskTitles.has(title))
          && allTasksAssignedToCreatedAgents
          && statusesAcceptable,
        details: createdTasks.map((task) => `${task.id}:${task.title}:${task.agentId}:${task.status}`).join(' | '),
        weight: 3,
      },
      {
        name: 'coordinator kept execution off itself',
        passed: noTasksAssignedToCoordinator,
        weight: 2,
      },
      {
        name: 'fit-gap note explains native primitives and SurrealDB gap',
        passed: hasFitGapNote(),
        details: truncatePreview(noteText),
        weight: 3,
      },
      {
        name: 'final response references coordinator-only orchestration note',
        passed: hasCoordinatorSummary(),
      },
    ]

    const scored = scoreAssertions(assertions)
    return {
      scenarioId: 'blackboard-orchestrator-fit',
      name: 'Blackboard Orchestrator Fit',
      approvalMode: ctx.approvalMode,
      pluginMode: ctx.pluginMode,
      ...scored,
      assertions,
      sessionId: ctx.sessionId,
      workspaceDir: ctx.workspaceDir,
      requiredPlugins: [...ctx.requiredPlugins],
      effectivePlugins: [...ctx.effectivePlugins],
      missingPlugins: [...ctx.missingPlugins],
      toolNames: Array.from(ctx.toolNames),
      approvalIds: [],
      approvals: buildApprovalEvidence(ctx.sessionId),
      responseTexts: [...ctx.responseTexts],
      turns: [...ctx.turns],
      artifacts: buildArtifactEvidence(ctx, [noteRelativePath]),
      evidencePaths: writeScenarioEvidenceFiles(ctx),
    }
  } finally {
    // Undo the temporary scope override; leave everything alone if it was never applied.
    if (assignScopeOverridden) {
      const latestAgents = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
      if (latestAgents[ctx.agentId]) {
        if (previousAssignScope !== undefined) {
          latestAgents[ctx.agentId].platformAssignScope = previousAssignScope
        } else {
          delete latestAgents[ctx.agentId].platformAssignScope
        }
        latestAgents[ctx.agentId].updatedAt = Date.now()
        saveAgents(latestAgents)
      }
      if (previousAssignScope !== undefined) {
        ctx.agent.platformAssignScope = previousAssignScope
      } else {
        delete ctx.agent.platformAssignScope
      }
    }
  }
}
|
|
1918
|
+
|
|
1713
1919
|
/**
|
|
1714
1920
|
* Tool-call efficiency scenario: verifies the agent uses minimal tool calls
|
|
1715
1921
|
* for simple data-retrieval tasks. Catches regressions like:
|
|
@@ -1988,6 +2194,13 @@ export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
|
|
|
1988
2194
|
plugins: ['http_request', 'files', 'browser'],
|
|
1989
2195
|
run: runResearchBuildDeployScenario,
|
|
1990
2196
|
},
|
|
2197
|
+
{
|
|
2198
|
+
id: 'blackboard-orchestrator-fit',
|
|
2199
|
+
name: 'Blackboard Orchestrator Fit',
|
|
2200
|
+
plugins: ['manage_agents', 'manage_tasks', 'files'],
|
|
2201
|
+
defaultInSuite: false,
|
|
2202
|
+
run: runBlackboardOrchestratorScenario,
|
|
2203
|
+
},
|
|
1991
2204
|
{
|
|
1992
2205
|
id: 'tool-call-efficiency',
|
|
1993
2206
|
name: 'Tool Call Efficiency',
|
|
@@ -2008,8 +2221,15 @@ export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
|
|
|
2008
2221
|
},
|
|
2009
2222
|
]
|
|
2010
2223
|
|
|
2224
|
+
/**
 * Ids of the scenarios that run when no explicit id list is requested:
 * every registered scenario except those declaring `defaultInSuite: false`.
 * Registry order is preserved.
 */
export const DEFAULT_AGENT_REGRESSION_SCENARIO_IDS = AGENT_REGRESSION_SCENARIOS.flatMap(
  (scenario) => (scenario.defaultInSuite === false ? [] : [scenario.id]),
)
|
|
2227
|
+
|
|
2011
2228
|
/**
 * Selects scenario definitions from the registry.
 *
 * @param ids Scenario ids to run. When omitted or empty, the default suite
 *   (DEFAULT_AGENT_REGRESSION_SCENARIO_IDS) is used instead.
 * @returns Matching definitions in registry order; unknown ids are ignored.
 */
function resolveScenarioDefinitions(ids?: string[]): AgentRegressionScenarioDefinition[] {
  const requestedIds = ids?.length ? ids : DEFAULT_AGENT_REGRESSION_SCENARIO_IDS
  const wantedIds = new Set(requestedIds)
  return AGENT_REGRESSION_SCENARIOS.filter(({ id }) => wantedIds.has(id))
}
|
|
@@ -2,6 +2,8 @@ import test from 'node:test'
|
|
|
2
2
|
import assert from 'node:assert/strict'
|
|
3
3
|
import {
|
|
4
4
|
inferAutomaticMemoryCategory,
|
|
5
|
+
isDirectMemoryWriteRequest,
|
|
6
|
+
isCurrentThreadRecallRequest,
|
|
5
7
|
normalizeMemoryCategory,
|
|
6
8
|
shouldAutoCaptureMemory,
|
|
7
9
|
shouldInjectMemoryContext,
|
|
@@ -21,6 +23,36 @@ test('shouldInjectMemoryContext skips low-signal greetings and acknowledgements'
|
|
|
21
23
|
assert.equal(shouldInjectMemoryContext('Compare the current deployment plan with what we decided yesterday'), true)
|
|
22
24
|
})
|
|
23
25
|
|
|
26
|
+
test('isCurrentThreadRecallRequest detects same-thread recall without matching store commands', () => {
|
|
27
|
+
assert.equal(
|
|
28
|
+
isCurrentThreadRecallRequest('What preferences did I tell you earlier in this conversation? Answer from this conversation only.'),
|
|
29
|
+
true,
|
|
30
|
+
)
|
|
31
|
+
assert.equal(
|
|
32
|
+
isCurrentThreadRecallRequest('You just stored my favorite language in this chat. What was it?'),
|
|
33
|
+
true,
|
|
34
|
+
)
|
|
35
|
+
assert.equal(
|
|
36
|
+
isCurrentThreadRecallRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns.'),
|
|
37
|
+
false,
|
|
38
|
+
)
|
|
39
|
+
assert.equal(
|
|
40
|
+
isCurrentThreadRecallRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns. Then confirm what you just stored.'),
|
|
41
|
+
false,
|
|
42
|
+
)
|
|
43
|
+
})
|
|
44
|
+
|
|
45
|
+
test('isDirectMemoryWriteRequest detects remember-and-confirm turns without matching recall questions', () => {
|
|
46
|
+
assert.equal(
|
|
47
|
+
isDirectMemoryWriteRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns. Then confirm what you just stored.'),
|
|
48
|
+
true,
|
|
49
|
+
)
|
|
50
|
+
assert.equal(
|
|
51
|
+
isDirectMemoryWriteRequest('What preferences did I tell you earlier in this conversation?'),
|
|
52
|
+
false,
|
|
53
|
+
)
|
|
54
|
+
})
|
|
55
|
+
|
|
24
56
|
test('shouldAutoCaptureMemory filters noisy turns', () => {
|
|
25
57
|
assert.equal(shouldAutoCaptureMemory({ message: 'thanks', response: 'Happy to help with that.', source: 'chat' }), false)
|
|
26
58
|
assert.equal(shouldAutoCaptureMemory({ message: 'Please save this to memory', response: 'Stored memory "note".', source: 'chat' }), false)
|
|
@@ -4,6 +4,10 @@ const ACK_RE = /^(?:ok(?:ay)?|cool|nice|got it|makes sense|thanks|thank you|thx|
|
|
|
4
4
|
/** Matches a message that is nothing but a greeting, optionally trailed by `.`, `!` or spaces (case-insensitive). */
const GREETING_RE = /^(?:hi|hello|hey|yo|morning|good morning|good afternoon|good evening)[.! ]*$/i

/** Matches whole-word references to memory handling itself: remember, memory, memorize, "store this", "save this", forget. */
const MEMORY_META_RE = /\b(?:remember|memory|memorize|store this|save this|forget)\b/i

/** Matches text that begins with the `HEARTBEAT_OK` or `NO_MESSAGE` token. */
const LOW_SIGNAL_RESPONSE_RE = /^(?:HEARTBEAT_OK|NO_MESSAGE)\b/i

/**
 * Matches phrases that point at the conversation in progress, e.g. "this chat",
 * "earlier in this conversation", "from our thread", "you just stored", "we just decided".
 */
const CURRENT_THREAD_RECALL_MARKER_RE = /\b(?:this conversation|this chat|this thread|current conversation|current chat|current thread|same thread|same chat|same conversation|earlier in (?:this )?(?:conversation|chat|thread)|from (?:this|our) (?:conversation|chat|thread)|you just stored|you just said|we just discussed|we just decided)\b/i

/** Matches question words and recall verbs (what, which, did, recap, summarize, "tell me", recall, mention, ...). */
const CURRENT_THREAD_RECALL_INTENT_RE = /\b(?:what|which|who|when|where|did|remind|recap|summarize|repeat|list|tell me|answer|confirm|recall|mention)\b/i

/**
 * Matches imperative memory-write wording: remember, memorize, store, save,
 * "write to memory", "add to memory", and "update"/"correct" followed anywhere
 * later in the text by "memory". Inflected forms such as "stored" do not match
 * because of the trailing word boundary.
 */
const DIRECT_MEMORY_WRITE_MARKER_RE = /\b(?:remember|memorize|store|save|write to memory|add to memory|update.*memory|correct.*memory)\b/i

/** Matches a request to echo back what was written: confirm, recap, repeat, summarize, "what you just stored", ... */
const DIRECT_MEMORY_WRITE_FOLLOWUP_RE = /\b(?:confirm|recap|repeat|summarize|what you just stored|what you saved|what you updated)\b/i
|
|
7
11
|
|
|
8
12
|
function normalizeWhitespace(value: string): string {
|
|
9
13
|
return value.replace(/\s+/g, ' ').trim()
|
|
@@ -21,6 +25,27 @@ export function shouldInjectMemoryContext(message: string): boolean {
|
|
|
21
25
|
return true
|
|
22
26
|
}
|
|
23
27
|
|
|
28
|
+
/**
 * Reports whether a message asks to recall something from the conversation in progress.
 *
 * The message must reference the current thread (CURRENT_THREAD_RECALL_MARKER_RE)
 * and must not look like a memory-write command.
 *
 * @param message Raw user message; whitespace is normalized before matching.
 * @returns `true` when the message is a same-thread recall request, otherwise `false`.
 */
export function isCurrentThreadRecallRequest(message: string): boolean {
  const text = normalizeWhitespace(message)
  if (text.length === 0) return false

  // No reference to the current conversation means this cannot be a same-thread recall.
  const mentionsCurrentThread = CURRENT_THREAD_RECALL_MARKER_RE.test(text)
  if (!mentionsCurrentThread) return false

  // "Remember X ... then confirm what you stored" is a write with a follow-up, not a recall.
  const isWriteWithFollowup =
    DIRECT_MEMORY_WRITE_MARKER_RE.test(text) && DIRECT_MEMORY_WRITE_FOLLOWUP_RE.test(text)
  if (isWriteWithFollowup) return false

  const endsWithQuestion = /\?\s*$/.test(text)
  const hasStoreVerb = /\b(?:remember|store|save)\b/i.test(text)
  const hasAskWord = /\b(?:what|which|who|when|where|did|confirm|recap|summarize|repeat|list|tell me|answer|recall)\b/i.test(text)

  // A bare store/save/remember statement with no question shape is rejected.
  if (hasStoreVerb && !(endsWithQuestion || hasAskWord)) return false

  // Otherwise require either recall-intent wording or a trailing question mark.
  return CURRENT_THREAD_RECALL_INTENT_RE.test(text) || endsWithQuestion
}
|
|
38
|
+
|
|
39
|
+
/**
 * Reports whether a message is a direct instruction to write something to memory.
 *
 * The message must contain write wording (DIRECT_MEMORY_WRITE_MARKER_RE). If it also
 * asks for a follow-up such as "confirm what you just stored"
 * (DIRECT_MEMORY_WRITE_FOLLOWUP_RE), it is a write regardless of its shape.
 * Without a follow-up, a message that ends in a question mark or that is a
 * same-thread recall request is not a write.
 *
 * @param message Raw user message; whitespace is normalized before matching.
 * @returns `true` when the message should be treated as a memory-write request.
 */
export function isDirectMemoryWriteRequest(message: string): boolean {
  const text = normalizeWhitespace(message)
  if (text.length === 0 || !DIRECT_MEMORY_WRITE_MARKER_RE.test(text)) return false

  // A follow-up request ("confirm", "recap", ...) overrides both rejection rules below.
  const asksForFollowup = DIRECT_MEMORY_WRITE_FOLLOWUP_RE.test(text)
  if (asksForFollowup) return true

  const endsWithQuestion = /\?\s*$/.test(text)
  return !(endsWithQuestion || isCurrentThreadRecallRequest(text))
}
|
|
48
|
+
|
|
24
49
|
export function shouldAutoCaptureMemoryTurn(message: string, response: string): boolean {
|
|
25
50
|
const normalizedMessage = normalizeWhitespace(message)
|
|
26
51
|
const normalizedResponse = normalizeWhitespace(response)
|
|
@@ -69,6 +69,13 @@ describe('canonicalizePluginId', () => {
|
|
|
69
69
|
assert.equal(canonicalizePluginId('memory_tool'), 'memory')
|
|
70
70
|
})
|
|
71
71
|
|
|
72
|
+
it('resolves narrow memory tools → memory', () => {
  // Every narrow memory tool id must canonicalize to the single 'memory' plugin id.
  const narrowToolIds = ['memory_search', 'memory_get', 'memory_store', 'memory_update']
  for (const toolId of narrowToolIds) {
    assert.equal(canonicalizePluginId(toolId), 'memory')
  }
})
|
|
78
|
+
|
|
72
79
|
it('keeps files (already canonical)', () => {
  // An id that is already canonical must come back unchanged.
  const canonicalId = 'files'
  assert.equal(canonicalizePluginId(canonicalId), canonicalId)
})
|