@swarmclawai/swarmclaw 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +8 -7
  2. package/package.json +2 -2
  3. package/src/app/api/notifications/route.ts +11 -12
  4. package/src/app/page.tsx +9 -0
  5. package/src/components/chat/chat-list.tsx +10 -9
  6. package/src/components/home/home-view.tsx +13 -2
  7. package/src/components/layout/app-layout.tsx +1 -0
  8. package/src/components/shared/command-palette.tsx +4 -1
  9. package/src/components/shared/notification-center.tsx +7 -1
  10. package/src/components/shared/search-dialog.tsx +10 -2
  11. package/src/lib/local-observability.test.ts +73 -0
  12. package/src/lib/local-observability.ts +47 -0
  13. package/src/lib/notification-utils.test.ts +72 -0
  14. package/src/lib/notification-utils.ts +68 -0
  15. package/src/lib/providers/openclaw.test.ts +21 -1
  16. package/src/lib/providers/openclaw.ts +22 -0
  17. package/src/lib/runtime-loop.ts +1 -1
  18. package/src/lib/server/agent-thread-session.test.ts +41 -0
  19. package/src/lib/server/agent-thread-session.ts +1 -0
  20. package/src/lib/server/chat-execution-advanced.test.ts +7 -0
  21. package/src/lib/server/chat-execution-eval-history.test.ts +111 -0
  22. package/src/lib/server/chat-execution.ts +22 -5
  23. package/src/lib/server/create-notification.test.ts +94 -0
  24. package/src/lib/server/create-notification.ts +31 -25
  25. package/src/lib/server/daemon-state.test.ts +50 -0
  26. package/src/lib/server/daemon-state.ts +121 -38
  27. package/src/lib/server/eval/agent-regression-advanced.test.ts +11 -0
  28. package/src/lib/server/eval/agent-regression.test.ts +13 -1
  29. package/src/lib/server/eval/agent-regression.ts +221 -1
  30. package/src/lib/server/memory-policy.test.ts +32 -0
  31. package/src/lib/server/memory-policy.ts +25 -0
  32. package/src/lib/server/plugins-advanced.test.ts +7 -0
  33. package/src/lib/server/runtime-settings.test.ts +2 -2
  34. package/src/lib/server/session-tools/crud.test.ts +136 -0
  35. package/src/lib/server/session-tools/crud.ts +44 -2
  36. package/src/lib/server/session-tools/delegate-fallback.test.ts +36 -0
  37. package/src/lib/server/session-tools/delegate.ts +30 -0
  38. package/src/lib/server/session-tools/discovery-approvals.test.ts +40 -0
  39. package/src/lib/server/session-tools/discovery.ts +7 -6
  40. package/src/lib/server/session-tools/memory.ts +156 -6
  41. package/src/lib/server/session-tools/session-tools-wiring.test.ts +12 -0
  42. package/src/lib/server/session-tools/subagent.ts +4 -4
  43. package/src/lib/server/storage.ts +14 -1
  44. package/src/lib/server/stream-agent-chat.test.ts +78 -1
  45. package/src/lib/server/stream-agent-chat.ts +225 -22
  46. package/src/lib/server/tool-aliases.ts +1 -1
  47. package/src/lib/server/tool-capability-policy.ts +1 -1
  48. package/src/stores/use-app-store.ts +26 -1
  49. package/src/types/index.ts +4 -0
@@ -5,9 +5,9 @@ import { startScheduler, stopScheduler } from './scheduler'
5
5
  import { sweepOrphanedBrowsers, getActiveBrowserCount } from './session-tools'
6
6
  import {
7
7
  autoStartConnectors,
8
- stopAllConnectors,
9
8
  listRunningConnectors,
10
9
  sendConnectorMessage,
10
+ stopAllConnectors,
11
11
  startConnector,
12
12
  getConnectorStatus,
13
13
  checkConnectorHealth,
@@ -25,7 +25,7 @@ import { WORKSPACE_DIR } from './data-dir'
25
25
  import { DEFAULT_HEARTBEAT_INTERVAL_SEC } from '@/lib/heartbeat-defaults'
26
26
  import { genId } from '@/lib/id'
27
27
  import path from 'node:path'
28
- import type { WebhookRetryEntry } from '@/types'
28
+ import type { Session, WebhookRetryEntry } from '@/types'
29
29
  import { createNotification } from '@/lib/server/create-notification'
30
30
  import { pingProvider, OPENAI_COMPATIBLE_DEFAULTS } from '@/lib/server/provider-health'
31
31
  import { runIntegrityMonitor } from '@/lib/server/integrity-monitor'
@@ -75,17 +75,41 @@ function parseHeartbeatIntervalSec(value: unknown, fallback = DEFAULT_HEARTBEAT_
75
75
  return Math.max(0, Math.min(3600, Math.trunc(parsed)))
76
76
  }
77
77
 
78
- function normalizeWhatsappTarget(raw?: string | null): string | null {
79
- const input = (raw || '').trim()
80
- if (!input) return null
81
- if (input.includes('@')) return input
82
- let digits = input.replace(/[^\d+]/g, '')
83
- if (digits.startsWith('+')) digits = digits.slice(1)
84
- if (digits.startsWith('0') && digits.length >= 10) {
85
- digits = `44${digits.slice(1)}`
86
- }
87
- digits = digits.replace(/[^\d]/g, '')
88
- return digits ? `${digits}@s.whatsapp.net` : null
78
+ export function shouldNotifyProviderReachabilityIssue(provider: string): boolean {
79
+ return provider !== 'openclaw'
80
+ }
81
+
82
+ const SYNTHETIC_HEALTH_SESSION_USERS = new Set(['workbench', 'comparison-bench'])
83
+ const SYNTHETIC_HEALTH_SESSION_PREFIXES = ['wb-', 'cmp-']
84
+
85
+ function hasSyntheticHealthPrefix(value: unknown): boolean {
86
+ const normalized = typeof value === 'string' ? value.trim().toLowerCase() : ''
87
+ return SYNTHETIC_HEALTH_SESSION_PREFIXES.some((prefix) => normalized.startsWith(prefix))
88
+ }
89
+
90
+ export function shouldSuppressSessionHeartbeatHealthAlert(
91
+ session: Pick<Session, 'id' | 'name' | 'user' | 'shortcutForAgentId'>,
92
+ ): boolean {
93
+ const user = typeof session.user === 'string' ? session.user.trim().toLowerCase() : ''
94
+ if (SYNTHETIC_HEALTH_SESSION_USERS.has(user)) return true
95
+ if (hasSyntheticHealthPrefix(session.id)) return true
96
+ if (hasSyntheticHealthPrefix(session.shortcutForAgentId)) return true
97
+
98
+ const name = typeof session.name === 'string' ? session.name.trim().toLowerCase() : ''
99
+ return name.startsWith('workbench ')
100
+ || name.startsWith('assistant benchmark ')
101
+ || name.startsWith('comparison ')
102
+ }
103
+
104
+ export function shouldSuppressSyntheticAgentHealthAlert(agentId: string): boolean {
105
+ return hasSyntheticHealthPrefix(agentId)
106
+ }
107
+
108
+ export function buildSessionHeartbeatHealthDedupKey(
109
+ sessionId: string,
110
+ state: 'stale' | 'auto-disabled',
111
+ ): string {
112
+ return `health-alert:session-heartbeat:${state}:${sessionId}`
89
113
  }
90
114
 
91
115
  // Store daemon state on globalThis to survive HMR reloads
@@ -268,23 +292,24 @@ function stopQueueProcessor() {
268
292
  }
269
293
  }
270
294
 
271
- async function sendHealthAlert(text: string) {
295
+ async function sendHealthAlert(input: string | {
296
+ text: string
297
+ dedupKey?: string
298
+ entityType?: string
299
+ entityId?: string
300
+ }) {
301
+ const payload = typeof input === 'string' ? { text: input } : input
302
+ const text = payload.text
272
303
  console.warn(`[health] ${text}`)
273
- try {
274
- const running = listRunningConnectors('whatsapp')
275
- if (!running.length) return
276
- const candidate = running[0]
277
- const target = candidate.recentChannelId
278
- || normalizeWhatsappTarget(candidate.configuredTargets[0] || null)
279
- if (!target) return
280
- await sendConnectorMessage({
281
- connectorId: candidate.id,
282
- channelId: target,
283
- text: `⚠️ SwarmClaw health alert: ${text}`,
284
- })
285
- } catch {
286
- // alerts are best effort; log-only fallback is acceptable
287
- }
304
+ createNotification({
305
+ type: 'warning',
306
+ title: 'SwarmClaw health alert',
307
+ message: text,
308
+ dedupKey: payload.dedupKey || `health-alert:${text}`,
309
+ entityType: payload.entityType,
310
+ entityId: payload.entityId,
311
+ dispatchExternally: false,
312
+ })
288
313
  }
289
314
 
290
315
  async function runConnectorHealthChecks(now: number) {
@@ -526,6 +551,7 @@ async function runProviderHealthChecks() {
526
551
 
527
552
  for (const agent of Object.values(agents) as Record<string, unknown>[]) {
528
553
  if (!agent?.id || typeof agent.id !== 'string') continue
554
+ if (shouldSuppressSyntheticAgentHealthAlert(agent.id)) continue
529
555
  const provider = typeof agent.provider === 'string' ? agent.provider : ''
530
556
  if (!provider || ['claude-cli', 'codex-cli', 'opencode-cli'].includes(provider)) continue
531
557
 
@@ -564,9 +590,11 @@ async function runProviderHealthChecks() {
564
590
  const result = await pingProvider(tuple.provider, apiKey, endpoint)
565
591
 
566
592
  if (!result.ok) {
567
- const dedupKey = tuple.provider === 'openclaw'
568
- ? `openclaw-down:${tuple.agentId}`
569
- : `provider-down:${tuple.credentialId || tuple.provider}`
593
+ if (!shouldNotifyProviderReachabilityIssue(tuple.provider)) {
594
+ continue
595
+ }
596
+
597
+ const dedupKey = `provider-down:${tuple.credentialId || tuple.provider}`
570
598
 
571
599
  const entityType = tuple.credentialId ? 'credential' : undefined
572
600
  const entityId = tuple.credentialId || undefined
@@ -596,6 +624,7 @@ async function runOpenClawGatewayHealthChecks() {
596
624
 
597
625
  for (const agent of Object.values(agents) as Record<string, unknown>[]) {
598
626
  if (!agent?.id || typeof agent.id !== 'string') continue
627
+ if (shouldSuppressSyntheticAgentHealthAlert(agent.id)) continue
599
628
  if (agent.provider !== 'openclaw') continue
600
629
 
601
630
  const key = `openclaw:${agent.id}`
@@ -747,6 +776,11 @@ async function runHealthChecks() {
747
776
  if (session.heartbeatEnabled !== true) continue
748
777
 
749
778
  const sessionId = session.id
779
+ if (shouldSuppressSessionHeartbeatHealthAlert(session as Pick<Session, 'id' | 'name' | 'user' | 'shortcutForAgentId'>)) {
780
+ ds.staleSessionIds.delete(sessionId)
781
+ continue
782
+ }
783
+
750
784
  const sessionLabel = String(session.name || sessionId)
751
785
  const intervalSec = parseHeartbeatIntervalSec(session.heartbeatIntervalSec, DEFAULT_HEARTBEAT_INTERVAL_SEC)
752
786
  if (intervalSec <= 0) continue
@@ -762,9 +796,12 @@ async function runHealthChecks() {
762
796
  session.lastActiveAt = now
763
797
  sessionsDirty = true
764
798
  ds.staleSessionIds.delete(sessionId)
765
- await sendHealthAlert(
766
- `Auto-disabled heartbeat for stale session "${sessionLabel}" after ${Math.round(staleForMs / 60_000)}m of inactivity.`,
767
- )
799
+ await sendHealthAlert({
800
+ text: `Auto-disabled heartbeat for stale session "${sessionLabel}" after ${Math.round(staleForMs / 60_000)}m of inactivity.`,
801
+ dedupKey: buildSessionHeartbeatHealthDedupKey(sessionId, 'auto-disabled'),
802
+ entityType: 'session',
803
+ entityId: sessionId,
804
+ })
768
805
  continue
769
806
  }
770
807
 
@@ -772,9 +809,12 @@ async function runHealthChecks() {
772
809
  // Only alert on transition from healthy → stale (once per stale episode)
773
810
  if (!ds.staleSessionIds.has(sessionId)) {
774
811
  ds.staleSessionIds.add(sessionId)
775
- await sendHealthAlert(
776
- `Session "${sessionLabel}" heartbeat appears stale (last active ${(Math.round(staleForMs / 1000))}s ago, interval ${intervalSec}s).`,
777
- )
812
+ await sendHealthAlert({
813
+ text: `Session "${sessionLabel}" heartbeat appears stale (last active ${(Math.round(staleForMs / 1000))}s ago, interval ${intervalSec}s).`,
814
+ dedupKey: buildSessionHeartbeatHealthDedupKey(sessionId, 'stale'),
815
+ entityType: 'session',
816
+ entityId: sessionId,
817
+ })
778
818
  }
779
819
  }
780
820
  }
@@ -980,6 +1020,49 @@ function stopEvalScheduler() {
980
1020
  }
981
1021
  }
982
1022
 
1023
+ function refreshDaemonTimersForHotReload() {
1024
+ if (!ds.running) return
1025
+
1026
+ if (ds.queueIntervalId) {
1027
+ clearInterval(ds.queueIntervalId)
1028
+ ds.queueIntervalId = null
1029
+ startQueueProcessor()
1030
+ }
1031
+
1032
+ if (ds.browserSweepId) {
1033
+ clearInterval(ds.browserSweepId)
1034
+ ds.browserSweepId = null
1035
+ startBrowserSweep()
1036
+ }
1037
+
1038
+ if (ds.healthIntervalId) {
1039
+ clearInterval(ds.healthIntervalId)
1040
+ ds.healthIntervalId = null
1041
+ startHealthMonitor()
1042
+ }
1043
+
1044
+ if (ds.connectorHealthIntervalId) {
1045
+ clearInterval(ds.connectorHealthIntervalId)
1046
+ ds.connectorHealthIntervalId = null
1047
+ startConnectorHealthMonitor()
1048
+ }
1049
+
1050
+ if (ds.memoryConsolidationTimeoutId || ds.memoryConsolidationIntervalId) {
1051
+ stopMemoryConsolidation()
1052
+ startMemoryConsolidation()
1053
+ }
1054
+
1055
+ if (ds.evalSchedulerIntervalId) {
1056
+ stopEvalScheduler()
1057
+ startEvalScheduler()
1058
+ }
1059
+ }
1060
+
1061
+ // In dev/HMR, the daemon state survives on globalThis while interval callbacks keep
1062
+ // the old module closure alive. Refresh long-lived timers so they always run the
1063
+ // current module's logic instead of stale health-alert code paths.
1064
+ refreshDaemonTimersForHotReload()
1065
+
983
1066
  export async function runDaemonHealthCheckNow() {
984
1067
  await Promise.all([
985
1068
  runHealthChecks(),
@@ -3,6 +3,7 @@ import { describe, it } from 'node:test'
3
3
 
4
4
  import {
5
5
  AGENT_REGRESSION_SCENARIOS,
6
+ DEFAULT_AGENT_REGRESSION_SCENARIO_IDS,
6
7
  resolveRegressionApprovalSettings,
7
8
  resolveRegressionPlugins,
8
9
  scoreAssertions,
@@ -266,6 +267,10 @@ describe('AGENT_REGRESSION_SCENARIOS registry', () => {
266
267
  'mock-signup-secret-email',
267
268
  'human-verified-signup',
268
269
  'research-build-deploy',
270
+ 'blackboard-orchestrator-fit',
271
+ 'tool-call-efficiency',
272
+ 'file-creation-followthrough',
273
+ 'knowledge-first-file',
269
274
  ])
270
275
  })
271
276
 
@@ -282,6 +287,12 @@ describe('AGENT_REGRESSION_SCENARIOS registry', () => {
282
287
  }
283
288
  })
284
289
 
290
+ it('default suite ids exclude exploratory regressions unless explicitly requested', () => {
291
+ assert.ok(!DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('blackboard-orchestrator-fit'))
292
+ assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('approval-resume'))
293
+ assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('knowledge-first-file'))
294
+ })
295
+
285
296
  it('no duplicate scenario IDs', () => {
286
297
  const ids = AGENT_REGRESSION_SCENARIOS.map((s) => s.id)
287
298
  const unique = new Set(ids)
@@ -1,6 +1,12 @@
1
1
  import assert from 'node:assert/strict'
2
2
  import { describe, it } from 'node:test'
3
- import { AGENT_REGRESSION_SCENARIOS, resolveRegressionApprovalSettings, resolveRegressionPlugins, scoreAssertions } from './agent-regression'
3
+ import {
4
+ AGENT_REGRESSION_SCENARIOS,
5
+ DEFAULT_AGENT_REGRESSION_SCENARIO_IDS,
6
+ resolveRegressionApprovalSettings,
7
+ resolveRegressionPlugins,
8
+ scoreAssertions,
9
+ } from './agent-regression'
4
10
 
5
11
  describe('agent regression helpers', () => {
6
12
  it('maps approval modes onto deterministic platform settings', () => {
@@ -42,12 +48,18 @@ describe('agent regression helpers', () => {
42
48
  'mock-signup-secret-email',
43
49
  'human-verified-signup',
44
50
  'research-build-deploy',
51
+ 'blackboard-orchestrator-fit',
45
52
  'tool-call-efficiency',
46
53
  'file-creation-followthrough',
47
54
  'knowledge-first-file',
48
55
  ])
49
56
  })
50
57
 
58
+ it('keeps exploratory scenarios out of the default suite score path', () => {
59
+ assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('research-build-deploy'))
60
+ assert.ok(!DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('blackboard-orchestrator-fit'))
61
+ })
62
+
51
63
  it('can resolve regressions against the agent capability set instead of injected scenario plugins', () => {
52
64
  const resolved = resolveRegressionPlugins(
53
65
  ['delegate', 'browser', 'manage_secrets', 'email'],
@@ -28,6 +28,7 @@ import {
28
28
  loadTasks,
29
29
  loadWatchJobs,
30
30
  saveSchedules,
31
+ saveAgents,
31
32
  saveSecrets,
32
33
  saveSessions,
33
34
  saveSettings,
@@ -104,6 +105,7 @@ interface AgentRegressionScenarioDefinition {
104
105
  id: string
105
106
  name: string
106
107
  plugins: string[]
108
+ defaultInSuite?: boolean
107
109
  run: (ctx: ScenarioContext) => Promise<AgentRegressionScenarioResult>
108
110
  }
109
111
 
@@ -927,6 +929,15 @@ function cleanupScenarioState(ctx: ScenarioContext): void {
927
929
  deleteApproval(approval.id)
928
930
  }
929
931
 
932
+ const agents = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
933
+ let agentsChanged = false
934
+ for (const [agentId, agent] of Object.entries(agents)) {
935
+ if (agent?.createdInSessionId !== ctx.sessionId) continue
936
+ delete agents[agentId]
937
+ agentsChanged = true
938
+ }
939
+ if (agentsChanged) saveAgents(agents)
940
+
930
941
  const watchJobs = loadWatchJobs() as Record<string, Record<string, unknown>>
931
942
  for (const [watchJobId, watchJob] of Object.entries(watchJobs)) {
932
943
  if (watchJob?.sessionId === ctx.sessionId) deleteWatchJob(watchJobId)
@@ -1710,6 +1721,201 @@ async function runResearchBuildDeployScenario(ctx: ScenarioContext): Promise<Age
1710
1721
  }
1711
1722
  }
1712
1723
 
1724
+ async function runBlackboardOrchestratorScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
1725
+ const noteRelativePath = 'ops/blackboard-fit.md'
1726
+ const notePath = scenarioFile(ctx, noteRelativePath)
1727
+ const prefix = `Eval ${ctx.sessionId.slice(-8)}`
1728
+ const departments = [
1729
+ { agentName: `${prefix} Research Orchestrator`, taskTitle: `${prefix} research-blackboard` },
1730
+ { agentName: `${prefix} Product Orchestrator`, taskTitle: `${prefix} product-blackboard` },
1731
+ { agentName: `${prefix} Revenue Orchestrator`, taskTitle: `${prefix} revenue-blackboard` },
1732
+ { agentName: `${prefix} Operations Orchestrator`, taskTitle: `${prefix} operations-blackboard` },
1733
+ { agentName: `${prefix} Support Orchestrator`, taskTitle: `${prefix} support-blackboard` },
1734
+ ]
1735
+
1736
+ const agentsBefore = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
1737
+ const currentAgent = agentsBefore[ctx.agentId]
1738
+ const previousAssignScope = typeof currentAgent?.platformAssignScope === 'string'
1739
+ ? currentAgent.platformAssignScope
1740
+ : undefined
1741
+ if (currentAgent) {
1742
+ currentAgent.platformAssignScope = 'all'
1743
+ currentAgent.updatedAt = Date.now()
1744
+ agentsBefore[ctx.agentId] = currentAgent
1745
+ saveAgents(agentsBefore)
1746
+ ctx.agent.platformAssignScope = 'all'
1747
+ }
1748
+
1749
+ try {
1750
+ const prompt = [
1751
+ 'Evaluate whether SwarmClaw can support a zero-work KING COO orchestrator model.',
1752
+ 'Do not do any department implementation work yourself.',
1753
+ 'Use manage_agents to create exactly five full agents with these exact names:',
1754
+ ...departments.map((department) => `- ${department.agentName}`),
1755
+ 'Give each agent a short soul that describes a department orchestrator or execution lead.',
1756
+ 'Use manage_tasks to create exactly five backlog tasks with these exact titles and assign one task to each new agent:',
1757
+ ...departments.map((department) => `- ${department.taskTitle}`),
1758
+ `Write "${noteRelativePath}" with sections "Supported Today", "Native Gaps", and "Bridging Plan".`,
1759
+ 'In that note, mention that SwarmClaw already has native agents, task queues, memory, and chatroom/connector communication primitives.',
1760
+ 'Also state clearly that SurrealDB would currently be an external integration or custom backing store, not a native built-in blackboard database.',
1761
+ 'In your final response list the created agent ids, the created task ids, reference the note path, and say explicitly that the orchestrator stayed coordinator-only.',
1762
+ ].join('\n')
1763
+
1764
+ await runTurn(ctx, prompt)
1765
+
1766
+ let createdAgents = Object.values(loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>)
1767
+ .filter((agent) => agent?.createdInSessionId === ctx.sessionId)
1768
+ let createdTasks = Object.values(loadTasks() as Record<string, Record<string, unknown>>)
1769
+ .filter((task) => task?.createdInSessionId === ctx.sessionId)
1770
+
1771
+ if (createdAgents.length < departments.length || createdTasks.length < departments.length || !fs.existsSync(notePath)) {
1772
+ await runTurn(
1773
+ ctx,
1774
+ 'Finish the orchestration setup exactly as requested. Create any missing agents, create any missing backlog tasks assigned to those agents, and write the missing architecture note. Do not do department implementation work yourself.',
1775
+ )
1776
+ createdAgents = Object.values(loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>)
1777
+ .filter((agent) => agent?.createdInSessionId === ctx.sessionId)
1778
+ createdTasks = Object.values(loadTasks() as Record<string, Record<string, unknown>>)
1779
+ .filter((task) => task?.createdInSessionId === ctx.sessionId)
1780
+ }
1781
+
1782
+ const expectedAgentNames = new Set(departments.map((department) => department.agentName))
1783
+ const expectedTaskTitles = new Set(departments.map((department) => department.taskTitle))
1784
+ const createdAgentIds = new Set(
1785
+ createdAgents
1786
+ .map((agent) => (typeof agent.id === 'string' ? agent.id : ''))
1787
+ .filter(Boolean),
1788
+ )
1789
+ const createdTaskTitles = new Set(
1790
+ createdTasks
1791
+ .map((task) => (typeof task.title === 'string' ? task.title : ''))
1792
+ .filter(Boolean),
1793
+ )
1794
+ const allTasksAssignedToCreatedAgents = createdTasks.length > 0 && createdTasks.every((task) => (
1795
+ typeof task.agentId === 'string' && createdAgentIds.has(task.agentId)
1796
+ ))
1797
+ const noTasksAssignedToCoordinator = createdTasks.every((task) => task.agentId !== ctx.agentId)
1798
+ const statusesAcceptable = createdTasks.every((task) => ['backlog', 'queued'].includes(String(task.status || '')))
1799
+
1800
+ let noteText = readIfExists(notePath)
1801
+ let responseBlob = ctx.responseTexts.join('\n').toLowerCase()
1802
+ const hasCoordinatorSummary = () => (
1803
+ responseBlob.includes(noteRelativePath.toLowerCase())
1804
+ && (
1805
+ responseBlob.includes('coordinator-only')
1806
+ || responseBlob.includes('stayed coordinator')
1807
+ || responseBlob.includes('did not do department implementation')
1808
+ )
1809
+ )
1810
+ const hasFitGapNote = () => {
1811
+ const noteLower = noteText.toLowerCase()
1812
+ return noteText.includes('## Supported Today')
1813
+ && noteText.includes('## Native Gaps')
1814
+ && noteText.includes('## Bridging Plan')
1815
+ && noteLower.includes('surrealdb')
1816
+ && (noteLower.includes('external integration') || noteLower.includes('not a native') || noteLower.includes('custom backing store'))
1817
+ && noteLower.includes('task')
1818
+ && noteLower.includes('agent')
1819
+ && (noteLower.includes('chatroom') || noteLower.includes('connector'))
1820
+ && noteLower.includes('memory')
1821
+ }
1822
+
1823
+ if (!hasFitGapNote() || !hasCoordinatorSummary()) {
1824
+ await runTurn(
1825
+ ctx,
1826
+ [
1827
+ `If "${noteRelativePath}" is missing or incomplete, write it now with the required sections and SurrealDB gap explanation.`,
1828
+ 'Then reply with a concise summary that lists the created agent ids, the created task ids, references the note path exactly, and says the orchestrator stayed coordinator-only.',
1829
+ ].join(' '),
1830
+ )
1831
+ noteText = readIfExists(notePath)
1832
+ responseBlob = ctx.responseTexts.join('\n').toLowerCase()
1833
+ }
1834
+
1835
+ const assertions: RegressionAssertion[] = [
1836
+ {
1837
+ name: 'manage_agents used',
1838
+ passed: ctx.toolNames.has('manage_agents'),
1839
+ weight: 2,
1840
+ },
1841
+ {
1842
+ name: 'manage_tasks used',
1843
+ passed: ctx.toolNames.has('manage_tasks'),
1844
+ weight: 2,
1845
+ },
1846
+ {
1847
+ name: 'five orchestrator agents created',
1848
+ passed: createdAgents.length === departments.length
1849
+ && createdAgents.every((agent) => expectedAgentNames.has(String(agent.name || ''))),
1850
+ details: createdAgents.map((agent) => `${agent.id}:${agent.name}`).join(' | '),
1851
+ weight: 3,
1852
+ },
1853
+ {
1854
+ name: 'five backlog tasks assigned to created agents',
1855
+ passed: createdTasks.length === departments.length
1856
+ && [...expectedTaskTitles].every((title) => createdTaskTitles.has(title))
1857
+ && allTasksAssignedToCreatedAgents
1858
+ && statusesAcceptable,
1859
+ details: createdTasks.map((task) => `${task.id}:${task.title}:${task.agentId}:${task.status}`).join(' | '),
1860
+ weight: 3,
1861
+ },
1862
+ {
1863
+ name: 'coordinator kept execution off itself',
1864
+ passed: noTasksAssignedToCoordinator,
1865
+ weight: 2,
1866
+ },
1867
+ {
1868
+ name: 'fit-gap note explains native primitives and SurrealDB gap',
1869
+ passed: hasFitGapNote(),
1870
+ details: truncatePreview(noteText),
1871
+ weight: 3,
1872
+ },
1873
+ {
1874
+ name: 'final response references coordinator-only orchestration note',
1875
+ passed: hasCoordinatorSummary(),
1876
+ },
1877
+ ]
1878
+
1879
+ const scored = scoreAssertions(assertions)
1880
+ return {
1881
+ scenarioId: 'blackboard-orchestrator-fit',
1882
+ name: 'Blackboard Orchestrator Fit',
1883
+ approvalMode: ctx.approvalMode,
1884
+ pluginMode: ctx.pluginMode,
1885
+ ...scored,
1886
+ assertions,
1887
+ sessionId: ctx.sessionId,
1888
+ workspaceDir: ctx.workspaceDir,
1889
+ requiredPlugins: [...ctx.requiredPlugins],
1890
+ effectivePlugins: [...ctx.effectivePlugins],
1891
+ missingPlugins: [...ctx.missingPlugins],
1892
+ toolNames: Array.from(ctx.toolNames),
1893
+ approvalIds: [],
1894
+ approvals: buildApprovalEvidence(ctx.sessionId),
1895
+ responseTexts: [...ctx.responseTexts],
1896
+ turns: [...ctx.turns],
1897
+ artifacts: buildArtifactEvidence(ctx, [noteRelativePath]),
1898
+ evidencePaths: writeScenarioEvidenceFiles(ctx),
1899
+ }
1900
+ } finally {
1901
+ const latestAgents = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
1902
+ if (latestAgents[ctx.agentId]) {
1903
+ if (previousAssignScope) {
1904
+ latestAgents[ctx.agentId].platformAssignScope = previousAssignScope
1905
+ } else {
1906
+ delete latestAgents[ctx.agentId].platformAssignScope
1907
+ }
1908
+ latestAgents[ctx.agentId].updatedAt = Date.now()
1909
+ saveAgents(latestAgents)
1910
+ }
1911
+ if (previousAssignScope) {
1912
+ ctx.agent.platformAssignScope = previousAssignScope
1913
+ } else {
1914
+ delete ctx.agent.platformAssignScope
1915
+ }
1916
+ }
1917
+ }
1918
+
1713
1919
  /**
1714
1920
  * Tool-call efficiency scenario: verifies the agent uses minimal tool calls
1715
1921
  * for simple data-retrieval tasks. Catches regressions like:
@@ -1988,6 +2194,13 @@ export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
1988
2194
  plugins: ['http_request', 'files', 'browser'],
1989
2195
  run: runResearchBuildDeployScenario,
1990
2196
  },
2197
+ {
2198
+ id: 'blackboard-orchestrator-fit',
2199
+ name: 'Blackboard Orchestrator Fit',
2200
+ plugins: ['manage_agents', 'manage_tasks', 'files'],
2201
+ defaultInSuite: false,
2202
+ run: runBlackboardOrchestratorScenario,
2203
+ },
1991
2204
  {
1992
2205
  id: 'tool-call-efficiency',
1993
2206
  name: 'Tool Call Efficiency',
@@ -2008,8 +2221,15 @@ export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
2008
2221
  },
2009
2222
  ]
2010
2223
 
2224
+ export const DEFAULT_AGENT_REGRESSION_SCENARIO_IDS = AGENT_REGRESSION_SCENARIOS
2225
+ .filter((scenario) => scenario.defaultInSuite !== false)
2226
+ .map((scenario) => scenario.id)
2227
+
2011
2228
  function resolveScenarioDefinitions(ids?: string[]): AgentRegressionScenarioDefinition[] {
2012
- if (!ids?.length) return AGENT_REGRESSION_SCENARIOS
2229
+ if (!ids?.length) {
2230
+ const wanted = new Set(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS)
2231
+ return AGENT_REGRESSION_SCENARIOS.filter((scenario) => wanted.has(scenario.id))
2232
+ }
2013
2233
  const wanted = new Set(ids)
2014
2234
  return AGENT_REGRESSION_SCENARIOS.filter((scenario) => wanted.has(scenario.id))
2015
2235
  }
@@ -2,6 +2,8 @@ import test from 'node:test'
2
2
  import assert from 'node:assert/strict'
3
3
  import {
4
4
  inferAutomaticMemoryCategory,
5
+ isDirectMemoryWriteRequest,
6
+ isCurrentThreadRecallRequest,
5
7
  normalizeMemoryCategory,
6
8
  shouldAutoCaptureMemory,
7
9
  shouldInjectMemoryContext,
@@ -21,6 +23,36 @@ test('shouldInjectMemoryContext skips low-signal greetings and acknowledgements'
21
23
  assert.equal(shouldInjectMemoryContext('Compare the current deployment plan with what we decided yesterday'), true)
22
24
  })
23
25
 
26
+ test('isCurrentThreadRecallRequest detects same-thread recall without matching store commands', () => {
27
+ assert.equal(
28
+ isCurrentThreadRecallRequest('What preferences did I tell you earlier in this conversation? Answer from this conversation only.'),
29
+ true,
30
+ )
31
+ assert.equal(
32
+ isCurrentThreadRecallRequest('You just stored my favorite language in this chat. What was it?'),
33
+ true,
34
+ )
35
+ assert.equal(
36
+ isCurrentThreadRecallRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns.'),
37
+ false,
38
+ )
39
+ assert.equal(
40
+ isCurrentThreadRecallRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns. Then confirm what you just stored.'),
41
+ false,
42
+ )
43
+ })
44
+
45
+ test('isDirectMemoryWriteRequest detects remember-and-confirm turns without matching recall questions', () => {
46
+ assert.equal(
47
+ isDirectMemoryWriteRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns. Then confirm what you just stored.'),
48
+ true,
49
+ )
50
+ assert.equal(
51
+ isDirectMemoryWriteRequest('What preferences did I tell you earlier in this conversation?'),
52
+ false,
53
+ )
54
+ })
55
+
24
56
  test('shouldAutoCaptureMemory filters noisy turns', () => {
25
57
  assert.equal(shouldAutoCaptureMemory({ message: 'thanks', response: 'Happy to help with that.', source: 'chat' }), false)
26
58
  assert.equal(shouldAutoCaptureMemory({ message: 'Please save this to memory', response: 'Stored memory "note".', source: 'chat' }), false)
@@ -4,6 +4,10 @@ const ACK_RE = /^(?:ok(?:ay)?|cool|nice|got it|makes sense|thanks|thank you|thx|
4
4
  const GREETING_RE = /^(?:hi|hello|hey|yo|morning|good morning|good afternoon|good evening)[.! ]*$/i
5
5
  const MEMORY_META_RE = /\b(?:remember|memory|memorize|store this|save this|forget)\b/i
6
6
  const LOW_SIGNAL_RESPONSE_RE = /^(?:HEARTBEAT_OK|NO_MESSAGE)\b/i
7
+ const CURRENT_THREAD_RECALL_MARKER_RE = /\b(?:this conversation|this chat|this thread|current conversation|current chat|current thread|same thread|same chat|same conversation|earlier in (?:this )?(?:conversation|chat|thread)|from (?:this|our) (?:conversation|chat|thread)|you just stored|you just said|we just discussed|we just decided)\b/i
8
+ const CURRENT_THREAD_RECALL_INTENT_RE = /\b(?:what|which|who|when|where|did|remind|recap|summarize|repeat|list|tell me|answer|confirm|recall|mention)\b/i
9
+ const DIRECT_MEMORY_WRITE_MARKER_RE = /\b(?:remember|memorize|store|save|write to memory|add to memory|update.*memory|correct.*memory)\b/i
10
+ const DIRECT_MEMORY_WRITE_FOLLOWUP_RE = /\b(?:confirm|recap|repeat|summarize|what you just stored|what you saved|what you updated)\b/i
7
11
 
8
12
  function normalizeWhitespace(value: string): string {
9
13
  return value.replace(/\s+/g, ' ').trim()
@@ -21,6 +25,27 @@ export function shouldInjectMemoryContext(message: string): boolean {
21
25
  return true
22
26
  }
23
27
 
28
+ export function isCurrentThreadRecallRequest(message: string): boolean {
29
+ const trimmed = normalizeWhitespace(message)
30
+ if (!trimmed) return false
31
+ if (!CURRENT_THREAD_RECALL_MARKER_RE.test(trimmed)) return false
32
+ if (DIRECT_MEMORY_WRITE_MARKER_RE.test(trimmed) && DIRECT_MEMORY_WRITE_FOLLOWUP_RE.test(trimmed)) return false
33
+ if (/\b(?:remember|store|save)\b/i.test(trimmed) && !/\?\s*$/.test(trimmed) && !/\b(?:what|which|who|when|where|did|confirm|recap|summarize|repeat|list|tell me|answer|recall)\b/i.test(trimmed)) {
34
+ return false
35
+ }
36
+ return CURRENT_THREAD_RECALL_INTENT_RE.test(trimmed) || /\?\s*$/.test(trimmed)
37
+ }
38
+
39
+ export function isDirectMemoryWriteRequest(message: string): boolean {
40
+ const trimmed = normalizeWhitespace(message)
41
+ if (!trimmed) return false
42
+ const directWriteLike = DIRECT_MEMORY_WRITE_MARKER_RE.test(trimmed)
43
+ if (!directWriteLike) return false
44
+ if (/\?\s*$/.test(trimmed) && !DIRECT_MEMORY_WRITE_FOLLOWUP_RE.test(trimmed)) return false
45
+ if (isCurrentThreadRecallRequest(trimmed) && !DIRECT_MEMORY_WRITE_FOLLOWUP_RE.test(trimmed)) return false
46
+ return true
47
+ }
48
+
24
49
  export function shouldAutoCaptureMemoryTurn(message: string, response: string): boolean {
25
50
  const normalizedMessage = normalizeWhitespace(message)
26
51
  const normalizedResponse = normalizeWhitespace(response)
@@ -69,6 +69,13 @@ describe('canonicalizePluginId', () => {
69
69
  assert.equal(canonicalizePluginId('memory_tool'), 'memory')
70
70
  })
71
71
 
72
+ it('resolves narrow memory tools → memory', () => {
73
+ assert.equal(canonicalizePluginId('memory_search'), 'memory')
74
+ assert.equal(canonicalizePluginId('memory_get'), 'memory')
75
+ assert.equal(canonicalizePluginId('memory_store'), 'memory')
76
+ assert.equal(canonicalizePluginId('memory_update'), 'memory')
77
+ })
78
+
72
79
  it('keeps files (already canonical)', () => {
73
80
  assert.equal(canonicalizePluginId('files'), 'files')
74
81
  })