npm - @swarmclawai/swarmclaw - Versions diffs - 0.8.0 → 0.8.2 - Mend

@swarmclawai/swarmclaw 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/README.md +8 -7
package/package.json +2 -2
package/src/app/api/notifications/route.ts +11 -12
package/src/app/page.tsx +9 -0
package/src/components/chat/chat-list.tsx +10 -9
package/src/components/home/home-view.tsx +13 -2
package/src/components/layout/app-layout.tsx +1 -0
package/src/components/shared/command-palette.tsx +4 -1
package/src/components/shared/notification-center.tsx +7 -1
package/src/components/shared/search-dialog.tsx +10 -2
package/src/lib/local-observability.test.ts +73 -0
package/src/lib/local-observability.ts +47 -0
package/src/lib/notification-utils.test.ts +72 -0
package/src/lib/notification-utils.ts +68 -0
package/src/lib/providers/openclaw.test.ts +21 -1
package/src/lib/providers/openclaw.ts +22 -0
package/src/lib/runtime-loop.ts +1 -1
package/src/lib/server/agent-thread-session.test.ts +41 -0
package/src/lib/server/agent-thread-session.ts +1 -0
package/src/lib/server/chat-execution-advanced.test.ts +7 -0
package/src/lib/server/chat-execution-eval-history.test.ts +111 -0
package/src/lib/server/chat-execution.ts +22 -5
package/src/lib/server/create-notification.test.ts +94 -0
package/src/lib/server/create-notification.ts +31 -25
package/src/lib/server/daemon-state.test.ts +50 -0
package/src/lib/server/daemon-state.ts +121 -38
package/src/lib/server/eval/agent-regression-advanced.test.ts +11 -0
package/src/lib/server/eval/agent-regression.test.ts +13 -1
package/src/lib/server/eval/agent-regression.ts +221 -1
package/src/lib/server/memory-policy.test.ts +32 -0
package/src/lib/server/memory-policy.ts +25 -0
package/src/lib/server/plugins-advanced.test.ts +7 -0
package/src/lib/server/runtime-settings.test.ts +2 -2
package/src/lib/server/session-tools/crud.test.ts +136 -0
package/src/lib/server/session-tools/crud.ts +44 -2
package/src/lib/server/session-tools/delegate-fallback.test.ts +36 -0
package/src/lib/server/session-tools/delegate.ts +30 -0
package/src/lib/server/session-tools/discovery-approvals.test.ts +40 -0
package/src/lib/server/session-tools/discovery.ts +7 -6
package/src/lib/server/session-tools/memory.ts +156 -6
package/src/lib/server/session-tools/session-tools-wiring.test.ts +12 -0
package/src/lib/server/session-tools/subagent.ts +4 -4
package/src/lib/server/storage.ts +14 -1
package/src/lib/server/stream-agent-chat.test.ts +78 -1
package/src/lib/server/stream-agent-chat.ts +225 -22
package/src/lib/server/tool-aliases.ts +1 -1
package/src/lib/server/tool-capability-policy.ts +1 -1
package/src/stores/use-app-store.ts +26 -1
package/src/types/index.ts +4 -0

package/src/lib/server/daemon-state.ts CHANGED Viewed

@@ -5,9 +5,9 @@ import { startScheduler, stopScheduler } from './scheduler'
 import { sweepOrphanedBrowsers, getActiveBrowserCount } from './session-tools'
 import {
   autoStartConnectors,
-  stopAllConnectors,
   listRunningConnectors,
   sendConnectorMessage,
+  stopAllConnectors,
   startConnector,
   getConnectorStatus,
   checkConnectorHealth,
@@ -25,7 +25,7 @@ import { WORKSPACE_DIR } from './data-dir'
 import { DEFAULT_HEARTBEAT_INTERVAL_SEC } from '@/lib/heartbeat-defaults'
 import { genId } from '@/lib/id'
 import path from 'node:path'
-import type { WebhookRetryEntry } from '@/types'
+import type { Session, WebhookRetryEntry } from '@/types'
 import { createNotification } from '@/lib/server/create-notification'
 import { pingProvider, OPENAI_COMPATIBLE_DEFAULTS } from '@/lib/server/provider-health'
 import { runIntegrityMonitor } from '@/lib/server/integrity-monitor'
@@ -75,17 +75,41 @@ function parseHeartbeatIntervalSec(value: unknown, fallback = DEFAULT_HEARTBEAT_
   return Math.max(0, Math.min(3600, Math.trunc(parsed)))
 }
-function normalizeWhatsappTarget(raw?: string | null): string | null {
-  const input = (raw || '').trim()
-  if (!input) return null
-  if (input.includes('@')) return input
-  let digits = input.replace(/[^\d+]/g, '')
-  if (digits.startsWith('+')) digits = digits.slice(1)
-  if (digits.startsWith('0') && digits.length >= 10) {
-    digits = `44${digits.slice(1)}`
-  }
-  digits = digits.replace(/[^\d]/g, '')
-  return digits ? `${digits}@s.whatsapp.net` : null
+export function shouldNotifyProviderReachabilityIssue(provider: string): boolean {
+  return provider !== 'openclaw'
+}
+const SYNTHETIC_HEALTH_SESSION_USERS = new Set(['workbench', 'comparison-bench'])
+const SYNTHETIC_HEALTH_SESSION_PREFIXES = ['wb-', 'cmp-']
+function hasSyntheticHealthPrefix(value: unknown): boolean {
+  const normalized = typeof value === 'string' ? value.trim().toLowerCase() : ''
+  return SYNTHETIC_HEALTH_SESSION_PREFIXES.some((prefix) => normalized.startsWith(prefix))
+}
+export function shouldSuppressSessionHeartbeatHealthAlert(
+  session: Pick<Session, 'id' | 'name' | 'user' | 'shortcutForAgentId'>,
+): boolean {
+  const user = typeof session.user === 'string' ? session.user.trim().toLowerCase() : ''
+  if (SYNTHETIC_HEALTH_SESSION_USERS.has(user)) return true
+  if (hasSyntheticHealthPrefix(session.id)) return true
+  if (hasSyntheticHealthPrefix(session.shortcutForAgentId)) return true
+  const name = typeof session.name === 'string' ? session.name.trim().toLowerCase() : ''
+  return name.startsWith('workbench ')
+    || name.startsWith('assistant benchmark ')
+    || name.startsWith('comparison ')
+}
+export function shouldSuppressSyntheticAgentHealthAlert(agentId: string): boolean {
+  return hasSyntheticHealthPrefix(agentId)
+}
+export function buildSessionHeartbeatHealthDedupKey(
+  sessionId: string,
+  state: 'stale' | 'auto-disabled',
+): string {
+  return `health-alert:session-heartbeat:${state}:${sessionId}`
 }
 // Store daemon state on globalThis to survive HMR reloads
@@ -268,23 +292,24 @@ function stopQueueProcessor() {
   }
 }
-async function sendHealthAlert(text: string) {
+async function sendHealthAlert(input: string | {
+  text: string
+  dedupKey?: string
+  entityType?: string
+  entityId?: string
+}) {
+  const payload = typeof input === 'string' ? { text: input } : input
+  const text = payload.text
   console.warn(`[health] ${text}`)
-  try {
-    const running = listRunningConnectors('whatsapp')
-    if (!running.length) return
-    const candidate = running[0]
-    const target = candidate.recentChannelId
-      || normalizeWhatsappTarget(candidate.configuredTargets[0] || null)
-    if (!target) return
-    await sendConnectorMessage({
-      connectorId: candidate.id,
-      channelId: target,
-      text: `⚠️ SwarmClaw health alert: ${text}`,
-    })
-  } catch {
-    // alerts are best effort; log-only fallback is acceptable
-  }
+  createNotification({
+    type: 'warning',
+    title: 'SwarmClaw health alert',
+    message: text,
+    dedupKey: payload.dedupKey || `health-alert:${text}`,
+    entityType: payload.entityType,
+    entityId: payload.entityId,
+    dispatchExternally: false,
+  })
 }
 async function runConnectorHealthChecks(now: number) {
@@ -526,6 +551,7 @@ async function runProviderHealthChecks() {
   for (const agent of Object.values(agents) as Record<string, unknown>[]) {
     if (!agent?.id || typeof agent.id !== 'string') continue
+    if (shouldSuppressSyntheticAgentHealthAlert(agent.id)) continue
     const provider = typeof agent.provider === 'string' ? agent.provider : ''
     if (!provider || ['claude-cli', 'codex-cli', 'opencode-cli'].includes(provider)) continue
@@ -564,9 +590,11 @@ async function runProviderHealthChecks() {
     const result = await pingProvider(tuple.provider, apiKey, endpoint)
     if (!result.ok) {
-      const dedupKey = tuple.provider === 'openclaw'
-        ? `openclaw-down:${tuple.agentId}`
-        : `provider-down:${tuple.credentialId || tuple.provider}`
+      if (!shouldNotifyProviderReachabilityIssue(tuple.provider)) {
+        continue
+      }
+      const dedupKey = `provider-down:${tuple.credentialId || tuple.provider}`
       const entityType = tuple.credentialId ? 'credential' : undefined
       const entityId = tuple.credentialId || undefined
@@ -596,6 +624,7 @@ async function runOpenClawGatewayHealthChecks() {
   for (const agent of Object.values(agents) as Record<string, unknown>[]) {
     if (!agent?.id || typeof agent.id !== 'string') continue
+    if (shouldSuppressSyntheticAgentHealthAlert(agent.id)) continue
     if (agent.provider !== 'openclaw') continue
     const key = `openclaw:${agent.id}`
@@ -747,6 +776,11 @@ async function runHealthChecks() {
     if (session.heartbeatEnabled !== true) continue
     const sessionId = session.id
+    if (shouldSuppressSessionHeartbeatHealthAlert(session as Pick<Session, 'id' | 'name' | 'user' | 'shortcutForAgentId'>)) {
+      ds.staleSessionIds.delete(sessionId)
+      continue
+    }
     const sessionLabel = String(session.name || sessionId)
     const intervalSec = parseHeartbeatIntervalSec(session.heartbeatIntervalSec, DEFAULT_HEARTBEAT_INTERVAL_SEC)
     if (intervalSec <= 0) continue
@@ -762,9 +796,12 @@ async function runHealthChecks() {
         session.lastActiveAt = now
         sessionsDirty = true
         ds.staleSessionIds.delete(sessionId)
-        await sendHealthAlert(
-          `Auto-disabled heartbeat for stale session "${sessionLabel}" after ${Math.round(staleForMs / 60_000)}m of inactivity.`,
-        )
+        await sendHealthAlert({
+          text: `Auto-disabled heartbeat for stale session "${sessionLabel}" after ${Math.round(staleForMs / 60_000)}m of inactivity.`,
+          dedupKey: buildSessionHeartbeatHealthDedupKey(sessionId, 'auto-disabled'),
+          entityType: 'session',
+          entityId: sessionId,
+        })
         continue
       }
@@ -772,9 +809,12 @@ async function runHealthChecks() {
       // Only alert on transition from healthy → stale (once per stale episode)
       if (!ds.staleSessionIds.has(sessionId)) {
         ds.staleSessionIds.add(sessionId)
-        await sendHealthAlert(
-          `Session "${sessionLabel}" heartbeat appears stale (last active ${(Math.round(staleForMs / 1000))}s ago, interval ${intervalSec}s).`,
-        )
+        await sendHealthAlert({
+          text: `Session "${sessionLabel}" heartbeat appears stale (last active ${(Math.round(staleForMs / 1000))}s ago, interval ${intervalSec}s).`,
+          dedupKey: buildSessionHeartbeatHealthDedupKey(sessionId, 'stale'),
+          entityType: 'session',
+          entityId: sessionId,
+        })
       }
     }
   }
@@ -980,6 +1020,49 @@ function stopEvalScheduler() {
   }
 }
+function refreshDaemonTimersForHotReload() {
+  if (!ds.running) return
+  if (ds.queueIntervalId) {
+    clearInterval(ds.queueIntervalId)
+    ds.queueIntervalId = null
+    startQueueProcessor()
+  }
+  if (ds.browserSweepId) {
+    clearInterval(ds.browserSweepId)
+    ds.browserSweepId = null
+    startBrowserSweep()
+  }
+  if (ds.healthIntervalId) {
+    clearInterval(ds.healthIntervalId)
+    ds.healthIntervalId = null
+    startHealthMonitor()
+  }
+  if (ds.connectorHealthIntervalId) {
+    clearInterval(ds.connectorHealthIntervalId)
+    ds.connectorHealthIntervalId = null
+    startConnectorHealthMonitor()
+  }
+  if (ds.memoryConsolidationTimeoutId || ds.memoryConsolidationIntervalId) {
+    stopMemoryConsolidation()
+    startMemoryConsolidation()
+  }
+  if (ds.evalSchedulerIntervalId) {
+    stopEvalScheduler()
+    startEvalScheduler()
+  }
+}
+// In dev/HMR, the daemon state survives on globalThis while interval callbacks keep
+// the old module closure alive. Refresh long-lived timers so they always run the
+// current module's logic instead of stale health-alert code paths.
+refreshDaemonTimersForHotReload()
 export async function runDaemonHealthCheckNow() {
   await Promise.all([
     runHealthChecks(),

package/src/lib/server/eval/agent-regression-advanced.test.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { describe, it } from 'node:test'
 import {
   AGENT_REGRESSION_SCENARIOS,
+  DEFAULT_AGENT_REGRESSION_SCENARIO_IDS,
   resolveRegressionApprovalSettings,
   resolveRegressionPlugins,
   scoreAssertions,
@@ -266,6 +267,10 @@ describe('AGENT_REGRESSION_SCENARIOS registry', () => {
       'mock-signup-secret-email',
       'human-verified-signup',
       'research-build-deploy',
+      'blackboard-orchestrator-fit',
+      'tool-call-efficiency',
+      'file-creation-followthrough',
+      'knowledge-first-file',
     ])
   })
@@ -282,6 +287,12 @@ describe('AGENT_REGRESSION_SCENARIOS registry', () => {
     }
   })
+  it('default suite ids exclude exploratory regressions unless explicitly requested', () => {
+    assert.ok(!DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('blackboard-orchestrator-fit'))
+    assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('approval-resume'))
+    assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('knowledge-first-file'))
+  })
   it('no duplicate scenario IDs', () => {
     const ids = AGENT_REGRESSION_SCENARIOS.map((s) => s.id)
     const unique = new Set(ids)

package/src/lib/server/eval/agent-regression.test.ts CHANGED Viewed

@@ -1,6 +1,12 @@
 import assert from 'node:assert/strict'
 import { describe, it } from 'node:test'
-import { AGENT_REGRESSION_SCENARIOS, resolveRegressionApprovalSettings, resolveRegressionPlugins, scoreAssertions } from './agent-regression'
+import {
+  AGENT_REGRESSION_SCENARIOS,
+  DEFAULT_AGENT_REGRESSION_SCENARIO_IDS,
+  resolveRegressionApprovalSettings,
+  resolveRegressionPlugins,
+  scoreAssertions,
+} from './agent-regression'
 describe('agent regression helpers', () => {
   it('maps approval modes onto deterministic platform settings', () => {
@@ -42,12 +48,18 @@ describe('agent regression helpers', () => {
       'mock-signup-secret-email',
       'human-verified-signup',
       'research-build-deploy',
+      'blackboard-orchestrator-fit',
       'tool-call-efficiency',
       'file-creation-followthrough',
       'knowledge-first-file',
     ])
   })
+  it('keeps exploratory scenarios out of the default suite score path', () => {
+    assert.ok(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('research-build-deploy'))
+    assert.ok(!DEFAULT_AGENT_REGRESSION_SCENARIO_IDS.includes('blackboard-orchestrator-fit'))
+  })
   it('can resolve regressions against the agent capability set instead of injected scenario plugins', () => {
     const resolved = resolveRegressionPlugins(
       ['delegate', 'browser', 'manage_secrets', 'email'],

package/src/lib/server/eval/agent-regression.ts CHANGED Viewed

@@ -28,6 +28,7 @@ import {
   loadTasks,
   loadWatchJobs,
   saveSchedules,
+  saveAgents,
   saveSecrets,
   saveSessions,
   saveSettings,
@@ -104,6 +105,7 @@ interface AgentRegressionScenarioDefinition {
   id: string
   name: string
   plugins: string[]
+  defaultInSuite?: boolean
   run: (ctx: ScenarioContext) => Promise<AgentRegressionScenarioResult>
 }
@@ -927,6 +929,15 @@ function cleanupScenarioState(ctx: ScenarioContext): void {
     deleteApproval(approval.id)
   }
+  const agents = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
+  let agentsChanged = false
+  for (const [agentId, agent] of Object.entries(agents)) {
+    if (agent?.createdInSessionId !== ctx.sessionId) continue
+    delete agents[agentId]
+    agentsChanged = true
+  }
+  if (agentsChanged) saveAgents(agents)
   const watchJobs = loadWatchJobs() as Record<string, Record<string, unknown>>
   for (const [watchJobId, watchJob] of Object.entries(watchJobs)) {
     if (watchJob?.sessionId === ctx.sessionId) deleteWatchJob(watchJobId)
@@ -1710,6 +1721,201 @@ async function runResearchBuildDeployScenario(ctx: ScenarioContext): Promise<Age
   }
 }
+async function runBlackboardOrchestratorScenario(ctx: ScenarioContext): Promise<AgentRegressionScenarioResult> {
+  const noteRelativePath = 'ops/blackboard-fit.md'
+  const notePath = scenarioFile(ctx, noteRelativePath)
+  const prefix = `Eval ${ctx.sessionId.slice(-8)}`
+  const departments = [
+    { agentName: `${prefix} Research Orchestrator`, taskTitle: `${prefix} research-blackboard` },
+    { agentName: `${prefix} Product Orchestrator`, taskTitle: `${prefix} product-blackboard` },
+    { agentName: `${prefix} Revenue Orchestrator`, taskTitle: `${prefix} revenue-blackboard` },
+    { agentName: `${prefix} Operations Orchestrator`, taskTitle: `${prefix} operations-blackboard` },
+    { agentName: `${prefix} Support Orchestrator`, taskTitle: `${prefix} support-blackboard` },
+  ]
+  const agentsBefore = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
+  const currentAgent = agentsBefore[ctx.agentId]
+  const previousAssignScope = typeof currentAgent?.platformAssignScope === 'string'
+    ? currentAgent.platformAssignScope
+    : undefined
+  if (currentAgent) {
+    currentAgent.platformAssignScope = 'all'
+    currentAgent.updatedAt = Date.now()
+    agentsBefore[ctx.agentId] = currentAgent
+    saveAgents(agentsBefore)
+    ctx.agent.platformAssignScope = 'all'
+  }
+  try {
+    const prompt = [
+      'Evaluate whether SwarmClaw can support a zero-work KING COO orchestrator model.',
+      'Do not do any department implementation work yourself.',
+      'Use manage_agents to create exactly five full agents with these exact names:',
+      ...departments.map((department) => `- ${department.agentName}`),
+      'Give each agent a short soul that describes a department orchestrator or execution lead.',
+      'Use manage_tasks to create exactly five backlog tasks with these exact titles and assign one task to each new agent:',
+      ...departments.map((department) => `- ${department.taskTitle}`),
+      `Write "${noteRelativePath}" with sections "Supported Today", "Native Gaps", and "Bridging Plan".`,
+      'In that note, mention that SwarmClaw already has native agents, task queues, memory, and chatroom/connector communication primitives.',
+      'Also state clearly that SurrealDB would currently be an external integration or custom backing store, not a native built-in blackboard database.',
+      'In your final response list the created agent ids, the created task ids, reference the note path, and say explicitly that the orchestrator stayed coordinator-only.',
+    ].join('\n')
+    await runTurn(ctx, prompt)
+    let createdAgents = Object.values(loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>)
+      .filter((agent) => agent?.createdInSessionId === ctx.sessionId)
+    let createdTasks = Object.values(loadTasks() as Record<string, Record<string, unknown>>)
+      .filter((task) => task?.createdInSessionId === ctx.sessionId)
+    if (createdAgents.length < departments.length || createdTasks.length < departments.length || !fs.existsSync(notePath)) {
+      await runTurn(
+        ctx,
+        'Finish the orchestration setup exactly as requested. Create any missing agents, create any missing backlog tasks assigned to those agents, and write the missing architecture note. Do not do department implementation work yourself.',
+      )
+      createdAgents = Object.values(loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>)
+        .filter((agent) => agent?.createdInSessionId === ctx.sessionId)
+      createdTasks = Object.values(loadTasks() as Record<string, Record<string, unknown>>)
+        .filter((task) => task?.createdInSessionId === ctx.sessionId)
+    }
+    const expectedAgentNames = new Set(departments.map((department) => department.agentName))
+    const expectedTaskTitles = new Set(departments.map((department) => department.taskTitle))
+    const createdAgentIds = new Set(
+      createdAgents
+        .map((agent) => (typeof agent.id === 'string' ? agent.id : ''))
+        .filter(Boolean),
+    )
+    const createdTaskTitles = new Set(
+      createdTasks
+        .map((task) => (typeof task.title === 'string' ? task.title : ''))
+        .filter(Boolean),
+    )
+    const allTasksAssignedToCreatedAgents = createdTasks.length > 0 && createdTasks.every((task) => (
+      typeof task.agentId === 'string' && createdAgentIds.has(task.agentId)
+    ))
+    const noTasksAssignedToCoordinator = createdTasks.every((task) => task.agentId !== ctx.agentId)
+    const statusesAcceptable = createdTasks.every((task) => ['backlog', 'queued'].includes(String(task.status || '')))
+    let noteText = readIfExists(notePath)
+    let responseBlob = ctx.responseTexts.join('\n').toLowerCase()
+    const hasCoordinatorSummary = () => (
+      responseBlob.includes(noteRelativePath.toLowerCase())
+      && (
+        responseBlob.includes('coordinator-only')
+        || responseBlob.includes('stayed coordinator')
+        || responseBlob.includes('did not do department implementation')
+      )
+    )
+    const hasFitGapNote = () => {
+      const noteLower = noteText.toLowerCase()
+      return noteText.includes('## Supported Today')
+        && noteText.includes('## Native Gaps')
+        && noteText.includes('## Bridging Plan')
+        && noteLower.includes('surrealdb')
+        && (noteLower.includes('external integration') || noteLower.includes('not a native') || noteLower.includes('custom backing store'))
+        && noteLower.includes('task')
+        && noteLower.includes('agent')
+        && (noteLower.includes('chatroom') || noteLower.includes('connector'))
+        && noteLower.includes('memory')
+    }
+    if (!hasFitGapNote() || !hasCoordinatorSummary()) {
+      await runTurn(
+        ctx,
+        [
+          `If "${noteRelativePath}" is missing or incomplete, write it now with the required sections and SurrealDB gap explanation.`,
+          'Then reply with a concise summary that lists the created agent ids, the created task ids, references the note path exactly, and says the orchestrator stayed coordinator-only.',
+        ].join(' '),
+      )
+      noteText = readIfExists(notePath)
+      responseBlob = ctx.responseTexts.join('\n').toLowerCase()
+    }
+    const assertions: RegressionAssertion[] = [
+      {
+        name: 'manage_agents used',
+        passed: ctx.toolNames.has('manage_agents'),
+        weight: 2,
+      },
+      {
+        name: 'manage_tasks used',
+        passed: ctx.toolNames.has('manage_tasks'),
+        weight: 2,
+      },
+      {
+        name: 'five orchestrator agents created',
+        passed: createdAgents.length === departments.length
+          && createdAgents.every((agent) => expectedAgentNames.has(String(agent.name || ''))),
+        details: createdAgents.map((agent) => `${agent.id}:${agent.name}`).join(' | '),
+        weight: 3,
+      },
+      {
+        name: 'five backlog tasks assigned to created agents',
+        passed: createdTasks.length === departments.length
+          && [...expectedTaskTitles].every((title) => createdTaskTitles.has(title))
+          && allTasksAssignedToCreatedAgents
+          && statusesAcceptable,
+        details: createdTasks.map((task) => `${task.id}:${task.title}:${task.agentId}:${task.status}`).join(' | '),
+        weight: 3,
+      },
+      {
+        name: 'coordinator kept execution off itself',
+        passed: noTasksAssignedToCoordinator,
+        weight: 2,
+      },
+      {
+        name: 'fit-gap note explains native primitives and SurrealDB gap',
+        passed: hasFitGapNote(),
+        details: truncatePreview(noteText),
+        weight: 3,
+      },
+      {
+        name: 'final response references coordinator-only orchestration note',
+        passed: hasCoordinatorSummary(),
+      },
+    ]
+    const scored = scoreAssertions(assertions)
+    return {
+      scenarioId: 'blackboard-orchestrator-fit',
+      name: 'Blackboard Orchestrator Fit',
+      approvalMode: ctx.approvalMode,
+      pluginMode: ctx.pluginMode,
+      ...scored,
+      assertions,
+      sessionId: ctx.sessionId,
+      workspaceDir: ctx.workspaceDir,
+      requiredPlugins: [...ctx.requiredPlugins],
+      effectivePlugins: [...ctx.effectivePlugins],
+      missingPlugins: [...ctx.missingPlugins],
+      toolNames: Array.from(ctx.toolNames),
+      approvalIds: [],
+      approvals: buildApprovalEvidence(ctx.sessionId),
+      responseTexts: [...ctx.responseTexts],
+      turns: [...ctx.turns],
+      artifacts: buildArtifactEvidence(ctx, [noteRelativePath]),
+      evidencePaths: writeScenarioEvidenceFiles(ctx),
+    }
+  } finally {
+    const latestAgents = loadAgents({ includeTrashed: true }) as Record<string, Record<string, unknown>>
+    if (latestAgents[ctx.agentId]) {
+      if (previousAssignScope) {
+        latestAgents[ctx.agentId].platformAssignScope = previousAssignScope
+      } else {
+        delete latestAgents[ctx.agentId].platformAssignScope
+      }
+      latestAgents[ctx.agentId].updatedAt = Date.now()
+      saveAgents(latestAgents)
+    }
+    if (previousAssignScope) {
+      ctx.agent.platformAssignScope = previousAssignScope
+    } else {
+      delete ctx.agent.platformAssignScope
+    }
+  }
+}
 /**
  * Tool-call efficiency scenario: verifies the agent uses minimal tool calls
  * for simple data-retrieval tasks. Catches regressions like:
@@ -1988,6 +2194,13 @@ export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
     plugins: ['http_request', 'files', 'browser'],
     run: runResearchBuildDeployScenario,
   },
+  {
+    id: 'blackboard-orchestrator-fit',
+    name: 'Blackboard Orchestrator Fit',
+    plugins: ['manage_agents', 'manage_tasks', 'files'],
+    defaultInSuite: false,
+    run: runBlackboardOrchestratorScenario,
+  },
   {
     id: 'tool-call-efficiency',
     name: 'Tool Call Efficiency',
@@ -2008,8 +2221,15 @@ export const AGENT_REGRESSION_SCENARIOS: AgentRegressionScenarioDefinition[] = [
   },
 ]
+export const DEFAULT_AGENT_REGRESSION_SCENARIO_IDS = AGENT_REGRESSION_SCENARIOS
+  .filter((scenario) => scenario.defaultInSuite !== false)
+  .map((scenario) => scenario.id)
 function resolveScenarioDefinitions(ids?: string[]): AgentRegressionScenarioDefinition[] {
-  if (!ids?.length) return AGENT_REGRESSION_SCENARIOS
+  if (!ids?.length) {
+    const wanted = new Set(DEFAULT_AGENT_REGRESSION_SCENARIO_IDS)
+    return AGENT_REGRESSION_SCENARIOS.filter((scenario) => wanted.has(scenario.id))
+  }
   const wanted = new Set(ids)
   return AGENT_REGRESSION_SCENARIOS.filter((scenario) => wanted.has(scenario.id))
 }

package/src/lib/server/memory-policy.test.ts CHANGED Viewed

@@ -2,6 +2,8 @@ import test from 'node:test'
 import assert from 'node:assert/strict'
 import {
   inferAutomaticMemoryCategory,
+  isDirectMemoryWriteRequest,
+  isCurrentThreadRecallRequest,
   normalizeMemoryCategory,
   shouldAutoCaptureMemory,
   shouldInjectMemoryContext,
@@ -21,6 +23,36 @@ test('shouldInjectMemoryContext skips low-signal greetings and acknowledgements'
   assert.equal(shouldInjectMemoryContext('Compare the current deployment plan with what we decided yesterday'), true)
 })
+test('isCurrentThreadRecallRequest detects same-thread recall without matching store commands', () => {
+  assert.equal(
+    isCurrentThreadRecallRequest('What preferences did I tell you earlier in this conversation? Answer from this conversation only.'),
+    true,
+  )
+  assert.equal(
+    isCurrentThreadRecallRequest('You just stored my favorite language in this chat. What was it?'),
+    true,
+  )
+  assert.equal(
+    isCurrentThreadRecallRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns.'),
+    false,
+  )
+  assert.equal(
+    isCurrentThreadRecallRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns. Then confirm what you just stored.'),
+    false,
+  )
+})
+test('isDirectMemoryWriteRequest detects remember-and-confirm turns without matching recall questions', () => {
+  assert.equal(
+    isDirectMemoryWriteRequest('Remember that my favorite programming language is Rust and I prefer functional programming patterns. Then confirm what you just stored.'),
+    true,
+  )
+  assert.equal(
+    isDirectMemoryWriteRequest('What preferences did I tell you earlier in this conversation?'),
+    false,
+  )
+})
 test('shouldAutoCaptureMemory filters noisy turns', () => {
   assert.equal(shouldAutoCaptureMemory({ message: 'thanks', response: 'Happy to help with that.', source: 'chat' }), false)
   assert.equal(shouldAutoCaptureMemory({ message: 'Please save this to memory', response: 'Stored memory "note".', source: 'chat' }), false)

package/src/lib/server/memory-policy.ts CHANGED Viewed

@@ -4,6 +4,10 @@ const ACK_RE = /^(?:ok(?:ay)?|cool|nice|got it|makes sense|thanks|thank you|thx|
 const GREETING_RE = /^(?:hi|hello|hey|yo|morning|good morning|good afternoon|good evening)[.! ]*$/i
 const MEMORY_META_RE = /\b(?:remember|memory|memorize|store this|save this|forget)\b/i
 const LOW_SIGNAL_RESPONSE_RE = /^(?:HEARTBEAT_OK|NO_MESSAGE)\b/i
+const CURRENT_THREAD_RECALL_MARKER_RE = /\b(?:this conversation|this chat|this thread|current conversation|current chat|current thread|same thread|same chat|same conversation|earlier in (?:this )?(?:conversation|chat|thread)|from (?:this|our) (?:conversation|chat|thread)|you just stored|you just said|we just discussed|we just decided)\b/i
+const CURRENT_THREAD_RECALL_INTENT_RE = /\b(?:what|which|who|when|where|did|remind|recap|summarize|repeat|list|tell me|answer|confirm|recall|mention)\b/i
+const DIRECT_MEMORY_WRITE_MARKER_RE = /\b(?:remember|memorize|store|save|write to memory|add to memory|update.*memory|correct.*memory)\b/i
+const DIRECT_MEMORY_WRITE_FOLLOWUP_RE = /\b(?:confirm|recap|repeat|summarize|what you just stored|what you saved|what you updated)\b/i
 function normalizeWhitespace(value: string): string {
   return value.replace(/\s+/g, ' ').trim()
@@ -21,6 +25,27 @@ export function shouldInjectMemoryContext(message: string): boolean {
   return true
 }
+export function isCurrentThreadRecallRequest(message: string): boolean {
+  const trimmed = normalizeWhitespace(message)
+  if (!trimmed) return false
+  if (!CURRENT_THREAD_RECALL_MARKER_RE.test(trimmed)) return false
+  if (DIRECT_MEMORY_WRITE_MARKER_RE.test(trimmed) && DIRECT_MEMORY_WRITE_FOLLOWUP_RE.test(trimmed)) return false
+  if (/\b(?:remember|store|save)\b/i.test(trimmed) && !/\?\s*$/.test(trimmed) && !/\b(?:what|which|who|when|where|did|confirm|recap|summarize|repeat|list|tell me|answer|recall)\b/i.test(trimmed)) {
+    return false
+  }
+  return CURRENT_THREAD_RECALL_INTENT_RE.test(trimmed) || /\?\s*$/.test(trimmed)
+}
+export function isDirectMemoryWriteRequest(message: string): boolean {
+  const trimmed = normalizeWhitespace(message)
+  if (!trimmed) return false
+  const directWriteLike = DIRECT_MEMORY_WRITE_MARKER_RE.test(trimmed)
+  if (!directWriteLike) return false
+  if (/\?\s*$/.test(trimmed) && !DIRECT_MEMORY_WRITE_FOLLOWUP_RE.test(trimmed)) return false
+  if (isCurrentThreadRecallRequest(trimmed) && !DIRECT_MEMORY_WRITE_FOLLOWUP_RE.test(trimmed)) return false
+  return true
+}
 export function shouldAutoCaptureMemoryTurn(message: string, response: string): boolean {
   const normalizedMessage = normalizeWhitespace(message)
   const normalizedResponse = normalizeWhitespace(response)

package/src/lib/server/plugins-advanced.test.ts CHANGED Viewed

@@ -69,6 +69,13 @@ describe('canonicalizePluginId', () => {
     assert.equal(canonicalizePluginId('memory_tool'), 'memory')
   })
+  it('resolves narrow memory tools → memory', () => {
+    assert.equal(canonicalizePluginId('memory_search'), 'memory')
+    assert.equal(canonicalizePluginId('memory_get'), 'memory')
+    assert.equal(canonicalizePluginId('memory_store'), 'memory')
+    assert.equal(canonicalizePluginId('memory_update'), 'memory')
+  })
   it('keeps files (already canonical)', () => {
     assert.equal(canonicalizePluginId('files'), 'files')
   })