@swarmclawai/swarmclaw 1.5.38 → 1.5.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -389,6 +389,15 @@ Operational docs: https://swarmclaw.ai/docs/observability
389
389
 
390
390
  ## Releases
391
391
 
392
+ ### v1.5.39 Highlights
393
+
394
+ - **Agents default to scoped tool access**: new agents (and existing agents whose `tools` list is non-empty) now have only their assigned tools described in the system prompt. This trims ~3 k input tokens per turn — an observed CEO/coordinator agent with 14 tools and 4 loaded skills went from 62 k to 38 k chars of system prompt. Opt back into the old firehose by toggling **Universal tool access** in the agent sheet's new "Context & Tool Access" section. Memory, context management, and `ask_human` are always included regardless of the scoped list.
395
+ - **Pinned skills budget hardening**: one long markdown skill was eating 24 k of a 62 k prompt. Inlined pinned-skill content is now capped at 3 k chars with a pointer to `use_skill` action="load" for the full guide, and auto-attached *learned* skills get a dedicated sub-budget (max 6 skills / 8 k chars) so they cannot dominate the main pinned-skills section.
396
+ - **OpenClaw chat fast-fails on dangling credentials**: v1.5.38 added gateway-side fast-fail; the chat streaming path now does the same, emitting a clear `err` event naming the missing credential instead of dialing the gateway unauthenticated and waiting 120 s for the timeout.
397
+ - **Queue: orphan-recovery auto-heals stale checkouts**: pre-1.5.38 storage could leave `queued` tasks with a stale `checkoutRunId` that `checkoutTask()` refused forever. Orphan recovery now clears the stale id in the same sweep that re-queues the task, and `reconcileFinishedRunningTasks` / agent-not-found / capability-mismatch paths also null out the checkout when they terminally fail a task.
398
+ - **Perf ring buffer raised to 2 000 entries**: queue/task repository events fire at ~20 Hz during task processing and were evicting chat-execution/prompt perf entries from the 200-entry buffer before they could be read. The larger buffer lets the perf viewer actually show a full turn.
399
+ - **Tests**: added regression tests for pre-1.5.38 stale-checkout orphan recovery and for the scoped-tool-access algorithm.
400
+
392
401
  ### v1.5.38 Highlights
393
402
 
394
403
  - **Task queue: reclaim stale checkouts**: `checkoutTask()` now reclaims a lingering `checkoutRunId` on a `queued` task instead of refusing it forever. An ungraceful server exit mid-turn (crash, SIGKILL, HMR reload) previously left tasks uncheckoutable, producing a dispatch → orphan-recovery → failed-checkout spin that logged "Recovering orphaned queued task" tens of thousands of times per session. `scheduleRetryOrDeadLetter()` also clears the prior checkout when scheduling a retry or dead-lettering.
@@ -422,12 +431,6 @@ Operational docs: https://swarmclaw.ai/docs/observability
422
431
  - **Gateway credential resolution logging**: when a gateway credential can't be resolved, the server now logs a clear warning identifying the missing credential ID.
423
432
  - **Credential decryption error logging**: when a stored credential can't be decrypted (e.g. after `CREDENTIAL_SECRET` changes), the server now logs the credential ID and provider so users know which key to re-add.
424
433
 
425
- ### v1.5.34 Highlights
426
-
427
- - **Ollama Cloud auth fix**: SwarmClaw now normalizes `api.ollama.com` and `www.ollama.com` to `ollama.com` before making authenticated requests, avoiding the redirect that was dropping authorization headers and causing false provider-health/runtime failures.
428
- - **Chat execution context hardening**: tool invocation now resolves names case-insensitively, oversized tool results are truncated before they are fed back into the model, and proactive grounding/heartbeat prompts stay smaller under pressure to reduce avoidable context blowouts.
429
- - **API compatibility fixes**: OpenAI-compatible streaming now captures reasoning deltas from providers that emit them outside `delta.content`, and A2A endpoints are exempt from the main proxy access-key gate so they can rely on their own auth scheme.
430
-
431
434
  Older releases: https://swarmclaw.ai/docs/release-notes
432
435
 
433
436
  - GitHub releases: https://github.com/swarmclawai/swarmclaw/releases
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@swarmclawai/swarmclaw",
3
- "version": "1.5.38",
3
+ "version": "1.5.39",
4
4
  "description": "Build and run autonomous AI agents with OpenClaw, Hermes, multiple model providers, orchestration, delegation, memory, skills, schedules, and chat connectors.",
5
5
  "main": "electron-dist/main.js",
6
6
  "license": "MIT",
@@ -210,6 +210,11 @@ export function AgentSheet() {
210
210
  const [delegationTargetMode, setDelegationTargetMode] = useState<'all' | 'selected'>('all')
211
211
  const [delegationTargetAgentIds, setDelegationTargetAgentIds] = useState<string[]>([])
212
212
  const [tools, setTools] = useState<string[]>([])
213
+ // Scoped tool access is the default for new agents (cuts ~3 k input tokens
214
+ // per turn). Server-side, agents with no persisted toolAccessMode are also
215
+ // scoped when their tools list is non-empty, otherwise universal; the
216
+ // new-agent setup path below explicitly writes 'scoped' so it persists on save.
217
+ const [toolAccessMode, setToolAccessMode] = useState<'universal' | 'scoped'>('scoped')
213
218
  const [extensions, setExtensions] = useState<string[]>([])
214
219
  const [enabledExtensionIds, setEnabledExtensionIds] = useState<Set<string> | null>(null)
215
220
  const [skills, setSkills] = useState<string[]>([])
@@ -415,6 +420,7 @@ export function AgentSheet() {
415
420
  setDelegationTargetMode(editing.delegationTargetMode === 'selected' ? 'selected' : 'all')
416
421
  setDelegationTargetAgentIds(editing.delegationTargetAgentIds || [])
417
422
  setTools(getEnabledToolIds(editing))
423
+ setToolAccessMode(editing.toolAccessMode === 'scoped' ? 'scoped' : 'universal')
418
424
  setExtensions(getEnabledExtensionIds(editing))
419
425
  setSkills(editing.skills || [])
420
426
  setSkillIds(editing.skillIds || [])
@@ -497,6 +503,7 @@ export function AgentSheet() {
497
503
  setDelegationTargetMode(src.delegationTargetMode === 'selected' ? 'selected' : 'all')
498
504
  setDelegationTargetAgentIds(src.delegationTargetAgentIds || [])
499
505
  setTools(getEnabledToolIds(src))
506
+ setToolAccessMode(src.toolAccessMode === 'scoped' ? 'scoped' : 'universal')
500
507
  setExtensions(getEnabledExtensionIds(src))
501
508
  setSkills(src.skills || [])
502
509
  setSkillIds(src.skillIds || [])
@@ -576,6 +583,7 @@ export function AgentSheet() {
576
583
  setDelegationTargetMode('all')
577
584
  setDelegationTargetAgentIds([])
578
585
  setTools(getDefaultAgentToolIds())
586
+ setToolAccessMode('scoped')
579
587
  setExtensions([])
580
588
  setSkills([])
581
589
  setSkillIds([])
@@ -783,6 +791,7 @@ export function AgentSheet() {
783
791
  delegationTargetMode: delegationEnabled || role === 'coordinator' ? delegationTargetMode : 'all',
784
792
  delegationTargetAgentIds: (delegationEnabled || role === 'coordinator') && delegationTargetMode === 'selected' ? delegationTargetAgentIds : [],
785
793
  tools,
794
+ toolAccessMode,
786
795
  extensions,
787
796
  skills,
788
797
  skillIds,
@@ -2005,6 +2014,30 @@ export function AgentSheet() {
2005
2014
  summary={advancedSummary}
2006
2015
  badges={agentAdvancedBadges}
2007
2016
  >
2017
+ <SectionCard
2018
+ title="Context & Tool Access"
2019
+ description="Control how many tools are described in this agent's system prompt. Scoped (default) keeps the agent focused and saves ~3 k input tokens per turn; Universal gives it visibility into every built-in tool."
2020
+ className="mb-6 border-white/[0.05] bg-white/[0.01]"
2021
+ >
2022
+ <div className="space-y-3">
2023
+ <label className="flex items-center gap-3 cursor-pointer">
2024
+ <div
2025
+ onClick={() => setToolAccessMode((current) => current === 'universal' ? 'scoped' : 'universal')}
2026
+ className={`w-11 h-6 rounded-full transition-all duration-200 relative cursor-pointer shrink-0 ${toolAccessMode === 'universal' ? 'bg-accent-bright' : 'bg-white/[0.08]'}`}
2027
+ >
2028
+ <div className={`absolute top-0.5 w-5 h-5 rounded-full bg-white transition-all duration-200 ${toolAccessMode === 'universal' ? 'left-[22px]' : 'left-0.5'}`} />
2029
+ </div>
2030
+ <span className="text-[13px] text-text-2">Universal tool access</span>
2031
+ <HintTip text="Off (default, recommended): the agent only sees tools enabled in its Tools list. On: every built-in tool is described in the system prompt. Turn on only for coordinator agents that need visibility across every possible downstream tool, or temporarily for debugging." />
2032
+ </label>
2033
+ <p className="text-[12px] text-text-3/70 pl-[56px] -mt-1">
2034
+ {toolAccessMode === 'universal'
2035
+ ? 'Full tool universe is injected into the prompt. Costs ~3 k more input tokens per turn.'
2036
+ : 'Only the tools enabled above are visible to the agent — this is the focused default.'}
2037
+ </p>
2038
+ </div>
2039
+ </SectionCard>
2040
+
2008
2041
  <SectionCard
2009
2042
  title="Voice & Autonomy"
2010
2043
  description="Tune voice and the detailed heartbeat behavior for this agent."
@@ -422,6 +422,21 @@ export function streamOpenClawChat({ session, message, imagePath, apiKey, write,
422
422
 
423
423
  const wsUrl = session.apiEndpoint ? deriveOpenClawWsUrl(session.apiEndpoint) : 'ws://127.0.0.1:18789'
424
424
  const token = apiKey || session.apiKey || undefined
425
+ // If the session references a credential but nothing resolved, the credential
426
+ // was deleted or corrupted. Fail fast with a clear error instead of dialing
427
+ // the gateway unauthenticated and timing out 120 s later (the original symptom
428
+ // behind the "OpenClaw gateway timed out after 120 s" report).
429
+ const credentialIdSet = typeof session.credentialId === 'string' && session.credentialId.trim().length > 0
430
+ if (credentialIdSet && !token) {
431
+ return Promise.resolve().then(() => {
432
+ active.delete(session.id)
433
+ write(`data: ${JSON.stringify({
434
+ t: 'err',
435
+ text: `OpenClaw credential "${session.credentialId}" is missing from the credential store. Reattach an existing credential or create a new one in Settings → Credentials.`,
436
+ })}\n\n`)
437
+ return ''
438
+ })
439
+ }
425
440
  return new Promise((resolve) => {
426
441
  let fullResponse = ''
427
442
  let settled = false
@@ -15,7 +15,7 @@ import { loadSettings } from '@/lib/server/settings/settings-repository'
15
15
  import { loadSkills } from '@/lib/server/skills/skill-repository'
16
16
  import { resolveImagePath } from '@/lib/server/resolve-image'
17
17
  import { resolveSessionToolPolicy } from '@/lib/server/tool-capability-policy'
18
- import { listUniversalToolAccessExtensionIds } from '@/lib/server/universal-tool-access'
18
+ import { listUniversalToolAccessExtensionIds, listScopedToolAccessExtensionIds } from '@/lib/server/universal-tool-access'
19
19
  import {
20
20
  buildAgentDisabledMessage,
21
21
  isAgentDisabled,
@@ -332,9 +332,17 @@ function buildAgentSystemPrompt(
332
332
  const allowSilentReplies = isDirectConnectorSession(session)
333
333
  const lightweightDirectChat = options?.lightweightDirectChat === true
334
334
  const parts: string[] = []
335
- const enabledExtensions = listUniversalToolAccessExtensionIds(
336
- getEnabledCapabilityIds(session).length > 0 ? getEnabledCapabilityIds(session) : getEnabledCapabilityIds(agent),
337
- )
335
+ const capabilityIds = getEnabledCapabilityIds(session).length > 0
336
+ ? getEnabledCapabilityIds(session)
337
+ : getEnabledCapabilityIds(agent)
338
+ // Scoped tool access is the new default: if the agent declares a non-empty
339
+ // `tools` list, the system prompt only describes those tools. Explicit
340
+ // `toolAccessMode: 'universal'` opts into the full firehose (for coordinators
341
+ // or debugging). Agents with no declared tools fall back to universal so
342
+ // empty-config agents aren't crippled.
343
+ const enabledExtensions = agent.toolAccessMode !== 'universal' && Array.isArray(agent.tools) && agent.tools.length > 0
344
+ ? listScopedToolAccessExtensionIds(agent.tools, capabilityIds)
345
+ : listUniversalToolAccessExtensionIds(capabilityIds)
338
346
 
339
347
  const identityLines = ['## My Identity']
340
348
  identityLines.push(`Name: ${agent.name}`)
@@ -547,8 +555,16 @@ export async function prepareChatTurn(input: ExecuteChatTurnInput): Promise<Prep
547
555
  const runtimeCapabilityIds = filterRuntimeCapabilityIds(getEnabledCapabilityIds(session), {
548
556
  delegationEnabled: agentForSession?.delegationEnabled === true,
549
557
  })
558
+ // Match the resolver in buildAgentSystemPrompt: default to scoped whenever
559
+ // the agent declares a non-empty tools list, unless explicitly set to
560
+ // 'universal'. Agents with no declared tools stay universal.
561
+ const scopedAccess = agentForSession?.toolAccessMode !== 'universal'
562
+ && Array.isArray(agentForSession?.tools)
563
+ && (agentForSession!.tools!.length > 0)
550
564
  const requestedCapabilityIds = runtimeCapabilityIds.length > 0
551
- ? listUniversalToolAccessExtensionIds(runtimeCapabilityIds)
565
+ ? (scopedAccess
566
+ ? listScopedToolAccessExtensionIds(agentForSession!.tools!, runtimeCapabilityIds)
567
+ : listUniversalToolAccessExtensionIds(runtimeCapabilityIds))
552
568
  : []
553
569
  const toolPolicy = resolveSessionToolPolicy(requestedCapabilityIds, appSettings)
554
570
  const isHeartbeatRun = input.internal === true && source === 'heartbeat'
@@ -34,7 +34,11 @@ const perfState = hmrSingleton('__swarmclaw_perf__', () => ({
34
34
  recentEntries: [] as PerfEntry[],
35
35
  }))
36
36
 
37
- const MAX_RECENT = 200
37
+ // Keep a generous ring buffer so perf entries from a chat turn survive the
38
+ // flurry of repository/queue events that fire between them. 200 was too small
39
+ // — queue.get/tasks.list fire ~20/s during task processing and would evict
40
+ // chat-execution/prompt entries before they could be read.
41
+ const MAX_RECENT = 2000
38
42
 
39
43
  function emitEntry(entry: PerfEntry): void {
40
44
  perfState.recentEntries.push(entry)
@@ -700,6 +700,7 @@ export function reconcileFinishedRunningTasks(): { reconciled: number; deadLette
700
700
  if (!fallbackText && !task.result) {
701
701
  task.status = 'failed'
702
702
  task.result = 'Agent session finished without producing output.'
703
+ task.checkoutRunId = null
703
704
  task.updatedAt = now
704
705
  tasksDirty = true
705
706
  continue
@@ -1105,13 +1106,23 @@ export async function processNext() {
1105
1106
  const currentQueue = loadQueue()
1106
1107
  const queueSet = new Set(currentQueue)
1107
1108
  let recovered = false
1109
+ let tasksDirty = false
1108
1110
  for (const [id, t] of Object.entries(allTasks) as [string, BoardTask][]) {
1109
1111
  if (t.status === 'queued' && !queueSet.has(id)) {
1110
1112
  log.info(TAG, `[queue] Recovering orphaned queued task: "${t.title}" (${id})`)
1113
+ // Defence in depth: a queued task must not carry a stale checkoutRunId
1114
+ // (left over from pre-1.5.38 retries). If it does, checkoutTask() will
1115
+ // reject every attempt and this orphan-recovery loop will spin at 100%
1116
+ // CPU re-queueing a task that can never run.
1117
+ if (t.checkoutRunId) {
1118
+ t.checkoutRunId = null
1119
+ tasksDirty = true
1120
+ }
1111
1121
  pushQueueUnique(currentQueue, id)
1112
1122
  recovered = true
1113
1123
  }
1114
1124
  }
1125
+ if (tasksDirty) saveTasks(allTasks)
1115
1126
  if (recovered) saveQueue(currentQueue)
1116
1127
  }
1117
1128
 
@@ -1152,6 +1163,7 @@ export async function processNext() {
1152
1163
  if (!agent) {
1153
1164
  task.status = 'failed'
1154
1165
  task.deadLetteredAt = Date.now()
1166
+ task.checkoutRunId = null
1155
1167
  task.error = `Agent ${task.agentId} not found`
1156
1168
  task.updatedAt = Date.now()
1157
1169
  saveTasks(latestTasks)
@@ -1182,6 +1194,7 @@ export async function processNext() {
1182
1194
  } else {
1183
1195
  task.status = 'failed'
1184
1196
  task.deadLetteredAt = Date.now()
1197
+ task.checkoutRunId = null
1185
1198
  task.error = `No agent matches required capabilities: [${reqCaps.join(', ')}]`
1186
1199
  task.updatedAt = Date.now()
1187
1200
  saveTasks(latestTasks)
@@ -309,6 +309,67 @@ describe('queue recovery', () => {
309
309
  assert.equal(output.attempts, 1)
310
310
  })
311
311
 
312
+ it('processNext orphan recovery clears stale checkoutRunId on queued tasks', () => {
313
+ // Regression: tasks written before the 1.5.38 fix could land in storage with
314
+ // status='queued' + a set checkoutRunId (because the old scheduleRetryOrDeadLetter
315
+ // forgot to release the checkout). Orphan recovery must repair this invalid combo
316
+ // so the next checkoutTask() can succeed — otherwise the loop spins forever.
317
+ const output = runWithTempDataDir<{
318
+ status: string | null
319
+ checkoutRunId: string | null
320
+ queued: string[]
321
+ }>(`
322
+ const storageMod = await import('@/lib/server/storage')
323
+ const queueMod = await import('@/lib/server/runtime/queue')
324
+ const storage = storageMod.default || storageMod
325
+ const queue = queueMod.default || queueMod
326
+
327
+ const now = Date.now()
328
+ storage.saveAgents({
329
+ 'agent-a': {
330
+ id: 'agent-a',
331
+ name: 'Agent A',
332
+ provider: 'openai',
333
+ model: 'gpt-test',
334
+ createdAt: now,
335
+ updatedAt: now,
336
+ },
337
+ })
338
+ storage.saveTasks({
339
+ stale: {
340
+ id: 'stale',
341
+ title: 'Pre-1.5.38 stuck task',
342
+ description: 'Queued but still holds a stale checkoutRunId from a prior failed run',
343
+ status: 'queued',
344
+ agentId: 'agent-a',
345
+ checkoutRunId: 'stale-run-id',
346
+ createdAt: now - 10_000,
347
+ updatedAt: now - 10_000,
348
+ },
349
+ })
350
+ // Intentionally NOT in the queue array — simulates the orphan condition.
351
+ storage.saveQueue([])
352
+
353
+ await queue.processNext()
354
+
355
+ const task = storage.loadTasks().stale
356
+ console.log(JSON.stringify({
357
+ status: task?.status ?? null,
358
+ checkoutRunId: task?.checkoutRunId ?? null,
359
+ queued: storage.loadQueue(),
360
+ }))
361
+ `)
362
+
363
+ // Orphan recovery should have put the task back in the queue AND cleared the stale id.
364
+ assert.equal(output.checkoutRunId, null, 'orphan recovery must clear stale checkoutRunId')
365
+ // After recovery the task may still be queued, have moved to running, or have already failed
366
+ // (the assertion below accepts all three). Either way it must not still be stuck in an orphan state.
367
+ assert.ok(
368
+ output.status === 'queued' || output.status === 'running' || output.status === 'failed',
369
+ `unexpected status after recovery: ${output.status}`,
370
+ )
371
+ })
372
+
312
373
  it('dead-letter path clears checkoutRunId so terminal tasks do not appear checked-out', () => {
313
374
  const output = runWithTempDataDir<{
314
375
  status: string | null
@@ -654,6 +654,16 @@ export function resolveRuntimeSkills(options: ResolveRuntimeSkillsOptions = {}):
654
654
  }
655
655
  }
656
656
 
657
+ // Dedicated sub-budget for auto-attached learned skills. buildSeedFromLearned
658
+ // marks every learned skill as `attached`, which means a single coordinator
659
+ // agent with 100+ historical learnings could flood the whole 30 k pinned-skill
660
+ // block every turn (observed: 178 learned skills / 176 k chars candidate pool
661
+ // → 24 k-char Pinned Skills section on every CEO turn). We cap learned-skill
662
+ // injection well below the full budget so explicitly-pinned/always-on skills
663
+ // still fit afterward.
664
+ const MAX_LEARNED_SKILLS_PROMPT_CHARS = 8000
665
+ const MAX_LEARNED_SKILLS_IN_PROMPT = 6
666
+
657
667
  function selectPromptSkills(skills: ResolvedRuntimeSkill[]): ResolvedRuntimeSkill[] {
658
668
  const ordered = [...skills]
659
669
  .filter((skill) =>
@@ -670,16 +680,39 @@ function selectPromptSkills(skills: ResolvedRuntimeSkill[]): ResolvedRuntimeSkil
670
680
 
671
681
  const selected: ResolvedRuntimeSkill[] = []
672
682
  let totalChars = 0
683
+ let learnedChars = 0
684
+ let learnedCount = 0
673
685
  for (const skill of ordered) {
674
686
  if (selected.length >= MAX_SKILLS_IN_PROMPT) break
675
687
  const contentLen = skill.name.length + skill.content.length + 12
676
688
  if (totalChars + contentLen > MAX_SKILLS_PROMPT_CHARS) continue
689
+ const isLearned = skill.source === 'learned'
690
+ if (isLearned) {
691
+ if (learnedCount >= MAX_LEARNED_SKILLS_IN_PROMPT) continue
692
+ if (learnedChars + contentLen > MAX_LEARNED_SKILLS_PROMPT_CHARS) continue
693
+ learnedChars += contentLen
694
+ learnedCount += 1
695
+ }
677
696
  totalChars += contentLen
678
697
  selected.push(skill)
679
698
  }
680
699
  return selected
681
700
  }
682
701
 
702
// Hard cap on how much skill content is inlined per pinned skill. Long skill
// files (multi-page markdown guides) were dominating the system prompt — one
// coordinator agent had 24,402 chars (39% of its 62 k budget) from a single
// pinned skill. Content over the cap is truncated and the agent is told to
// pull the rest on demand via `use_skill` action="load".
const INLINED_SKILL_CHAR_CAP = 3000

/**
 * Returns the skill content to inline in the prompt.
 *
 * The content is trimmed first. If the trimmed text fits within
 * INLINED_SKILL_CHAR_CAP (measured in UTF-16 code units) it is returned as-is;
 * otherwise it is cut at the cap and followed by a bracketed notice that names
 * the skill and points at `use_skill` action="load".
 *
 * @param content   Raw skill body.
 * @param skillName Skill name interpolated into the truncation notice.
 * @returns The trimmed content, or the truncated head plus the notice.
 */
function truncateInlinedSkillContent(content: string, skillName: string): string {
  const trimmed = content.trim()
  if (trimmed.length <= INLINED_SKILL_CHAR_CAP) return trimmed

  // Never end the head on a lone high surrogate: if the cap falls between the
  // two halves of a surrogate pair, back off by one code unit so the inlined
  // text stays well-formed.
  let cutAt = INLINED_SKILL_CHAR_CAP
  const lastUnit = trimmed.charCodeAt(cutAt - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cutAt -= 1

  const head = trimmed.slice(0, cutAt)
  return `${head}\n\n[Skill content truncated at ${INLINED_SKILL_CHAR_CAP} chars to save context. Call \`use_skill\` with action="load" and skillId for "${skillName}" to load the full guide when you need it.]`
}
715
+
683
716
  function sectionFromSkills(params: {
684
717
  title: string
685
718
  preface: string
@@ -688,7 +721,7 @@ function sectionFromSkills(params: {
688
721
  const usable = params.skills.filter((skill) => skill.content.trim())
689
722
  if (usable.length === 0) return ''
690
723
  const body = usable
691
- .map((skill) => `### ${skill.name}\n${skill.content}`)
724
+ .map((skill) => `### ${skill.name}\n${truncateInlinedSkillContent(skill.content, skill.name)}`)
692
725
  .join('\n\n')
693
726
  return [params.title, params.preface, '', body].join('\n')
694
727
  }
@@ -0,0 +1,71 @@
1
+ import { describe, it } from 'node:test'
2
+ import assert from 'node:assert/strict'
3
+
4
+ // NOTE: we intentionally avoid importing the real universal-tool-access
5
+ // module here — it pulls in the extension manager which transitively loads
6
+ // the whole plugin system and OOMs in test workers. We re-declare the pure
7
+ // logic and verify the algorithmic behavior. Integration coverage for the
8
+ // extension-manager branch happens via live-chat profiling instead.
9
+
10
// Tools every scoped agent keeps, whatever it declares.
const SCOPED_TOOL_BASELINE = ['memory', 'context_mgmt', 'ask_human'] as const

// Stand-in for the universal tool list, used as the default universe below.
const UNIVERSAL_SAMPLE = new Set([
  'shell', 'files', 'edit_file', 'delegate', 'web', 'browser', 'memory',
  'manage_platform', 'manage_tasks', 'context_mgmt', 'ask_human',
  'schedule_wake', 'email', 'image_gen',
])

/** Returns the trimmed, non-empty string entries of `value`; non-arrays yield []. */
function normalize(value: string[] | undefined | null): string[] {
  if (!Array.isArray(value)) return []
  const cleaned: string[] = []
  for (const entry of value) {
    const text = typeof entry === 'string' ? entry.trim() : ''
    if (text) cleaned.push(text)
  }
  return cleaned
}

/**
 * Baseline tools first, then each declared tool that exists in `universe`,
 * in declaration order and without duplicates.
 */
function scoped(declared: string[] | null | undefined, universe: Set<string> = UNIVERSAL_SAMPLE): string[] {
  const result = new Set<string>(SCOPED_TOOL_BASELINE)
  for (const tool of normalize(declared)) {
    if (universe.has(tool)) result.add(tool)
  }
  return [...result]
}
26
+
27
// Exercises the locally re-declared `scoped` helper: baseline retention,
// intersection with the universe, de-duplication, and input normalization.
describe('scoped tool access algorithm', () => {
  it('intersects declared tools with the universe and keeps the baseline', () => {
    const result = scoped(['shell', 'files', 'edit_file', 'web'])
    const mustInclude = ['memory', 'context_mgmt', 'ask_human', 'shell', 'files', 'edit_file', 'web']
    const mustExclude = ['browser', 'manage_platform', 'delegate']
    for (const id of mustInclude) {
      assert.ok(result.includes(id))
    }
    for (const id of mustExclude) {
      assert.ok(!result.includes(id))
    }
  })

  it('drops declared tools that are not in the universe', () => {
    const result = scoped(['shell', 'not_a_real_tool'])
    assert.ok(result.includes('shell'))
    assert.ok(!result.includes('not_a_real_tool'))
  })

  it('returns only the baseline when declared tools is empty', () => {
    const sorted = scoped([]).sort()
    assert.deepEqual(sorted, ['ask_human', 'context_mgmt', 'memory'])
  })

  it('produces a strictly smaller set than the universe for a focused agent', () => {
    const focused = scoped(['shell', 'files', 'web'])
    assert.ok(focused.length < UNIVERSAL_SAMPLE.size)
  })

  it('deduplicates when baseline overlaps with declared tools', () => {
    const result = scoped(['memory', 'shell'])
    const memoryHits = result.filter((t) => t === 'memory')
    assert.equal(memoryHits.length, 1)
  })

  it('treats null / undefined / non-array declared tools as empty', () => {
    for (const input of [null, undefined]) {
      assert.deepEqual(scoped(input).sort(), ['ask_human', 'context_mgmt', 'memory'])
    }
  })

  it('trims whitespace in declared tool names', () => {
    const result = scoped([' shell ', '\tfiles\n'])
    assert.ok(result.includes('shell'))
    assert.ok(result.includes('files'))
  })
})
@@ -57,3 +57,26 @@ export function listUniversalToolAccessExtensionIds(extraExtensions?: string[] |
57
57
  ...normalizeExtensionList(extraExtensions),
58
58
  ])
59
59
  }
60
+
61
// Extensions a scoped agent always receives, independent of its declared
// tool list: memory and context management so it can remember things and
// notice when it is running out of context, and ask_human so it can escalate
// to the user when stuck. Everything else is filterable through agent.tools.
const SCOPED_TOOL_BASELINE = ['memory', 'context_mgmt', 'ask_human'] as const

/**
 * Builds the enabled extension IDs for an agent that resolves to scoped tool
 * access: the baseline above, followed by every declared tool that also
 * appears in `listUniversalToolAccessExtensionIds(extraExtensions)`, passed
 * through `dedup`.
 *
 * @param declaredTools   The agent's declared tools; normalized via `normalizeExtensionList`.
 * @param extraExtensions Forwarded unchanged to `listUniversalToolAccessExtensionIds`.
 * @returns The scoped extension ID list.
 */
export function listScopedToolAccessExtensionIds(
  declaredTools: string[] | null | undefined,
  extraExtensions?: string[] | null,
): string[] {
  const allowed = new Set(listUniversalToolAccessExtensionIds(extraExtensions))
  const collected: string[] = [...SCOPED_TOOL_BASELINE]
  for (const toolId of normalizeExtensionList(declaredTools)) {
    if (allowed.has(toolId)) collected.push(toolId)
  }
  return dedup(collected)
}
@@ -68,6 +68,13 @@ export interface Agent {
68
68
  delegationTargetMode?: DelegationTargetMode
69
69
  delegationTargetAgentIds?: string[]
70
70
  tools?: string[]
71
+ // When 'scoped', the chat turn restricts enabled extensions to the
72
+ // intersection of the universal core list and agent.tools (plus a small
73
+ // non-negotiable baseline: memory, context management, ask_human). When
74
+ // unset, agents with a non-empty tools list are treated as scoped; set
75
+ // 'universal' to opt out. A focused agent with 5 tools drops ~15 k chars
76
+ // of tool-related prompt text vs. the full 33-tool universe.
77
+ toolAccessMode?: 'universal' | 'scoped'
71
78
  extensions?: string[]
72
79
  skills?: string[] // e.g. ['frontend-design'] — pinned Claude Code skills to mention explicitly
73
80
  skillIds?: string[] // IDs of pinned managed skills to keep always-on for this agent