npm - @swarmclawai/swarmclaw - Versions diffs - 1.9.21 → 1.9.22 - Mend

@swarmclawai/swarmclaw 1.9.21 → 1.9.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/README.md +9 -0
package/package.json +2 -2
package/src/components/chat/activity-moment.tsx +4 -0
package/src/components/chat/tool-call-bubble.tsx +6 -0
package/src/lib/server/capability-router.test.ts +4 -4
package/src/lib/server/capability-router.ts +1 -0
package/src/lib/server/chat-execution/chat-execution-advanced.test.ts +27 -0
package/src/lib/server/chat-execution/chat-execution-utils.ts +21 -0
package/src/lib/server/chat-execution/iteration-event-handler.ts +1 -1
package/src/lib/server/chat-execution/stream-continuation.ts +6 -2
package/src/lib/server/plugins-advanced.test.ts +7 -3
package/src/lib/server/session-tools/web-crawl.test.ts +106 -0
package/src/lib/server/session-tools/web-inputs.test.ts +5 -0
package/src/lib/server/session-tools/web-utils.ts +8 -2
package/src/lib/server/session-tools/web.ts +256 -29
package/src/lib/server/storage.ts +2 -0
package/src/lib/server/tool-aliases.ts +1 -1
package/src/lib/server/tool-capability-policy-advanced.test.ts +3 -3
package/src/lib/server/tool-capability-policy.ts +4 -1
package/src/lib/server/tool-planning.test.ts +2 -1
package/src/lib/server/tool-planning.ts +31 -0
package/src/lib/server/untrusted-content.ts +2 -2
package/src/types/session.ts +2 -0

package/README.md CHANGED Viewed

@@ -409,6 +409,15 @@ Operational docs: https://swarmclaw.ai/docs/observability
 ## Releases
+### v1.9.22 Highlights
+Research tools release: agents now get direct `web_extract` and `web_crawl` tools alongside `web_search`, `web_fetch`, and the unified `web` tool.
+- **Source-grounded extraction.** `web_extract` returns a page title, canonical URL, and readable content for known source URLs.
+- **Bounded crawls.** `web_crawl` walks same-origin links by default with conservative page and depth caps, plus an explicit external-link opt-in.
+- **Better routing.** Tool aliases, capability policy, planning hints, continuation recovery, and the chat UI all recognize the granular research tools.
+- **Regression coverage.** New tests cover action inference, tool-call translation, direct tool registration, extraction cleanup, and same-origin crawl bounds.
 ### v1.9.21 Highlights
 Provider diagnostics release: connection checks now return a structured step timeline across setup, provider settings, and agent editing.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@swarmclawai/swarmclaw",
-  "version": "1.9.21",
+  "version": "1.9.22",
   "description": "Build and run autonomous AI agents with OpenClaw, Hermes, multiple model providers, orchestration, delegation, memory, skills, schedules, and chat connectors.",
   "main": "electron-dist/main.js",
   "license": "MIT",
@@ -88,7 +88,7 @@
     "test:cli": "node --test src/cli/*.test.js bin/*.test.js scripts/electron-after-pack.test.mjs scripts/electron-signing-config.test.mjs scripts/ensure-sandbox-browser-image.test.mjs scripts/postinstall.test.mjs scripts/run-next-build.test.mjs scripts/run-next-typegen.test.mjs",
     "test:setup": "tsx --test src/app/api/setup/check-provider/route.test.ts src/lib/server/provider-model-discovery.test.ts src/components/auth/setup-wizard/utils.test.ts src/components/auth/setup-wizard/types.test.ts src/hooks/setup-done-detection.test.ts src/lib/setup-defaults.test.ts src/lib/server/storage-auth.test.ts src/lib/server/storage-auth-docker.test.ts",
     "test:openclaw": "tsx --test src/lib/openclaw/openclaw-agent-id.test.ts src/lib/openclaw/openclaw-endpoint.test.ts src/lib/server/agents/agent-runtime-config.test.ts src/lib/server/build-llm.test.ts src/lib/server/connectors/connector-routing.test.ts src/lib/server/connectors/openclaw.test.ts src/lib/server/connectors/swarmdock.test.ts src/lib/server/gateway/protocol.test.ts src/lib/server/gateways/gateway-topology.test.ts src/lib/server/llm-response-cache.test.ts src/lib/server/mcp-conformance.test.ts src/lib/server/openclaw/agent-resolver.test.ts src/lib/server/openclaw/deploy.test.ts src/lib/server/openclaw/skills-normalize.test.ts src/lib/server/session-tools/openclaw-nodes.test.ts src/lib/server/session-tools/swarmdock.test.ts src/lib/server/tasks/task-quality-gate.test.ts src/lib/server/tasks/task-validation.test.ts src/lib/server/tool-capability-policy.test.ts src/lib/providers/openai.test.ts src/lib/providers/openclaw-exports.test.ts src/app/api/gateways/topology-route.test.ts src/app/api/openclaw/dashboard-url/route.test.ts",
-    "test:runtime": "tsx --test src/lib/a2a/agent-card.test.ts src/lib/agent-planning-mode.test.ts src/lib/agent-config-history.test.ts src/lib/strip-internal-metadata.test.ts src/lib/provider-sets.test.ts src/lib/providers/opencode-cli.test.ts src/lib/providers/cli-provider-metadata.test.ts src/lib/providers/cli-utils.test.ts src/lib/providers/generic-cli.test.ts src/lib/server/agents/delegation-advisory.test.ts src/lib/server/cli-provider-readiness.test.ts src/lib/server/provider-health.test.ts src/lib/server/provider-diagnostics.test.ts src/lib/server/mcp-gateway-runtime.test.ts src/lib/server/mcp-connection-pool.test.ts src/lib/server/knowledge-sources.test.ts src/lib/server/extension-managed-resources.test.ts src/lib/server/eval/baseline.test.ts src/lib/server/eval/environment-plan.test.ts src/lib/server/chat-execution/chat-execution-grounding.test.ts src/lib/server/chat-execution/chat-turn-preparation.test.ts src/lib/server/chat-execution/iteration-timers.test.ts src/lib/server/chat-execution/post-stream-finalization.test.ts src/lib/server/chat-execution/prompt-sections.planning-mode.test.ts src/lib/server/chat-execution/reasoning-tag-scrubber.test.ts src/lib/server/chats/clear-undo-snapshots.test.ts src/lib/server/chats/session-context-pack.test.ts src/lib/server/connectors/email.test.ts src/lib/server/protocols/protocol-service.test.ts src/lib/server/runtime/run-ledger.test.ts src/lib/server/runtime/queue-retry-policy.test.ts src/lib/server/runs/run-brief.test.ts src/lib/server/runs/run-handoff.test.ts src/lib/server/operations/operation-pulse.test.ts src/lib/server/schedules/schedule-history.test.ts src/lib/server/schedules/schedule-preview.test.ts src/lib/quality/release-readiness.test.ts src/lib/quality/architecture-health.test.ts src/lib/server/artifacts/artifact-resolver.test.ts src/lib/server/observability/otel-config.test.ts src/lib/server/safe-parse-body.test.ts src/lib/server/missions/mission-templates.test.ts src/lib/server/sharing/share-link-repository.test.ts src/lib/server/sharing/share-resolver.test.ts src/lib/server/tasks/task-execution-workspace.test.ts src/lib/server/tasks/task-execution-policy.test.ts src/lib/server/tasks/task-handoff.test.ts src/lib/server/tasks/task-service.test.ts src/lib/server/session-tools/execute.test.ts src/lib/server/session-tools/manage-tasks.test.ts src/lib/app/view-constants.test.ts src/lib/quality/quality-summary.test.ts src/app/api/approvals/route.test.ts src/app/api/agents/agents-route.test.ts src/app/api/tasks/tasks-route.test.ts src/app/api/tasks/task-workspace-route.test.ts src/app/api/chats/chat-route.test.ts src/app/api/chats/clear-route.test.ts src/app/api/chats/compact-route.test.ts src/app/api/chats/context-pack-route.test.ts src/app/api/chats/context-status-route.test.ts src/app/api/config-versions/config-versions-route.test.ts src/app/api/runs/run-handoff-route.test.ts src/app/api/connectors/connector-doctor-route.test.ts src/app/api/extensions/managed-resources/route.test.ts src/app/api/healthz/route.test.ts src/app/api/logs/route.test.ts src/app/api/portability/export/route.test.ts src/app/api/portability/import/route.test.ts src/app/api/providers/[id]/route.test.ts src/app/api/schedules/preview/route.test.ts src/app/api/schedules/schedule-history-route.test.ts src/app/api/tts/route.test.ts",
+    "test:runtime": "tsx --test src/lib/a2a/agent-card.test.ts src/lib/agent-planning-mode.test.ts src/lib/agent-config-history.test.ts src/lib/strip-internal-metadata.test.ts src/lib/provider-sets.test.ts src/lib/providers/opencode-cli.test.ts src/lib/providers/cli-provider-metadata.test.ts src/lib/providers/cli-utils.test.ts src/lib/providers/generic-cli.test.ts src/lib/server/agents/delegation-advisory.test.ts src/lib/server/cli-provider-readiness.test.ts src/lib/server/provider-health.test.ts src/lib/server/provider-diagnostics.test.ts src/lib/server/mcp-gateway-runtime.test.ts src/lib/server/mcp-connection-pool.test.ts src/lib/server/knowledge-sources.test.ts src/lib/server/extension-managed-resources.test.ts src/lib/server/eval/baseline.test.ts src/lib/server/eval/environment-plan.test.ts src/lib/server/chat-execution/chat-execution-grounding.test.ts src/lib/server/chat-execution/chat-turn-preparation.test.ts src/lib/server/chat-execution/iteration-timers.test.ts src/lib/server/chat-execution/post-stream-finalization.test.ts src/lib/server/chat-execution/prompt-sections.planning-mode.test.ts src/lib/server/chat-execution/reasoning-tag-scrubber.test.ts src/lib/server/chats/clear-undo-snapshots.test.ts src/lib/server/chats/session-context-pack.test.ts src/lib/server/connectors/email.test.ts src/lib/server/protocols/protocol-service.test.ts src/lib/server/runtime/run-ledger.test.ts src/lib/server/runtime/queue-retry-policy.test.ts src/lib/server/runs/run-brief.test.ts src/lib/server/runs/run-handoff.test.ts src/lib/server/operations/operation-pulse.test.ts src/lib/server/schedules/schedule-history.test.ts src/lib/server/schedules/schedule-preview.test.ts src/lib/quality/release-readiness.test.ts src/lib/quality/architecture-health.test.ts src/lib/server/artifacts/artifact-resolver.test.ts src/lib/server/observability/otel-config.test.ts src/lib/server/safe-parse-body.test.ts src/lib/server/missions/mission-templates.test.ts src/lib/server/sharing/share-link-repository.test.ts src/lib/server/sharing/share-resolver.test.ts src/lib/server/tasks/task-execution-workspace.test.ts src/lib/server/tasks/task-execution-policy.test.ts src/lib/server/tasks/task-handoff.test.ts src/lib/server/tasks/task-service.test.ts src/lib/server/session-tools/execute.test.ts src/lib/server/session-tools/manage-tasks.test.ts src/lib/server/session-tools/web-crawl.test.ts src/lib/app/view-constants.test.ts src/lib/quality/quality-summary.test.ts src/app/api/approvals/route.test.ts src/app/api/agents/agents-route.test.ts src/app/api/tasks/tasks-route.test.ts src/app/api/tasks/task-workspace-route.test.ts src/app/api/chats/chat-route.test.ts src/app/api/chats/clear-route.test.ts src/app/api/chats/compact-route.test.ts src/app/api/chats/context-pack-route.test.ts src/app/api/chats/context-status-route.test.ts src/app/api/config-versions/config-versions-route.test.ts src/app/api/runs/run-handoff-route.test.ts src/app/api/connectors/connector-doctor-route.test.ts src/app/api/extensions/managed-resources/route.test.ts src/app/api/healthz/route.test.ts src/app/api/logs/route.test.ts src/app/api/portability/export/route.test.ts src/app/api/portability/import/route.test.ts src/app/api/providers/[id]/route.test.ts src/app/api/schedules/preview/route.test.ts src/app/api/schedules/schedule-history-route.test.ts src/app/api/tts/route.test.ts",
     "test:builder": "tsx --test src/features/protocols/builder/utils/nodes-to-template.test.ts src/features/protocols/builder/utils/template-to-nodes.test.ts src/features/protocols/builder/validators/dag-validator.test.ts",
     "test:e2e": "node --import tsx scripts/browser-e2e-smoke.ts",
     "test:mcp:conformance": "node --import tsx ./scripts/mcp-conformance-check.ts",

package/src/components/chat/activity-moment.tsx CHANGED Viewed

@@ -19,6 +19,9 @@ const NOTABLE_TOOLS: Record<string, { label: string; color: string; icon: 'brain
   delegate_to_agent: { label: 'Delegating task', color: '#6366F1', icon: 'delegate' },
   check_delegation_status: { label: 'Checking delegation', color: '#6366F1', icon: 'delegate' },
   web_search: { label: 'Searched the web', color: '#22C55E', icon: 'search' },
+  web_fetch: { label: 'Read a web page', color: '#22C55E', icon: 'search' },
+  web_extract: { label: 'Extracted a web page', color: '#22C55E', icon: 'search' },
+  web_crawl: { label: 'Crawled a site', color: '#22C55E', icon: 'search' },
   connector_message_tool: { label: 'Sent a message', color: '#F97316', icon: 'message' },
 }
@@ -35,6 +38,7 @@ function extractSnippet(toolName: string, toolInput: string): string | null {
     if (toolName === 'check_delegation_status' && parsed.agentName) return parsed.agentName
     if (toolName.startsWith('delegate_to_') && parsed.task) return parsed.task
     if (toolName === 'web_search' && parsed.query) return parsed.query
+    if ((toolName === 'web_fetch' || toolName === 'web_extract' || toolName === 'web_crawl') && parsed.url) return parsed.url
     if (toolName === 'connector_message_tool' && parsed.to) return parsed.to
   } catch { /* ignore parse errors */ }
   return null

package/src/components/chat/tool-call-bubble.tsx CHANGED Viewed

@@ -20,6 +20,8 @@ const TOOL_COLORS: Record<string, string> = {
   create_spreadsheet: '#10B981',
   web_search: '#3B82F6',
   web_fetch: '#3B82F6',
+  web_extract: '#3B82F6',
+  web_crawl: '#3B82F6',
   spawn_subagent: '#8B5CF6',
   delegate_to_agent: '#6366F1',
   check_delegation_status: '#6366F1',
@@ -77,6 +79,8 @@ export const TOOL_LABELS: Record<string, string> = {
   create_spreadsheet: 'Create Spreadsheet',
   web_search: 'Web Search',
   web_fetch: 'Web Fetch',
+  web_extract: 'Web Extract',
+  web_crawl: 'Web Crawl',
   claude_code: 'Claude Code',
   codex_cli: 'Codex CLI',
   opencode_cli: 'OpenCode CLI',
@@ -127,6 +131,8 @@ export const TOOL_DESCRIPTIONS: Record<string, string> = {
   create_spreadsheet: 'Create Excel or CSV files from structured data',
   web_search: 'Search the web for information',
   web_fetch: 'Fetch and read web page content',
+  web_extract: 'Extract readable content from a source URL',
+  web_crawl: 'Crawl a bounded set of pages from one site',
   claude_code: 'Enable delegation to Claude Code CLI',
   codex_cli: 'Enable delegation to OpenAI Codex CLI',
   opencode_cli: 'Enable delegation to OpenCode CLI',

package/src/lib/server/capability-router.test.ts CHANGED Viewed

@@ -26,7 +26,7 @@ test('routeTaskIntent keeps coding prompts prioritized over memory keywords', ()
 test('routeTaskIntent keeps hybrid research-plus-media prompts in research intent', () => {
   const decision = routeTaskIntent(
     'Can you tell me more if there is any news related to the US-Iran war, and can you send me some screenshots and give me a summary and maybe send me a voice note about it?',
-    ['web_search', 'web_fetch', 'browser', 'manage_connectors'],
+    ['web_search', 'web_fetch', 'web_crawl', 'browser', 'manage_connectors'],
     null,
     makeClassification({
       taskIntent: 'research',
@@ -39,7 +39,7 @@ test('routeTaskIntent keeps hybrid research-plus-media prompts in research inten
   )
   assert.equal(decision.intent, 'research')
-  assert.deepEqual(decision.preferredTools, ['web_search', 'web_fetch', 'browser', 'connector_message_tool'])
+  assert.deepEqual(decision.preferredTools, ['web_search', 'web_fetch', 'web_extract', 'web_crawl', 'browser', 'connector_message_tool'])
 })
 test('routeTaskIntent treats direct voice-note delivery as outreach', () => {
@@ -72,7 +72,7 @@ test('routeTaskIntent treats keep-watching update requests as research even with
   )
   assert.equal(decision.intent, 'research')
-  assert.deepEqual(decision.preferredTools, ['web_search', 'web_fetch'])
+  assert.deepEqual(decision.preferredTools, ['web_search', 'web_fetch', 'web_extract', 'web_crawl'])
 })
 test('routeTaskIntent uses structured classification when available', () => {
@@ -99,7 +99,7 @@ test('routeTaskIntent uses structured classification when available', () => {
   )
   assert.equal(decision.intent, 'browsing')
-  assert.deepEqual(decision.preferredTools, ['browser', 'web_fetch'])
+  assert.deepEqual(decision.preferredTools, ['browser', 'web_fetch', 'web_extract'])
 })
 function makeClassification(overrides: Partial<MessageClassification>): MessageClassification {

package/src/lib/server/capability-router.ts CHANGED Viewed

@@ -144,6 +144,7 @@ export function routeTaskIntent(
       [
         TOOL_CAPABILITY.researchSearch,
         TOOL_CAPABILITY.researchFetch,
+        TOOL_CAPABILITY.researchCrawl,
         ...(wantsScreenshots ? [TOOL_CAPABILITY.browserCapture] : []),
         ...(wantsVoiceDelivery ? [TOOL_CAPABILITY.deliveryVoiceNote] : []),
         ...(wantsOutboundDelivery ? [TOOL_CAPABILITY.deliveryMedia, TOOL_CAPABILITY.deliveryMessage] : []),

package/src/lib/server/chat-execution/chat-execution-advanced.test.ts CHANGED Viewed

@@ -407,6 +407,33 @@ describe('translateRequestedToolInvocation advanced', () => {
     assert.equal(args.action, 'search')
     assert.equal(args.query, 'test query')
   })
+  it('maps web_extract to web with action=extract', () => {
+    const { toolName, args } = translateRequestedToolInvocation(
+      'web_extract',
+      { url: 'https://example.com/source' },
+      '',
+      ['web'],
+    )
+    assert.equal(toolName, 'web')
+    assert.equal(args.action, 'extract')
+    assert.equal(args.url, 'https://example.com/source')
+  })
+  it('maps web_crawl to web with bounded crawl arguments', () => {
+    const { toolName, args } = translateRequestedToolInvocation(
+      'web_crawl',
+      { url: 'https://example.com/', maxPages: 4, maxDepth: 1, includeExternal: false },
+      '',
+      ['web'],
+    )
+    assert.equal(toolName, 'web')
+    assert.equal(args.action, 'crawl')
+    assert.equal(args.url, 'https://example.com/')
+    assert.equal(args.maxPages, 4)
+    assert.equal(args.maxDepth, 1)
+    assert.equal(args.includeExternal, false)
+  })
 })
 // ---------------------------------------------------------------------------

package/src/lib/server/chat-execution/chat-execution-utils.ts CHANGED Viewed

@@ -127,6 +127,27 @@ export function translateRequestedToolInvocation(
       },
     }
   }
+  if (requestedName === 'web_extract') {
+    return {
+      toolName: 'web',
+      args: {
+        action: 'extract',
+        url: rawArgs.url,
+      },
+    }
+  }
+  if (requestedName === 'web_crawl') {
+    return {
+      toolName: 'web',
+      args: {
+        action: 'crawl',
+        url: rawArgs.url || rawArgs.query,
+        maxPages: rawArgs.maxPages ?? rawArgs.maxResults,
+        maxDepth: rawArgs.maxDepth,
+        includeExternal: rawArgs.includeExternal,
+      },
+    }
+  }
   if (requestedName === 'delegate_to_claude_code') {
     return { toolName: 'delegate', args: { ...rawArgs, backend: 'claude' } }
   }

package/src/lib/server/chat-execution/iteration-event-handler.ts CHANGED Viewed

@@ -349,7 +349,7 @@ export async function processIterationEvents(opts: ProcessIterationEventsOpts):
       }
       if (
         boundedExternalExecutionTask
-        && ['http_request', 'web', 'web_search', 'web_fetch', 'browser'].includes(toolName)
+        && ['http_request', 'web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'browser'].includes(toolName)
         && countExternalExecutionResearchSteps(state.streamedToolEvents) >= 5
         && countDistinctExternalResearchHosts(state.streamedToolEvents) >= 3
       ) {

package/src/lib/server/chat-execution/stream-continuation.ts CHANGED Viewed

@@ -196,7 +196,7 @@ function getRequestedArtifactStatus(params: {
 export function countExternalExecutionResearchSteps(toolEvents: MessageToolEvent[]): number {
   return toolEvents.filter((event) => {
-    return ['http_request', 'web', 'web_search', 'web_fetch', 'browser'].includes(event.name)
+    return ['http_request', 'web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'browser'].includes(event.name)
   }).length
 }
@@ -300,6 +300,8 @@ const RECOVERABLE_TOOL_ERROR_NAMES = new Set([
   'web',
   'web_search',
   'web_fetch',
+  'web_extract',
+  'web_crawl',
   'http_request',
 ])
@@ -390,6 +392,8 @@ export function getToolFrequencyHint(toolName: string, sessionExtensions: string
     case 'http_request':
     case 'web_search':
     case 'web_fetch':
+    case 'web_extract':
+    case 'web_crawl':
       return 'Hint: You have done extensive research. Stop gathering more sources and use the information you already have to complete the task.'
     case 'spawn_subagent':
@@ -490,7 +494,7 @@ function buildDeliverableFollowthroughPrompt(params: {
   }
   if (
-    params.toolEvents.some((event) => ['web', 'web_search', 'web_fetch', 'browser', 'http_request'].includes(event.name))
+    params.toolEvents.some((event) => ['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'browser', 'http_request'].includes(event.name))
     && !params.toolEvents.some((event) => ['files', 'write_file', 'edit_file', 'shell', 'execute_command'].includes(event.name))
   ) {
     lines.push(

package/src/lib/server/plugins-advanced.test.ts CHANGED Viewed

@@ -135,11 +135,13 @@ describe('expandExtensionIds', () => {
     }
   })
-  it('web expands to include web_search and web_fetch', () => {
+  it('web expands to include granular web tools', () => {
     const result = expandExtensionIds(['web'])
     assert.ok(result.includes('web'))
     assert.ok(result.includes('web_search'))
     assert.ok(result.includes('web_fetch'))
+    assert.ok(result.includes('web_extract'))
+    assert.ok(result.includes('web_crawl'))
   })
   it('removes duplicates after expansion', () => {
@@ -199,12 +201,14 @@ describe('expandExtensionIds', () => {
 // getExtensionAliases
 // ---------------------------------------------------------------------------
 describe('getExtensionAliases', () => {
-  it('web returns [web, web_search, web_fetch]', () => {
+  it('web returns the full web alias group', () => {
     const result = getExtensionAliases('web')
     assert.ok(result.includes('web'))
     assert.ok(result.includes('web_search'))
     assert.ok(result.includes('web_fetch'))
-    assert.equal(result.length, 5) // web, web_search, web_fetch, http_request, http
+    assert.ok(result.includes('web_extract'))
+    assert.ok(result.includes('web_crawl'))
+    assert.equal(result.length, 7) // web, web_search, web_fetch, web_extract, web_crawl, http_request, http
   })
   it('web_search returns the same group as web', () => {

package/src/lib/server/session-tools/web-crawl.test.ts ADDED Viewed

@@ -0,0 +1,106 @@
+import assert from 'node:assert/strict'
+import { afterEach, describe, it } from 'node:test'
+import { buildWebTools } from './web'
+import type { ToolBuildContext } from './context'
+const originalFetch = globalThis.fetch
+function createContext(): ToolBuildContext {
+  return {
+    cwd: process.cwd(),
+    ctx: undefined,
+    hasExtension: (name: string) => name === 'web',
+    hasTool: (name: string) => name === 'web',
+    cleanupFns: [],
+    commandTimeoutMs: 1000,
+    claudeTimeoutMs: 1000,
+    cliProcessTimeoutMs: 1000,
+    persistDelegateResumeId: () => {},
+    readStoredDelegateResumeId: () => null,
+    resolveCurrentSession: () => null,
+    activeExtensions: ['web'],
+  } as ToolBuildContext
+}
+function mockFetch(pages: Record<string, string>, calls: string[] = []): void {
+  globalThis.fetch = (async (input: RequestInfo | URL) => {
+    const url = input instanceof Request ? input.url : String(input)
+    calls.push(url)
+    const html = pages[url]
+    if (!html) {
+      return new Response('missing', { status: 404, statusText: 'Not Found' })
+    }
+    return new Response(html, {
+      status: 200,
+      headers: { 'content-type': 'text/html; charset=utf-8' },
+    })
+  }) as typeof fetch
+}
+afterEach(() => {
+  globalThis.fetch = originalFetch
+})
+describe('web extract and crawl tools', () => {
+  it('registers direct granular web tools when web is enabled', () => {
+    const names = buildWebTools(createContext()).map((entry) => entry.name).sort()
+    assert.deepEqual(names.filter((name) => name.startsWith('web')), [
+      'web',
+      'web_crawl',
+      'web_extract',
+      'web_fetch',
+      'web_search',
+    ])
+  })
+  it('extracts readable page content with title and source URL', async () => {
+    mockFetch({
+      'https://example.test/article': `
+        <!doctype html>
+        <title>Feature Page</title>
+        <header>Ignore navigation</header>
+        <main>
+          <h1>Feature Page</h1>
+          <p>Readable body text for the agent.</p>
+        </main>
+        <script>console.log('hidden')</script>
+      `,
+    })
+    const tool = buildWebTools(createContext()).find((entry) => entry.name === 'web_extract')
+    assert.ok(tool)
+    const output = String(await tool.invoke({ url: 'https://example.test/article#section' }))
+    assert.match(output, /Title: Feature Page/)
+    assert.match(output, /URL: https:\/\/example\.test\/article/)
+    assert.match(output, /Readable body text for the agent\./)
+    assert.doesNotMatch(output, /Ignore navigation/)
+    assert.doesNotMatch(output, /console\.log/)
+  })
+  it('crawls same-origin pages within the requested page and depth bounds', async () => {
+    const calls: string[] = []
+    mockFetch({
+      'https://site.test/': `
+        <title>Start</title>
+        <main>Start page <a href="/a">A</a> <a href="/b">B</a> <a href="https://external.test/x">External</a></main>
+      `,
+      'https://site.test/a': '<title>A page</title><main>Alpha content</main>',
+      'https://site.test/b': '<title>B page</title><main>Beta content</main>',
+      'https://external.test/x': '<title>External</title><main>Should not be fetched</main>',
+    }, calls)
+    const tool = buildWebTools(createContext()).find((entry) => entry.name === 'web_crawl')
+    assert.ok(tool)
+    const output = String(await tool.invoke({ url: 'https://site.test/', maxPages: 3, maxDepth: 1 }))
+    assert.match(output, /Crawl results for: https:\/\/site\.test\//)
+    assert.match(output, /Pages crawled: 3/)
+    assert.match(output, /Start page/)
+    assert.match(output, /Alpha content/)
+    assert.match(output, /Beta content/)
+    assert.doesNotMatch(output, /Should not be fetched/)
+    assert.deepEqual(calls, ['https://site.test/', 'https://site.test/a', 'https://site.test/b'])
+  })
+})

package/src/lib/server/session-tools/web-inputs.test.ts CHANGED Viewed

@@ -20,6 +20,11 @@ describe('inferWebActionFromArgs', () => {
     assert.equal(inferWebActionFromArgs({ action: 'search', url: 'https://example.com/article' }), 'search')
   })
+  it('preserves explicit extract and crawl actions', () => {
+    assert.equal(inferWebActionFromArgs({ action: 'extract', url: 'https://example.com/article' }), 'extract')
+    assert.equal(inferWebActionFromArgs({ action: 'crawl', url: 'https://example.com/' }), 'crawl')
+  })
   it('normalizes stringified browser form payloads', () => {
     const normalized = normalizeBrowserActionParams({
       input: JSON.stringify({

package/src/lib/server/session-tools/web-utils.ts CHANGED Viewed

@@ -176,8 +176,14 @@ export function inferWebActionFromArgs(params: {
   query?: string
   url?: string
   method?: string
-}): 'search' | 'fetch' | 'api' | undefined {
-  if (params.action === 'search' || params.action === 'fetch' || params.action === 'api') return params.action
+}): 'search' | 'fetch' | 'extract' | 'crawl' | 'api' | undefined {
+  if (
+    params.action === 'search'
+    || params.action === 'fetch'
+    || params.action === 'extract'
+    || params.action === 'crawl'
+    || params.action === 'api'
+  ) return params.action
   if (typeof params.method === 'string' && params.method.trim()) return 'api'
   if (typeof params.url === 'string' && /^https?:\/\//i.test(params.url.trim())) return 'fetch'
   if (typeof params.query === 'string' && params.query.trim()) return 'search'

package/src/lib/server/session-tools/web.ts CHANGED Viewed

@@ -199,6 +199,149 @@ async function executeWebApiAction(normalized: Record<string, unknown>) {
   }, requestArgs)
 }
+interface ExtractedWebPage {
+  url: string
+  title: string
+  text: string
+  links: string[]
+}
+function normalizeHttpUrl(rawUrl: string): string {
+  const trimmed = rawUrl.trim()
+  if (!trimmed) throw new Error('URL is required.')
+  const parsed = new URL(trimmed)
+  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
+    throw new Error('Only http and https URLs are supported.')
+  }
+  parsed.hash = ''
+  return parsed.toString()
+}
+function clampNumber(value: unknown, fallback: number, min: number, max: number): number {
+  const parsed = typeof value === 'number'
+    ? value
+    : typeof value === 'string'
+      ? Number.parseInt(value, 10)
+      : Number.NaN
+  if (!Number.isFinite(parsed)) return fallback
+  return Math.max(min, Math.min(max, Math.trunc(parsed)))
+}
+function extractLinks($: ReturnType<typeof cheerio.load>, pageUrl: string): string[] {
+  const links: string[] = []
+  $('a[href]').each((_index, element) => {
+    const rawHref = $(element).attr('href') || ''
+    try {
+      const resolved = new URL(rawHref, pageUrl)
+      if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') return
+      resolved.hash = ''
+      const href = resolved.toString()
+      if (!links.includes(href)) links.push(href)
+    } catch {
+      // Ignore malformed links from the crawled page.
+    }
+  })
+  return links
+}
+async function extractReadablePage(fetchUrl: string): Promise<ExtractedWebPage> {
+  const url = normalizeHttpUrl(fetchUrl)
+  const res = await fetch(url, {
+    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
+    signal: AbortSignal.timeout(15000),
+  })
+  if (!res.ok) throw new Error(`HTTP ${res.status}: ${res.statusText}`)
+  const contentType = res.headers.get('content-type') || ''
+  if (contentType.includes('application/pdf')) {
+    const pdfMod = await import(/* webpackIgnore: true */ 'pdf-parse')
+    const pdfParse = ((pdfMod as Record<string, unknown>).default ?? pdfMod) as (buf: Buffer) => Promise<{ text: string }>
+    const arrayBuffer = await res.arrayBuffer()
+    const result = await pdfParse(Buffer.from(arrayBuffer))
+    return { url, title: url, text: result.text, links: [] }
+  }
+  const html = await res.text()
+  const $ = cheerio.load(html)
+  const title = $('title').first().text().replace(/\s+/g, ' ').trim() || url
+  const links = extractLinks($, url)
+  $('script, style, noscript, nav, footer, header').remove()
+  const main = $('article, main, [role="main"]').first()
+  const text = (main.length ? main.text() : $('body').text()).replace(/\s+/g, ' ').trim()
+  return { url, title, text, links }
+}
+function formatExtractedPage(page: ExtractedWebPage): string {
+  const lines = [`Title: ${page.title}`, `URL: ${page.url}`, '', page.text || '(no readable text found)']
+  return truncate(lines.join('\n'), MAX_OUTPUT)
+}
+function formatCrawlResults(startUrl: string, pages: ExtractedWebPage[]): string {
+  if (pages.length === 0) return `No crawl results found for: ${startUrl}`
+  const sections = [`Crawl results for: ${startUrl}`, `Pages crawled: ${pages.length}`]
+  for (let index = 0; index < pages.length; index++) {
+    const page = pages[index]
+    const text = truncate(page.text || '(no readable text found)', 1200)
+    sections.push(`${index + 1}. ${page.title}\nURL: ${page.url}\nText: ${text}`)
+  }
+  return truncate(sections.join('\n\n'), MAX_OUTPUT)
+}
+async function executeWebExtractAction(normalized: Record<string, unknown>) {
+  const rawUrl = String(normalized.url || normalized.query || '')
+  if (!rawUrl.trim()) return 'Error: "url" is required for extract action.'
+  try {
+    return formatExtractedPage(await extractReadablePage(rawUrl))
+  } catch (err: unknown) {
+    return `Error: ${errorMessage(err)}`
+  }
+}
+async function executeWebCrawlAction(normalized: Record<string, unknown>) {
+  const rawUrl = String(normalized.url || normalized.query || '')
+  if (!rawUrl.trim()) return 'Error: "url" is required for crawl action.'
+  let startUrl: string
+  try {
+    startUrl = normalizeHttpUrl(rawUrl)
+  } catch (err: unknown) {
+    return `Error: ${errorMessage(err)}`
+  }
+  const maxPages = clampNumber(normalized.maxPages ?? normalized.maxResults, 5, 1, 25)
+  const maxDepth = clampNumber(normalized.maxDepth, 1, 0, 3)
+  const includeExternal = normalized.includeExternal === true || normalized.sameOrigin === false
+  const startOrigin = new URL(startUrl).origin
+  const queue: Array<{ url: string; depth: number }> = [{ url: startUrl, depth: 0 }]
+  const seen = new Set<string>()
+  const pages: ExtractedWebPage[] = []
+  while (queue.length > 0 && pages.length < maxPages) {
+    const next = queue.shift()
+    if (!next) break
+    if (seen.has(next.url)) continue
+    seen.add(next.url)
+    let page: ExtractedWebPage
+    try {
+      page = await extractReadablePage(next.url)
+    } catch (err: unknown) {
+      page = { url: next.url, title: next.url, text: `Error: ${errorMessage(err)}`, links: [] }
+    }
+    pages.push(page)
+    if (next.depth >= maxDepth) continue
+    for (const link of page.links) {
+      if (seen.has(link)) continue
+      if (!includeExternal && new URL(link).origin !== startOrigin) continue
+      if (queue.some((entry) => entry.url === link)) continue
+      queue.push({ url: link, depth: next.depth + 1 })
+      if (queue.length + seen.size >= maxPages * 4) break
+    }
+  }
+  return formatCrawlResults(startUrl, pages)
+}
 async function executeWebAction(args: Record<string, unknown>) {
   const normalized = normalizeToolInputArgs(args)
   const { query, url, maxResults } = normalized as { query?: string; url?: string; maxResults?: number }
@@ -219,32 +362,13 @@ async function executeWebAction(args: Record<string, unknown>) {
       const results = await provider.search(searchQuery, limit)
       if (results.length === 0) return 'No results found.'
       return formatWebSearchResults(searchQuery, results)
-    } else if (action === 'fetch') {
+    } else if (action === 'fetch' || action === 'extract') {
       const fetchUrl = url || query
-      if (!fetchUrl) return 'Error: "url" is required for fetch action.'
-      const res = await fetch(fetchUrl, {
-        headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
-        signal: AbortSignal.timeout(15000),
-      })
-      if (!res.ok) return `HTTP ${res.status}: ${res.statusText}`
-      const contentType = res.headers.get('content-type') || ''
-      if (contentType.includes('application/pdf')) {
-        try {
-          const pdfMod = await import(/* webpackIgnore: true */ 'pdf-parse')
-          const pdfParse = ((pdfMod as Record<string, unknown>).default ?? pdfMod) as (buf: Buffer) => Promise<{ text: string }>
-          const arrayBuffer = await res.arrayBuffer()
-          const result = await pdfParse(Buffer.from(arrayBuffer))
-          return truncate(result.text, MAX_OUTPUT)
-        } catch (err: unknown) {
-          return `Error parsing PDF: ${errorMessage(err)}`
-        }
-      }
-      const html = await res.text()
-      const $ = cheerio.load(html)
-      $('script, style, noscript, nav, footer, header').remove()
-      const main = $('article, main, [role="main"]').first()
-      const text = (main.length ? main.text() : $('body').text()).replace(/\s+/g, ' ').trim()
-      return truncate(text, MAX_OUTPUT)
+      if (!fetchUrl) return `Error: "url" is required for ${action} action.`
+      const page = await extractReadablePage(fetchUrl)
+      return action === 'extract' ? formatExtractedPage(page) : truncate(page.text, MAX_OUTPUT)
+    } else if (action === 'crawl') {
+      return executeWebCrawlAction(normalized)
     } else if (action === 'api') {
       return executeWebApiAction(normalized)
     }
@@ -259,21 +383,25 @@ async function executeWebAction(args: Record<string, unknown>) {
  */
 const WebExtension: Extension = {
   name: 'Core Web',
-  description: 'Search the web, fetch content, and make HTTP API calls.',
+  description: 'Search the web, extract pages, crawl sites, and make HTTP API calls.',
   hooks: {
-    getCapabilityDescription: () => 'I can use the unified `web` tool with action `search` for research, `fetch` for reading a URL, and `api` for raw HTTP API calls with full control over method/headers/body.',
+    getCapabilityDescription: () => 'I can use `web_search` for fresh research, `web_extract` for a specific URL, `web_crawl` for bounded multi-page site reads, and the unified `web` tool for search, fetch, crawl, and raw HTTP API calls.',
   } as ExtensionHooks,
   tools: [
     {
       name: 'web',
-      description: 'Unified web access tool. Actions: search (web search), fetch (read URL content), api (raw HTTP request with method/headers/body).',
+      description: 'Unified web access tool. Actions: search (web search), fetch/extract (read URL content), crawl (bounded same-origin crawl), api (raw HTTP request with method/headers/body).',
       parameters: {
         type: 'object',
         properties: {
-          action: { type: 'string', enum: ['search', 'fetch', 'api'] },
+          action: { type: 'string', enum: ['search', 'fetch', 'extract', 'crawl', 'api'] },
           query: { type: 'string' },
           url: { type: 'string' },
           maxResults: { type: 'number' },
+          maxPages: { type: 'number', description: 'Maximum pages for crawl action, default 5, max 25' },
+          maxDepth: { type: 'number', description: 'Maximum crawl depth, default 1, max 3' },
+          includeExternal: { type: 'boolean', description: 'Allow crawl to leave the starting origin, default false' },
+          sameOrigin: { type: 'boolean', description: 'Keep crawl on the starting origin when true, default true' },
           method: { type: 'string', enum: ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD', 'OPTIONS'], description: 'HTTP method (for api action)' },
           headers: { type: 'object', additionalProperties: { type: 'string' }, description: 'Request headers (for api action)' },
           body: { type: 'string', description: 'Request body (for api action)' },
@@ -283,6 +411,71 @@ const WebExtension: Extension = {
         required: ['action']
       },
       execute: async (args) => executeWebAction(args)
+    },
+    {
+      name: 'web_search',
+      description: 'Search the web and return ranked results with URLs and snippets.',
+      parameters: {
+        type: 'object',
+        properties: {
+          query: { type: 'string' },
+          maxResults: { type: 'number' },
+        },
+        required: ['query'],
+      },
+      planning: {
+        capabilities: ['research.search'],
+        disciplineGuidance: ['Use `web_search` for fresh information, then fetch or extract only the sources you need.'],
+      },
+      execute: async (args) => executeWebAction({ ...normalizeToolInputArgs(args), action: 'search' }),
+    },
+    {
+      name: 'web_fetch',
+      description: 'Read a specific URL and return readable page text.',
+      parameters: {
+        type: 'object',
+        properties: { url: { type: 'string' } },
+        required: ['url'],
+      },
+      planning: {
+        capabilities: ['research.fetch'],
+        disciplineGuidance: ['Use `web_fetch` when you already have a URL and only need the readable text.'],
+      },
+      execute: async (args) => executeWebAction({ ...normalizeToolInputArgs(args), action: 'fetch' }),
+    },
+    {
+      name: 'web_extract',
+      description: 'Extract readable content from a URL with title and source URL included.',
+      parameters: {
+        type: 'object',
+        properties: { url: { type: 'string' } },
+        required: ['url'],
+      },
+      planning: {
+        capabilities: ['research.fetch'],
+        disciplineGuidance: ['Use `web_extract` for source-grounded page reads where the title and URL should stay attached to the extracted text.'],
+      },
+      execute: async (args) => executeWebExtractAction(normalizeToolInputArgs(args)),
+    },
+    {
+      name: 'web_crawl',
+      description: 'Crawl a small set of pages starting from one URL. Same-origin by default, bounded by maxPages and maxDepth.',
+      parameters: {
+        type: 'object',
+        properties: {
+          url: { type: 'string' },
+          maxPages: { type: 'number' },
+          maxDepth: { type: 'number' },
+          includeExternal: { type: 'boolean' },
+          sameOrigin: { type: 'boolean' },
+        },
+        required: ['url'],
+      },
+      planning: {
+        capabilities: ['research.crawl'],
+        disciplineGuidance: ['Use `web_crawl` only when the task needs multiple pages from the same site. Keep maxPages low and summarize after one crawl.'],
+      },
+      execute: async (args) => executeWebCrawlAction(normalizeToolInputArgs(args)),
     }
   ]
 }
@@ -307,6 +500,40 @@ export function buildWebTools(bctx: ToolBuildContext): StructuredToolInterface[]
         }
       )
     )
+    tools.push(
+      tool(
+        async (args) => executeWebAction({ ...normalizeToolInputArgs((args ?? {}) as Record<string, unknown>), action: 'search' }),
+        {
+          name: 'web_search',
+          description: 'Search the web and return ranked results with URLs and snippets.',
+          schema: z.object({}).passthrough()
+        }
+      ),
+      tool(
+        async (args) => executeWebAction({ ...normalizeToolInputArgs((args ?? {}) as Record<string, unknown>), action: 'fetch' }),
+        {
+          name: 'web_fetch',
+          description: 'Read a specific URL and return readable page text.',
+          schema: z.object({}).passthrough()
+        }
+      ),
+      tool(
+        async (args) => executeWebExtractAction(normalizeToolInputArgs((args ?? {}) as Record<string, unknown>)),
+        {
+          name: 'web_extract',
+          description: 'Extract readable content from a URL with title and source URL included.',
+          schema: z.object({}).passthrough()
+        }
+      ),
+      tool(
+        async (args) => executeWebCrawlAction(normalizeToolInputArgs((args ?? {}) as Record<string, unknown>)),
+        {
+          name: 'web_crawl',
+          description: 'Crawl a small set of pages starting from one URL. Same-origin by default, bounded by maxPages and maxDepth.',
+          schema: z.object({}).passthrough()
+        }
+      )
+    )
   }
   // Browser tool (kept as direct injection for now due to complexity)

package/src/lib/server/storage.ts CHANGED Viewed

@@ -664,6 +664,8 @@ if (!IS_BUILD_BOOTSTRAP) {
     'files',
     'web_search',
     'web_fetch',
+    'web_extract',
+    'web_crawl',
     'browser',
     'manage_agents',
     'manage_tasks',

package/src/lib/server/tool-aliases.ts CHANGED Viewed

@@ -3,7 +3,7 @@ const EXTENSION_ALIAS_GROUPS: string[][] = [
   ['execute', 'sandbox'],
   ['files', 'read_file', 'write_file', 'list_files', 'copy_file', 'move_file', 'delete_file', 'send_file'],
   ['edit_file'],
-  ['web', 'web_search', 'web_fetch', 'http_request', 'http'],
+  ['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'http_request', 'http'],
   ['browser', 'openclaw_browser'],
   ['delegate', 'claude_code', 'codex_cli', 'opencode_cli', 'gemini_cli', 'copilot_cli', 'droid_cli', 'cursor_cli', 'qwen_code_cli', 'delegate_to_claude_code', 'delegate_to_codex_cli', 'delegate_to_opencode_cli', 'delegate_to_gemini_cli', 'delegate_to_copilot_cli', 'delegate_to_droid_cli', 'delegate_to_cursor_cli', 'delegate_to_qwen_code_cli'],
   ['manage_platform'],

package/src/lib/server/tool-capability-policy-advanced.test.ts CHANGED Viewed

@@ -255,12 +255,12 @@ describe('explicit allows override mode blocks', () => {
 // Category blocks
 // ---------------------------------------------------------------------------
 describe('category blocks', () => {
-  it('blocking network category blocks web, web_search, web_fetch', () => {
-    const d = resolveSessionToolPolicy(['web', 'web_search', 'web_fetch', 'memory'], {
+  it('blocking network category blocks granular web tools', () => {
+    const d = resolveSessionToolPolicy(['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'memory'], {
       capabilityBlockedCategories: ['network'],
     })
     assert.deepStrictEqual(d.enabledExtensions, ['memory'])
-    assert.equal(d.blockedExtensions.length, 3)
+    assert.equal(d.blockedExtensions.length, 5)
     for (const b of d.blockedExtensions) {
       assert.match(b.reason, /category "network"/)
     }

package/src/lib/server/tool-capability-policy.ts CHANGED Viewed

@@ -49,9 +49,11 @@ const TOOL_DESCRIPTORS: Record<string, ToolDescriptor> = {
   move_file: { categories: ['filesystem'], concreteTools: ['move_file'] },
   edit_file: { categories: ['filesystem'], concreteTools: ['edit_file'] },
   delete_file: { categories: ['filesystem'], concreteTools: ['delete_file'], destructive: true },
-  web: { categories: ['network'], concreteTools: ['web', 'web_search', 'web_fetch'] },
+  web: { categories: ['network'], concreteTools: ['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl'] },
   web_search: { categories: ['network'], concreteTools: ['web_search'] },
   web_fetch: { categories: ['network'], concreteTools: ['web_fetch'] },
+  web_extract: { categories: ['network'], concreteTools: ['web_extract'] },
+  web_crawl: { categories: ['network'], concreteTools: ['web_crawl'] },
   browser: { categories: ['browser', 'network'], concreteTools: ['browser', 'openclaw_browser'] },
   delegate: { categories: ['delegation', 'execution'], concreteTools: ['delegate', 'delegate_to_claude_code', 'delegate_to_codex_cli', 'delegate_to_opencode_cli', 'delegate_to_gemini_cli', 'delegate_to_copilot_cli', 'delegate_to_droid_cli', 'delegate_to_cursor_cli', 'delegate_to_qwen_code_cli'] },
   claude_code: { categories: ['delegation', 'execution'], concreteTools: ['delegate_to_claude_code'] },
@@ -85,6 +87,7 @@ const TOOL_DESCRIPTORS: Record<string, ToolDescriptor> = {
   spawn_subagent: { categories: ['delegation', 'platform'], concreteTools: ['spawn_subagent', 'delegate_to_agent'] },
   context_mgmt: { categories: ['memory'], concreteTools: ['context_mgmt', 'context_status', 'context_summarize'] },
   extension_creator: { categories: ['filesystem', 'execution'], concreteTools: ['extension_creator', 'extension_creator_tool'] },
+  wallet: { categories: ['outbound'], concreteTools: ['wallet'] },
   mailbox: { categories: ['network', 'platform', 'outbound'], concreteTools: ['mailbox', 'inbox'] },
   ask_human: { categories: ['platform'], concreteTools: ['ask_human', 'human_loop'] },
   google_workspace: { categories: ['network'], concreteTools: ['google_workspace', 'gws'] },

package/src/lib/server/tool-planning.test.ts CHANGED Viewed

@@ -12,10 +12,11 @@ function uniqueExtensionId(prefix: string): string {
 describe('tool-planning', () => {
   it('collects core planning metadata for aliased built-in tools', () => {
-    const view = getEnabledToolPlanningView(['web_search', 'web_fetch', 'browser', 'manage_connectors'])
+    const view = getEnabledToolPlanningView(['web_search', 'web_fetch', 'web_extract', 'web_crawl', 'browser', 'manage_connectors'])
     assert.deepEqual(view.displayToolIds, ['browser', 'manage_connectors', 'web'])
     assert.deepEqual(getToolsForCapability(['web_search'], TOOL_CAPABILITY.researchSearch), ['web_search'])
+    assert.deepEqual(getToolsForCapability(['web_crawl'], TOOL_CAPABILITY.researchCrawl), ['web_crawl'])
     assert.deepEqual(getToolsForCapability(['manage_connectors'], TOOL_CAPABILITY.deliveryVoiceNote), ['connector_message_tool'])
   })

package/src/lib/server/tool-planning.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { canonicalizeExtensionId, expandExtensionIds } from './tool-aliases'
 export const TOOL_CAPABILITY = {
   researchSearch: 'research.search',
   researchFetch: 'research.fetch',
+  researchCrawl: 'research.crawl',
   browserNavigate: 'browser.navigate',
   browserCapture: 'browser.capture',
   artifactPdf: 'artifact.pdf',
@@ -98,6 +99,36 @@ const CORE_TOOL_PLANNING: Record<string, LegacyToolPlanningEntry[]> = {
         },
       ],
     },
+    {
+      toolName: 'web_extract',
+      capabilities: [TOOL_CAPABILITY.researchFetch],
+      disciplineGuidance: [
+        'For `web_extract`, use `{"url":"https://..."}` when source title and URL should remain attached to extracted page text.',
+        'Extract the exact pages you need, then synthesize. Do not extract the same page repeatedly.',
+      ],
+      requestMatchers: [
+        {
+          capability: TOOL_CAPABILITY.researchFetch,
+          patterns: ['extract', 'readable content', 'page text', 'source text'],
+          requireLiteralUrl: true,
+        },
+      ],
+    },
+    {
+      toolName: 'web_crawl',
+      capabilities: [TOOL_CAPABILITY.researchCrawl],
+      disciplineGuidance: [
+        'For `web_crawl`, use `{"url":"https://...","maxPages":5,"maxDepth":1}` only when a task needs several pages from the same site.',
+        'Keep crawls bounded and summarize after one crawl. Prefer `web_extract` for a single known URL.',
+      ],
+      requestMatchers: [
+        {
+          capability: TOOL_CAPABILITY.researchCrawl,
+          patterns: ['crawl', 'site map', 'sitemap', 'multiple pages', 'whole site', 'scan site'],
+          requireLiteralUrl: true,
+        },
+      ],
+    },
   ],
   browser: [
     {

package/src/lib/server/untrusted-content.ts CHANGED Viewed

@@ -4,11 +4,11 @@ const INJECTION_PATTERNS: Array<{ code: string; re: RegExp; note: string }> = [
   { code: 'ignore_instructions', re: /\bignore (?:all |any |the )?(?:previous|prior|above|system|developer) instructions\b/i, note: 'tries to override existing instructions' },
   { code: 'reveal_prompt', re: /\b(?:reveal|show|print|dump)\b[\s\S]{0,40}\b(?:system prompt|developer prompt|hidden prompt)\b/i, note: 'asks for hidden prompt data' },
   { code: 'credential_theft', re: /\b(?:api key|token|password|secret|credential)s?\b[\s\S]{0,40}\b(?:send|share|reveal|print|dump|exfiltrat)/i, note: 'asks for secrets or credentials' },
-  { code: 'tool_override', re: /\b(?:call|use|run)\b[\s\S]{0,40}\b(?:shell|terminal|browser|http_request|web_fetch|connector_message_tool)\b[\s\S]{0,40}\b(?:without|ignore)\b/i, note: 'tries to direct tool use by bypassing policy' },
+  { code: 'tool_override', re: /\b(?:call|use|run)\b[\s\S]{0,40}\b(?:shell|terminal|browser|http_request|web_fetch|web_extract|web_crawl|connector_message_tool)\b[\s\S]{0,40}\b(?:without|ignore)\b/i, note: 'tries to direct tool use by bypassing policy' },
   { code: 'workflow_override', re: /\b(?:act as|pretend to be)\b[\s\S]{0,40}\b(?:system|developer|administrator|operator)\b/i, note: 'tries to impersonate a higher-priority role' },
 ]
-const WEB_TOOL_NAMES = new Set(['browser', 'web_search', 'web_fetch', 'http_request'])
+const WEB_TOOL_NAMES = new Set(['browser', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'http_request'])
 function normalizeMode(value: unknown): 'off' | 'warn' | 'block' {
   const normalized = typeof value === 'string' ? value.trim().toLowerCase() : ''

package/src/types/session.ts CHANGED Viewed

@@ -218,6 +218,8 @@ export type SessionTool =
   | 'qwen_code_cli'
   | 'web_search'
   | 'web_fetch'
+  | 'web_extract'
+  | 'web_crawl'
   | 'edit_file'
   | 'process'
   | 'spawn_subagent'