npm - @swarmclawai/swarmclaw - Versions diffs - 1.9.21 → 1.9.23 - Mend

@swarmclawai/swarmclaw 1.9.21 → 1.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/README.md +23 -5
package/package.json +2 -2
package/src/components/chat/activity-moment.tsx +4 -0
package/src/components/chat/tool-call-bubble.tsx +6 -0
package/src/components/schedules/schedule-console.tsx +3 -0
package/src/lib/server/capability-router.test.ts +4 -4
package/src/lib/server/capability-router.ts +1 -0
package/src/lib/server/chat-execution/chat-execution-advanced.test.ts +27 -0
package/src/lib/server/chat-execution/chat-execution-utils.ts +21 -0
package/src/lib/server/chat-execution/iteration-event-handler.ts +1 -1
package/src/lib/server/chat-execution/stream-continuation.ts +6 -2
package/src/lib/server/plugins-advanced.test.ts +7 -3
package/src/lib/server/runtime/scheduler.test.ts +129 -0
package/src/lib/server/runtime/scheduler.ts +62 -35
package/src/lib/server/schedules/schedule-history.test.ts +14 -0
package/src/lib/server/schedules/schedule-history.ts +1 -0
package/src/lib/server/schedules/schedule-lifecycle.ts +5 -28
package/src/lib/server/schedules/schedule-normalization.ts +6 -28
package/src/lib/server/schedules/schedule-timing.test.ts +80 -0
package/src/lib/server/schedules/schedule-timing.ts +179 -0
package/src/lib/server/session-tools/web-crawl.test.ts +106 -0
package/src/lib/server/session-tools/web-inputs.test.ts +5 -0
package/src/lib/server/session-tools/web-utils.ts +8 -2
package/src/lib/server/session-tools/web.ts +256 -29
package/src/lib/server/storage.ts +2 -0
package/src/lib/server/tasks/task-lifecycle.ts +35 -5
package/src/lib/server/tool-aliases.ts +1 -1
package/src/lib/server/tool-capability-policy-advanced.test.ts +3 -3
package/src/lib/server/tool-capability-policy.ts +4 -1
package/src/lib/server/tool-planning.test.ts +2 -1
package/src/lib/server/tool-planning.ts +31 -0
package/src/lib/server/untrusted-content.ts +2 -2
package/src/types/schedule.ts +2 -2
package/src/types/session.ts +2 -0
package/src/types/task.ts +1 -0

package/src/lib/server/session-tools/web.ts CHANGED Viewed

@@ -199,6 +199,149 @@ async function executeWebApiAction(normalized: Record<string, unknown>) {
   }, requestArgs)
 }
+interface ExtractedWebPage { // Readable content produced for one fetched URL.
+  url: string // URL of the page after normalization (fragment removed).
+  title: string // Text of the first <title>, or the URL itself when no title is available.
+  text: string // Whitespace-collapsed readable text, PDF text, or an error message recorded by the crawler.
+  links: string[] // Distinct absolute http(s) link targets found on the page; empty for PDFs and failed fetches.
+}
+/**
+ * Validate a caller-supplied URL and return its canonical string form.
+ *
+ * Surrounding whitespace is trimmed and any `#fragment` is removed.
+ *
+ * @throws Error when the input is blank or uses a scheme other than http/https.
+ * @throws TypeError (from the `URL` constructor) when the input cannot be parsed.
+ */
+function normalizeHttpUrl(rawUrl: string): string {
+  const candidate = rawUrl.trim()
+  if (candidate.length === 0) throw new Error('URL is required.')
+  const target = new URL(candidate)
+  const isHttp = target.protocol === 'http:' || target.protocol === 'https:'
+  if (!isHttp) throw new Error('Only http and https URLs are supported.')
+  // Drop the fragment so URLs differing only by `#anchor` compare equal.
+  target.hash = ''
+  return target.href
+}
+/**
+ * Coerce a loosely typed argument to an integer inside [min, max].
+ *
+ * Numbers are used as given, strings are parsed with base-10 `parseInt`, and
+ * every other type is treated as missing. A missing or non-finite value
+ * yields `fallback` unchanged (the fallback itself is not clamped).
+ */
+function clampNumber(value: unknown, fallback: number, min: number, max: number): number {
+  let candidate = Number.NaN
+  if (typeof value === 'number') {
+    candidate = value
+  } else if (typeof value === 'string') {
+    candidate = Number.parseInt(value, 10)
+  }
+  if (!Number.isFinite(candidate)) return fallback
+  const whole = Math.trunc(candidate)
+  return Math.max(min, Math.min(max, whole))
+}
+/**
+ * Collect the distinct http(s) targets of every `<a href>` in a parsed page.
+ *
+ * Each href is resolved against `pageUrl`, its fragment is removed, and
+ * non-http(s) schemes are skipped. Hrefs that cannot be parsed are ignored.
+ *
+ * @param $ cheerio handle for the loaded document.
+ * @param pageUrl base URL used to resolve relative hrefs.
+ * @returns absolute URLs in first-seen order, without duplicates.
+ */
+function extractLinks($: ReturnType<typeof cheerio.load>, pageUrl: string): string[] {
+  // A Set preserves insertion order and makes the duplicate check O(1),
+  // avoiding a linear array scan for every anchor on link-heavy pages.
+  const links = new Set<string>()
+  $('a[href]').each((_index, element) => {
+    const rawHref = $(element).attr('href') || ''
+    try {
+      const resolved = new URL(rawHref, pageUrl)
+      if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') return
+      resolved.hash = ''
+      links.add(resolved.toString())
+    } catch {
+      // Ignore malformed links from the crawled page.
+    }
+  })
+  return Array.from(links)
+}
+async function extractReadablePage(fetchUrl: string): Promise<ExtractedWebPage> { // Fetch one URL and reduce it to title, readable text, and outgoing links; throws on invalid URL, non-OK status, or fetch/parse failure.
+  const url = normalizeHttpUrl(fetchUrl)
+  const res = await fetch(url, {
+    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
+    signal: AbortSignal.timeout(15000), // Abort the request after 15 seconds.
+  })
+  if (!res.ok) throw new Error(`HTTP ${res.status}: ${res.statusText}`)
+  const contentType = res.headers.get('content-type') || ''
+  if (contentType.includes('application/pdf')) { // PDF responses are parsed to plain text instead of HTML.
+    const pdfMod = await import(/* webpackIgnore: true */ 'pdf-parse')
+    const pdfParse = ((pdfMod as Record<string, unknown>).default ?? pdfMod) as (buf: Buffer) => Promise<{ text: string }>
+    const arrayBuffer = await res.arrayBuffer()
+    const result = await pdfParse(Buffer.from(arrayBuffer))
+    return { url, title: url, text: result.text, links: [] } // PDFs get the URL as title and no links.
+  }
+  const html = await res.text()
+  const $ = cheerio.load(html)
+  const title = $('title').first().text().replace(/\s+/g, ' ').trim() || url // Fall back to the URL when the title is empty.
+  const links = extractLinks($, url) // Collected before the removals below, so nav/header/footer links are included. NOTE(review): resolved against the requested URL; after a redirect `res.url` may differ — confirm this is intended.
+  $('script, style, noscript, nav, footer, header').remove()
+  const main = $('article, main, [role="main"]').first()
+  const text = (main.length ? main.text() : $('body').text()).replace(/\s+/g, ' ').trim() // Prefer the first main-content element; otherwise use the whole body.
+  return { url, title, text, links }
+}
+/**
+ * Render one extracted page as a title line, a URL line, a blank line, and
+ * the page text (or a placeholder when the text is empty), then pass the
+ * result through `truncate` with `MAX_OUTPUT`.
+ */
+function formatExtractedPage(page: ExtractedWebPage): string {
+  const body = page.text || '(no readable text found)'
+  const rendered = `Title: ${page.title}\nURL: ${page.url}\n\n${body}`
+  return truncate(rendered, MAX_OUTPUT)
+}
+/**
+ * Render crawl output as a two-line header followed by one numbered section
+ * per page, separated by blank lines.
+ *
+ * Each page's text is passed through `truncate` with a 1200 limit, and the
+ * combined report through `truncate` with `MAX_OUTPUT`. An empty page list
+ * yields a single "no results" line instead.
+ */
+function formatCrawlResults(startUrl: string, pages: ExtractedWebPage[]): string {
+  if (pages.length === 0) return `No crawl results found for: ${startUrl}`
+  const header = [`Crawl results for: ${startUrl}`, `Pages crawled: ${pages.length}`]
+  const entries = pages.map((page, position) => {
+    const excerpt = truncate(page.text || '(no readable text found)', 1200)
+    return `${position + 1}. ${page.title}\nURL: ${page.url}\nText: ${excerpt}`
+  })
+  return truncate(header.concat(entries).join('\n\n'), MAX_OUTPUT)
+}
+/**
+ * Tool entry point for extracting a single page.
+ *
+ * Reads the target from `url` (falling back to `query`) and returns the
+ * formatted page. Failures are reported as an `Error: …` string rather than
+ * thrown.
+ */
+async function executeWebExtractAction(normalized: Record<string, unknown>) {
+  const target = String(normalized.url || normalized.query || '')
+  if (target.trim() === '') return 'Error: "url" is required for extract action.'
+  try {
+    const page = await extractReadablePage(target)
+    return formatExtractedPage(page)
+  } catch (err: unknown) {
+    return `Error: ${errorMessage(err)}`
+  }
+}
+async function executeWebCrawlAction(normalized: Record<string, unknown>) { // Tool entry point: bounded crawl from one start URL; returns a formatted report or an `Error: …` string.
+  const rawUrl = String(normalized.url || normalized.query || '') // `query` is accepted as a fallback for `url`.
+  if (!rawUrl.trim()) return 'Error: "url" is required for crawl action.'
+  let startUrl: string
+  try {
+    startUrl = normalizeHttpUrl(rawUrl)
+  } catch (err: unknown) {
+    return `Error: ${errorMessage(err)}`
+  }
+  const maxPages = clampNumber(normalized.maxPages ?? normalized.maxResults, 5, 1, 25) // Default 5, limited to 1..25; `maxResults` is used when `maxPages` is absent.
+  const maxDepth = clampNumber(normalized.maxDepth, 1, 0, 3) // Default 1, limited to 0..3; depth 0 reads only the start page.
+  const includeExternal = normalized.includeExternal === true || normalized.sameOrigin === false // Either flag lets the crawl leave the start origin.
+  const startOrigin = new URL(startUrl).origin
+  const queue: Array<{ url: string; depth: number }> = [{ url: startUrl, depth: 0 }] // FIFO work list: entries are shifted from the front and pushed at the back.
+  const seen = new Set<string>() // URLs already taken from the queue.
+  const pages: ExtractedWebPage[] = []
+  while (queue.length > 0 && pages.length < maxPages) {
+    const next = queue.shift()
+    if (!next) break
+    if (seen.has(next.url)) continue
+    seen.add(next.url)
+    let page: ExtractedWebPage
+    try {
+      page = await extractReadablePage(next.url)
+    } catch (err: unknown) {
+      page = { url: next.url, title: next.url, text: `Error: ${errorMessage(err)}`, links: [] } // A failed fetch is kept as a page entry carrying the error text.
+    }
+    pages.push(page) // Failed pages are pushed too, so they count toward maxPages.
+    if (next.depth >= maxDepth) continue // Do not follow links from pages at the depth limit.
+    for (const link of page.links) {
+      if (seen.has(link)) continue
+      if (!includeExternal && new URL(link).origin !== startOrigin) continue
+      if (queue.some((entry) => entry.url === link)) continue // Skip links that are already queued.
+      queue.push({ url: link, depth: next.depth + 1 })
+      if (queue.length + seen.size >= maxPages * 4) break // Stop enqueuing from this page once queued + seen reaches four times maxPages.
+    }
+  }
+  return formatCrawlResults(startUrl, pages)
+}
 async function executeWebAction(args: Record<string, unknown>) {
   const normalized = normalizeToolInputArgs(args)
   const { query, url, maxResults } = normalized as { query?: string; url?: string; maxResults?: number }
@@ -219,32 +362,13 @@ async function executeWebAction(args: Record<string, unknown>) {
       const results = await provider.search(searchQuery, limit)
       if (results.length === 0) return 'No results found.'
       return formatWebSearchResults(searchQuery, results)
-    } else if (action === 'fetch') {
+    } else if (action === 'fetch' || action === 'extract') {
       const fetchUrl = url || query
-      if (!fetchUrl) return 'Error: "url" is required for fetch action.'
-      const res = await fetch(fetchUrl, {
-        headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
-        signal: AbortSignal.timeout(15000),
-      })
-      if (!res.ok) return `HTTP ${res.status}: ${res.statusText}`
-      const contentType = res.headers.get('content-type') || ''
-      if (contentType.includes('application/pdf')) {
-        try {
-          const pdfMod = await import(/* webpackIgnore: true */ 'pdf-parse')
-          const pdfParse = ((pdfMod as Record<string, unknown>).default ?? pdfMod) as (buf: Buffer) => Promise<{ text: string }>
-          const arrayBuffer = await res.arrayBuffer()
-          const result = await pdfParse(Buffer.from(arrayBuffer))
-          return truncate(result.text, MAX_OUTPUT)
-        } catch (err: unknown) {
-          return `Error parsing PDF: ${errorMessage(err)}`
-        }
-      }
-      const html = await res.text()
-      const $ = cheerio.load(html)
-      $('script, style, noscript, nav, footer, header').remove()
-      const main = $('article, main, [role="main"]').first()
-      const text = (main.length ? main.text() : $('body').text()).replace(/\s+/g, ' ').trim()
-      return truncate(text, MAX_OUTPUT)
+      if (!fetchUrl) return `Error: "url" is required for ${action} action.`
+      const page = await extractReadablePage(fetchUrl)
+      return action === 'extract' ? formatExtractedPage(page) : truncate(page.text, MAX_OUTPUT)
+    } else if (action === 'crawl') {
+      return executeWebCrawlAction(normalized)
     } else if (action === 'api') {
       return executeWebApiAction(normalized)
     }
@@ -259,21 +383,25 @@ async function executeWebAction(args: Record<string, unknown>) {
  */
 const WebExtension: Extension = {
   name: 'Core Web',
-  description: 'Search the web, fetch content, and make HTTP API calls.',
+  description: 'Search the web, extract pages, crawl sites, and make HTTP API calls.',
   hooks: {
-    getCapabilityDescription: () => 'I can use the unified `web` tool with action `search` for research, `fetch` for reading a URL, and `api` for raw HTTP API calls with full control over method/headers/body.',
+    getCapabilityDescription: () => 'I can use `web_search` for fresh research, `web_extract` for a specific URL, `web_crawl` for bounded multi-page site reads, and the unified `web` tool for search, fetch, crawl, and raw HTTP API calls.',
   } as ExtensionHooks,
   tools: [
     {
       name: 'web',
-      description: 'Unified web access tool. Actions: search (web search), fetch (read URL content), api (raw HTTP request with method/headers/body).',
+      description: 'Unified web access tool. Actions: search (web search), fetch/extract (read URL content), crawl (bounded same-origin crawl), api (raw HTTP request with method/headers/body).',
       parameters: {
         type: 'object',
         properties: {
-          action: { type: 'string', enum: ['search', 'fetch', 'api'] },
+          action: { type: 'string', enum: ['search', 'fetch', 'extract', 'crawl', 'api'] },
           query: { type: 'string' },
           url: { type: 'string' },
           maxResults: { type: 'number' },
+          maxPages: { type: 'number', description: 'Maximum pages for crawl action, default 5, max 25' },
+          maxDepth: { type: 'number', description: 'Maximum crawl depth, default 1, max 3' },
+          includeExternal: { type: 'boolean', description: 'Allow crawl to leave the starting origin, default false' },
+          sameOrigin: { type: 'boolean', description: 'Keep crawl on the starting origin when true, default true' },
           method: { type: 'string', enum: ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD', 'OPTIONS'], description: 'HTTP method (for api action)' },
           headers: { type: 'object', additionalProperties: { type: 'string' }, description: 'Request headers (for api action)' },
           body: { type: 'string', description: 'Request body (for api action)' },
@@ -283,6 +411,71 @@ const WebExtension: Extension = {
         required: ['action']
       },
       execute: async (args) => executeWebAction(args)
+    },
+    {
+      name: 'web_search',
+      description: 'Search the web and return ranked results with URLs and snippets.',
+      parameters: {
+        type: 'object',
+        properties: {
+          query: { type: 'string' },
+          maxResults: { type: 'number' },
+        },
+        required: ['query'],
+      },
+      planning: {
+        capabilities: ['research.search'],
+        disciplineGuidance: ['Use `web_search` for fresh information, then fetch or extract only the sources you need.'],
+      },
+      execute: async (args) => executeWebAction({ ...normalizeToolInputArgs(args), action: 'search' }),
+    },
+    {
+      name: 'web_fetch',
+      description: 'Read a specific URL and return readable page text.',
+      parameters: {
+        type: 'object',
+        properties: { url: { type: 'string' } },
+        required: ['url'],
+      },
+      planning: {
+        capabilities: ['research.fetch'],
+        disciplineGuidance: ['Use `web_fetch` when you already have a URL and only need the readable text.'],
+      },
+      execute: async (args) => executeWebAction({ ...normalizeToolInputArgs(args), action: 'fetch' }),
+    },
+    {
+      name: 'web_extract',
+      description: 'Extract readable content from a URL with title and source URL included.',
+      parameters: {
+        type: 'object',
+        properties: { url: { type: 'string' } },
+        required: ['url'],
+      },
+      planning: {
+        capabilities: ['research.fetch'],
+        disciplineGuidance: ['Use `web_extract` for source-grounded page reads where the title and URL should stay attached to the extracted text.'],
+      },
+      execute: async (args) => executeWebExtractAction(normalizeToolInputArgs(args)),
+    },
+    {
+      name: 'web_crawl',
+      description: 'Crawl a small set of pages starting from one URL. Same-origin by default, bounded by maxPages and maxDepth.',
+      parameters: {
+        type: 'object',
+        properties: {
+          url: { type: 'string' },
+          maxPages: { type: 'number' },
+          maxDepth: { type: 'number' },
+          includeExternal: { type: 'boolean' },
+          sameOrigin: { type: 'boolean' },
+        },
+        required: ['url'],
+      },
+      planning: {
+        capabilities: ['research.crawl'],
+        disciplineGuidance: ['Use `web_crawl` only when the task needs multiple pages from the same site. Keep maxPages low and summarize after one crawl.'],
+      },
+      execute: async (args) => executeWebCrawlAction(normalizeToolInputArgs(args)),
     }
   ]
 }
@@ -307,6 +500,40 @@ export function buildWebTools(bctx: ToolBuildContext): StructuredToolInterface[]
         }
       )
     )
+    tools.push(
+      tool(
+        async (args) => executeWebAction({ ...normalizeToolInputArgs((args ?? {}) as Record<string, unknown>), action: 'search' }),
+        {
+          name: 'web_search',
+          description: 'Search the web and return ranked results with URLs and snippets.',
+          schema: z.object({}).passthrough()
+        }
+      ),
+      tool(
+        async (args) => executeWebAction({ ...normalizeToolInputArgs((args ?? {}) as Record<string, unknown>), action: 'fetch' }),
+        {
+          name: 'web_fetch',
+          description: 'Read a specific URL and return readable page text.',
+          schema: z.object({}).passthrough()
+        }
+      ),
+      tool(
+        async (args) => executeWebExtractAction(normalizeToolInputArgs((args ?? {}) as Record<string, unknown>)),
+        {
+          name: 'web_extract',
+          description: 'Extract readable content from a URL with title and source URL included.',
+          schema: z.object({}).passthrough()
+        }
+      ),
+      tool(
+        async (args) => executeWebCrawlAction(normalizeToolInputArgs((args ?? {}) as Record<string, unknown>)),
+        {
+          name: 'web_crawl',
+          description: 'Crawl a small set of pages starting from one URL. Same-origin by default, bounded by maxPages and maxDepth.',
+          schema: z.object({}).passthrough()
+        }
+      )
+    )
   }
   // Browser tool (kept as direct injection for now due to complexity)

package/src/lib/server/storage.ts CHANGED Viewed

@@ -664,6 +664,8 @@ if (!IS_BUILD_BOOTSTRAP) {
     'files',
     'web_search',
     'web_fetch',
+    'web_extract',
+    'web_crawl',
     'browser',
     'manage_agents',
     'manage_tasks',

package/src/lib/server/tasks/task-lifecycle.ts CHANGED Viewed

@@ -8,6 +8,9 @@ import {
   type TaskCompletionValidation,
 } from '@/lib/server/tasks/task-validation'
 import { syncTaskExecutionPolicyState } from '@/lib/server/tasks/task-execution-policy'
+import { createMission, startMission } from '@/lib/server/missions/mission-service'
+import { getMission } from '@/lib/server/missions/mission-repository'
+import { loadSessions } from '@/lib/server/storage'
 export interface BuildBoardTaskInput {
   id?: string
@@ -84,6 +87,7 @@ export interface PrepareScheduledTaskRunOptions {
     | 'agentId'
     | 'taskPrompt'
     | 'linkedTaskId'
+    | 'linkedMissionId'
     | 'runNumber'
     | 'createdInSessionId'
     | 'createdByAgentId'
@@ -98,20 +102,45 @@ export interface PrepareScheduledTaskRunOptions {
   scheduleSignature?: string | null
 }
+/**
+ * Resolve the mission id to attach to a scheduled run, creating and starting
+ * a new mission when the schedule has no usable one.
+ *
+ * Returns the schedule's linked mission id when `getMission` finds it.
+ * Otherwise a new mission is created only if the schedule records a creating
+ * session that is present in `loadSessions()`; when it is not, the trimmed
+ * linked id (possibly unresolved) or `null` is returned.
+ *
+ * Side effect: on creation, `schedule.linkedMissionId` is set to the new id.
+ */
+function ensureScheduleMission(schedule: PrepareScheduledTaskRunOptions['schedule']): string | null {
+  const linkedId = typeof schedule.linkedMissionId === 'string' ? schedule.linkedMissionId.trim() : ''
+  if (linkedId && getMission(linkedId)) return linkedId
+  // Value handed back whenever a new mission cannot be created.
+  const fallback = linkedId || null
+  const rootSessionId = typeof schedule.createdInSessionId === 'string' ? schedule.createdInSessionId.trim() : ''
+  if (!rootSessionId) return fallback
+  if (!loadSessions()[rootSessionId]) return fallback
+  const created = createMission({
+    title: `Scheduled task: ${schedule.name}`,
+    goal: schedule.taskPrompt || schedule.name,
+    successCriteria: ['Scheduled run is queued, executed, and reported back to the task board.'],
+    rootSessionId,
+    agentIds: [schedule.agentId].filter(Boolean),
+    reportSchedule: null,
+  })
+  startMission(created.id)
+  schedule.linkedMissionId = created.id
+  return created.id
+}
 export function prepareScheduledTaskRun(params: PrepareScheduledTaskRunOptions): { taskId: string; task: BoardTask } {
   const { schedule, tasks, now, scheduleSignature } = params
   const title = `[Sched] ${schedule.name} (run #${schedule.runNumber})`
   const existingTaskId = typeof schedule.linkedTaskId === 'string' ? schedule.linkedTaskId : ''
   const existingTask = existingTaskId ? tasks[existingTaskId] : null
+  const missionId = ensureScheduleMission(schedule)
   if (existingTask && existingTask.status !== 'queued' && existingTask.status !== 'running') {
+    const task = resetTaskForRerun(existingTask, {
+      title,
+      now,
+      runNumber: schedule.runNumber,
+    })
+    task.missionId = missionId
     return {
       taskId: existingTaskId,
-      task: resetTaskForRerun(existingTask, {
-        title,
-        now,
-        runNumber: schedule.runNumber,
-      }),
+      task,
     }
   }
@@ -125,6 +154,7 @@ export function prepareScheduledTaskRun(params: PrepareScheduledTaskRunOptions):
       sourceScheduleId: schedule.id,
       sourceScheduleName: schedule.name,
       sourceScheduleKey: scheduleSignature || null,
+      missionId,
       createdInSessionId: schedule.createdInSessionId || null,
       createdByAgentId: schedule.createdByAgentId || null,
       followupConnectorId: schedule.followupConnectorId || null,

package/src/lib/server/tool-aliases.ts CHANGED Viewed

@@ -3,7 +3,7 @@ const EXTENSION_ALIAS_GROUPS: string[][] = [
   ['execute', 'sandbox'],
   ['files', 'read_file', 'write_file', 'list_files', 'copy_file', 'move_file', 'delete_file', 'send_file'],
   ['edit_file'],
-  ['web', 'web_search', 'web_fetch', 'http_request', 'http'],
+  ['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'http_request', 'http'],
   ['browser', 'openclaw_browser'],
   ['delegate', 'claude_code', 'codex_cli', 'opencode_cli', 'gemini_cli', 'copilot_cli', 'droid_cli', 'cursor_cli', 'qwen_code_cli', 'delegate_to_claude_code', 'delegate_to_codex_cli', 'delegate_to_opencode_cli', 'delegate_to_gemini_cli', 'delegate_to_copilot_cli', 'delegate_to_droid_cli', 'delegate_to_cursor_cli', 'delegate_to_qwen_code_cli'],
   ['manage_platform'],

package/src/lib/server/tool-capability-policy-advanced.test.ts CHANGED Viewed

@@ -255,12 +255,12 @@ describe('explicit allows override mode blocks', () => {
 // Category blocks
 // ---------------------------------------------------------------------------
 describe('category blocks', () => {
-  it('blocking network category blocks web, web_search, web_fetch', () => {
-    const d = resolveSessionToolPolicy(['web', 'web_search', 'web_fetch', 'memory'], {
+  it('blocking network category blocks granular web tools', () => {
+    const d = resolveSessionToolPolicy(['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'memory'], {
       capabilityBlockedCategories: ['network'],
     })
     assert.deepStrictEqual(d.enabledExtensions, ['memory'])
-    assert.equal(d.blockedExtensions.length, 3)
+    assert.equal(d.blockedExtensions.length, 5)
     for (const b of d.blockedExtensions) {
       assert.match(b.reason, /category "network"/)
     }

package/src/lib/server/tool-capability-policy.ts CHANGED Viewed

@@ -49,9 +49,11 @@ const TOOL_DESCRIPTORS: Record<string, ToolDescriptor> = {
   move_file: { categories: ['filesystem'], concreteTools: ['move_file'] },
   edit_file: { categories: ['filesystem'], concreteTools: ['edit_file'] },
   delete_file: { categories: ['filesystem'], concreteTools: ['delete_file'], destructive: true },
-  web: { categories: ['network'], concreteTools: ['web', 'web_search', 'web_fetch'] },
+  web: { categories: ['network'], concreteTools: ['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl'] },
   web_search: { categories: ['network'], concreteTools: ['web_search'] },
   web_fetch: { categories: ['network'], concreteTools: ['web_fetch'] },
+  web_extract: { categories: ['network'], concreteTools: ['web_extract'] },
+  web_crawl: { categories: ['network'], concreteTools: ['web_crawl'] },
   browser: { categories: ['browser', 'network'], concreteTools: ['browser', 'openclaw_browser'] },
   delegate: { categories: ['delegation', 'execution'], concreteTools: ['delegate', 'delegate_to_claude_code', 'delegate_to_codex_cli', 'delegate_to_opencode_cli', 'delegate_to_gemini_cli', 'delegate_to_copilot_cli', 'delegate_to_droid_cli', 'delegate_to_cursor_cli', 'delegate_to_qwen_code_cli'] },
   claude_code: { categories: ['delegation', 'execution'], concreteTools: ['delegate_to_claude_code'] },
@@ -85,6 +87,7 @@ const TOOL_DESCRIPTORS: Record<string, ToolDescriptor> = {
   spawn_subagent: { categories: ['delegation', 'platform'], concreteTools: ['spawn_subagent', 'delegate_to_agent'] },
   context_mgmt: { categories: ['memory'], concreteTools: ['context_mgmt', 'context_status', 'context_summarize'] },
   extension_creator: { categories: ['filesystem', 'execution'], concreteTools: ['extension_creator', 'extension_creator_tool'] },
+  wallet: { categories: ['outbound'], concreteTools: ['wallet'] },
   mailbox: { categories: ['network', 'platform', 'outbound'], concreteTools: ['mailbox', 'inbox'] },
   ask_human: { categories: ['platform'], concreteTools: ['ask_human', 'human_loop'] },
   google_workspace: { categories: ['network'], concreteTools: ['google_workspace', 'gws'] },

package/src/lib/server/tool-planning.test.ts CHANGED Viewed

@@ -12,10 +12,11 @@ function uniqueExtensionId(prefix: string): string {
 describe('tool-planning', () => {
   it('collects core planning metadata for aliased built-in tools', () => {
-    const view = getEnabledToolPlanningView(['web_search', 'web_fetch', 'browser', 'manage_connectors'])
+    const view = getEnabledToolPlanningView(['web_search', 'web_fetch', 'web_extract', 'web_crawl', 'browser', 'manage_connectors'])
     assert.deepEqual(view.displayToolIds, ['browser', 'manage_connectors', 'web'])
     assert.deepEqual(getToolsForCapability(['web_search'], TOOL_CAPABILITY.researchSearch), ['web_search'])
+    assert.deepEqual(getToolsForCapability(['web_crawl'], TOOL_CAPABILITY.researchCrawl), ['web_crawl'])
     assert.deepEqual(getToolsForCapability(['manage_connectors'], TOOL_CAPABILITY.deliveryVoiceNote), ['connector_message_tool'])
   })

package/src/lib/server/tool-planning.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { canonicalizeExtensionId, expandExtensionIds } from './tool-aliases'
 export const TOOL_CAPABILITY = {
   researchSearch: 'research.search',
   researchFetch: 'research.fetch',
+  researchCrawl: 'research.crawl',
   browserNavigate: 'browser.navigate',
   browserCapture: 'browser.capture',
   artifactPdf: 'artifact.pdf',
@@ -98,6 +99,36 @@ const CORE_TOOL_PLANNING: Record<string, LegacyToolPlanningEntry[]> = {
         },
       ],
     },
+    {
+      toolName: 'web_extract',
+      capabilities: [TOOL_CAPABILITY.researchFetch],
+      disciplineGuidance: [
+        'For `web_extract`, use `{"url":"https://..."}` when source title and URL should remain attached to extracted page text.',
+        'Extract the exact pages you need, then synthesize. Do not extract the same page repeatedly.',
+      ],
+      requestMatchers: [
+        {
+          capability: TOOL_CAPABILITY.researchFetch,
+          patterns: ['extract', 'readable content', 'page text', 'source text'],
+          requireLiteralUrl: true,
+        },
+      ],
+    },
+    {
+      toolName: 'web_crawl',
+      capabilities: [TOOL_CAPABILITY.researchCrawl],
+      disciplineGuidance: [
+        'For `web_crawl`, use `{"url":"https://...","maxPages":5,"maxDepth":1}` only when a task needs several pages from the same site.',
+        'Keep crawls bounded and summarize after one crawl. Prefer `web_extract` for a single known URL.',
+      ],
+      requestMatchers: [
+        {
+          capability: TOOL_CAPABILITY.researchCrawl,
+          patterns: ['crawl', 'site map', 'sitemap', 'multiple pages', 'whole site', 'scan site'],
+          requireLiteralUrl: true,
+        },
+      ],
+    },
   ],
   browser: [
     {

package/src/lib/server/untrusted-content.ts CHANGED Viewed

@@ -4,11 +4,11 @@ const INJECTION_PATTERNS: Array<{ code: string; re: RegExp; note: string }> = [
   { code: 'ignore_instructions', re: /\bignore (?:all |any |the )?(?:previous|prior|above|system|developer) instructions\b/i, note: 'tries to override existing instructions' },
   { code: 'reveal_prompt', re: /\b(?:reveal|show|print|dump)\b[\s\S]{0,40}\b(?:system prompt|developer prompt|hidden prompt)\b/i, note: 'asks for hidden prompt data' },
   { code: 'credential_theft', re: /\b(?:api key|token|password|secret|credential)s?\b[\s\S]{0,40}\b(?:send|share|reveal|print|dump|exfiltrat)/i, note: 'asks for secrets or credentials' },
-  { code: 'tool_override', re: /\b(?:call|use|run)\b[\s\S]{0,40}\b(?:shell|terminal|browser|http_request|web_fetch|connector_message_tool)\b[\s\S]{0,40}\b(?:without|ignore)\b/i, note: 'tries to direct tool use by bypassing policy' },
+  { code: 'tool_override', re: /\b(?:call|use|run)\b[\s\S]{0,40}\b(?:shell|terminal|browser|http_request|web_fetch|web_extract|web_crawl|connector_message_tool)\b[\s\S]{0,40}\b(?:without|ignore)\b/i, note: 'tries to direct tool use by bypassing policy' },
   { code: 'workflow_override', re: /\b(?:act as|pretend to be)\b[\s\S]{0,40}\b(?:system|developer|administrator|operator)\b/i, note: 'tries to impersonate a higher-priority role' },
 ]
-const WEB_TOOL_NAMES = new Set(['browser', 'web_search', 'web_fetch', 'http_request'])
+const WEB_TOOL_NAMES = new Set(['browser', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'http_request'])
 function normalizeMode(value: unknown): 'off' | 'warn' | 'block' {
   const normalized = typeof value === 'string' ? value.trim().toLowerCase() : ''

package/src/types/schedule.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import type { ExtensionManagedResourceMarker } from './extension'
 export type ScheduleType = 'cron' | 'interval' | 'once'
 export type ScheduleStatus = 'active' | 'paused' | 'completed' | 'failed' | 'archived'
 export type ScheduleTaskMode = 'task' | 'wake_only' | 'protocol'
-export type ScheduleHistoryAction = 'created' | 'updated' | 'archived' | 'restored' | 'run_started' | 'skipped' | 'failed'
+export type ScheduleHistoryAction = 'created' | 'updated' | 'archived' | 'restored' | 'run_started' | 'skipped' | 'failed' | 'repaired'
 export interface ScheduleHistoryChange {
   field: string
@@ -55,7 +55,7 @@ export interface Schedule {
   nextRunAt?: number
   /** IANA timezone for schedule evaluation (default: system local) */
   timezone?: string | null
-  /** Random stagger window in seconds added to nextRunAt to avoid thundering herd */
+  /** Deterministic stagger window in seconds added to nextRunAt to avoid thundering herd */
   staggerSec?: number | null
   /** Last delivery status for this schedule */
   lastDeliveryStatus?: 'ok' | 'error' | null

package/src/types/session.ts CHANGED Viewed

@@ -218,6 +218,8 @@ export type SessionTool =
   | 'qwen_code_cli'
   | 'web_search'
   | 'web_fetch'
+  | 'web_extract'
+  | 'web_crawl'
   | 'edit_file'
   | 'process'
   | 'spawn_subagent'

package/src/types/task.ts CHANGED Viewed

@@ -182,6 +182,7 @@ export interface BoardTask {
   cwd?: string | null
   file?: string | null
   sessionId?: string | null
+  missionId?: string | null
   completionReportPath?: string | null
   result?: string | null
   error?: string | null