@swarmclawai/swarmclaw 1.9.21 → 1.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +23 -5
  2. package/package.json +2 -2
  3. package/src/components/chat/activity-moment.tsx +4 -0
  4. package/src/components/chat/tool-call-bubble.tsx +6 -0
  5. package/src/components/schedules/schedule-console.tsx +3 -0
  6. package/src/lib/server/capability-router.test.ts +4 -4
  7. package/src/lib/server/capability-router.ts +1 -0
  8. package/src/lib/server/chat-execution/chat-execution-advanced.test.ts +27 -0
  9. package/src/lib/server/chat-execution/chat-execution-utils.ts +21 -0
  10. package/src/lib/server/chat-execution/iteration-event-handler.ts +1 -1
  11. package/src/lib/server/chat-execution/stream-continuation.ts +6 -2
  12. package/src/lib/server/plugins-advanced.test.ts +7 -3
  13. package/src/lib/server/runtime/scheduler.test.ts +129 -0
  14. package/src/lib/server/runtime/scheduler.ts +62 -35
  15. package/src/lib/server/schedules/schedule-history.test.ts +14 -0
  16. package/src/lib/server/schedules/schedule-history.ts +1 -0
  17. package/src/lib/server/schedules/schedule-lifecycle.ts +5 -28
  18. package/src/lib/server/schedules/schedule-normalization.ts +6 -28
  19. package/src/lib/server/schedules/schedule-timing.test.ts +80 -0
  20. package/src/lib/server/schedules/schedule-timing.ts +179 -0
  21. package/src/lib/server/session-tools/web-crawl.test.ts +106 -0
  22. package/src/lib/server/session-tools/web-inputs.test.ts +5 -0
  23. package/src/lib/server/session-tools/web-utils.ts +8 -2
  24. package/src/lib/server/session-tools/web.ts +256 -29
  25. package/src/lib/server/storage.ts +2 -0
  26. package/src/lib/server/tasks/task-lifecycle.ts +35 -5
  27. package/src/lib/server/tool-aliases.ts +1 -1
  28. package/src/lib/server/tool-capability-policy-advanced.test.ts +3 -3
  29. package/src/lib/server/tool-capability-policy.ts +4 -1
  30. package/src/lib/server/tool-planning.test.ts +2 -1
  31. package/src/lib/server/tool-planning.ts +31 -0
  32. package/src/lib/server/untrusted-content.ts +2 -2
  33. package/src/types/schedule.ts +2 -2
  34. package/src/types/session.ts +2 -0
  35. package/src/types/task.ts +1 -0
@@ -199,6 +199,149 @@ async function executeWebApiAction(normalized: Record<string, unknown>) {
199
199
  }, requestArgs)
200
200
  }
201
201
 
202
+ interface ExtractedWebPage {
203
+ url: string
204
+ title: string
205
+ text: string
206
+ links: string[]
207
+ }
208
+
209
+ function normalizeHttpUrl(rawUrl: string): string {
210
+ const trimmed = rawUrl.trim()
211
+ if (!trimmed) throw new Error('URL is required.')
212
+ const parsed = new URL(trimmed)
213
+ if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
214
+ throw new Error('Only http and https URLs are supported.')
215
+ }
216
+ parsed.hash = ''
217
+ return parsed.toString()
218
+ }
219
+
220
+ function clampNumber(value: unknown, fallback: number, min: number, max: number): number {
221
+ const parsed = typeof value === 'number'
222
+ ? value
223
+ : typeof value === 'string'
224
+ ? Number.parseInt(value, 10)
225
+ : Number.NaN
226
+ if (!Number.isFinite(parsed)) return fallback
227
+ return Math.max(min, Math.min(max, Math.trunc(parsed)))
228
+ }
229
+
230
+ function extractLinks($: ReturnType<typeof cheerio.load>, pageUrl: string): string[] {
231
+ const links: string[] = []
232
+ $('a[href]').each((_index, element) => {
233
+ const rawHref = $(element).attr('href') || ''
234
+ try {
235
+ const resolved = new URL(rawHref, pageUrl)
236
+ if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') return
237
+ resolved.hash = ''
238
+ const href = resolved.toString()
239
+ if (!links.includes(href)) links.push(href)
240
+ } catch {
241
+ // Ignore malformed links from the crawled page.
242
+ }
243
+ })
244
+ return links
245
+ }
246
+
247
+ async function extractReadablePage(fetchUrl: string): Promise<ExtractedWebPage> {
248
+ const url = normalizeHttpUrl(fetchUrl)
249
+ const res = await fetch(url, {
250
+ headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
251
+ signal: AbortSignal.timeout(15000),
252
+ })
253
+ if (!res.ok) throw new Error(`HTTP ${res.status}: ${res.statusText}`)
254
+ const contentType = res.headers.get('content-type') || ''
255
+ if (contentType.includes('application/pdf')) {
256
+ const pdfMod = await import(/* webpackIgnore: true */ 'pdf-parse')
257
+ const pdfParse = ((pdfMod as Record<string, unknown>).default ?? pdfMod) as (buf: Buffer) => Promise<{ text: string }>
258
+ const arrayBuffer = await res.arrayBuffer()
259
+ const result = await pdfParse(Buffer.from(arrayBuffer))
260
+ return { url, title: url, text: result.text, links: [] }
261
+ }
262
+
263
+ const html = await res.text()
264
+ const $ = cheerio.load(html)
265
+ const title = $('title').first().text().replace(/\s+/g, ' ').trim() || url
266
+ const links = extractLinks($, url)
267
+ $('script, style, noscript, nav, footer, header').remove()
268
+ const main = $('article, main, [role="main"]').first()
269
+ const text = (main.length ? main.text() : $('body').text()).replace(/\s+/g, ' ').trim()
270
+ return { url, title, text, links }
271
+ }
272
+
273
+ function formatExtractedPage(page: ExtractedWebPage): string {
274
+ const lines = [`Title: ${page.title}`, `URL: ${page.url}`, '', page.text || '(no readable text found)']
275
+ return truncate(lines.join('\n'), MAX_OUTPUT)
276
+ }
277
+
278
+ function formatCrawlResults(startUrl: string, pages: ExtractedWebPage[]): string {
279
+ if (pages.length === 0) return `No crawl results found for: ${startUrl}`
280
+ const sections = [`Crawl results for: ${startUrl}`, `Pages crawled: ${pages.length}`]
281
+ for (let index = 0; index < pages.length; index++) {
282
+ const page = pages[index]
283
+ const text = truncate(page.text || '(no readable text found)', 1200)
284
+ sections.push(`${index + 1}. ${page.title}\nURL: ${page.url}\nText: ${text}`)
285
+ }
286
+ return truncate(sections.join('\n\n'), MAX_OUTPUT)
287
+ }
288
+
289
+ async function executeWebExtractAction(normalized: Record<string, unknown>) {
290
+ const rawUrl = String(normalized.url || normalized.query || '')
291
+ if (!rawUrl.trim()) return 'Error: "url" is required for extract action.'
292
+ try {
293
+ return formatExtractedPage(await extractReadablePage(rawUrl))
294
+ } catch (err: unknown) {
295
+ return `Error: ${errorMessage(err)}`
296
+ }
297
+ }
298
+
299
+ async function executeWebCrawlAction(normalized: Record<string, unknown>) {
300
+ const rawUrl = String(normalized.url || normalized.query || '')
301
+ if (!rawUrl.trim()) return 'Error: "url" is required for crawl action.'
302
+
303
+ let startUrl: string
304
+ try {
305
+ startUrl = normalizeHttpUrl(rawUrl)
306
+ } catch (err: unknown) {
307
+ return `Error: ${errorMessage(err)}`
308
+ }
309
+
310
+ const maxPages = clampNumber(normalized.maxPages ?? normalized.maxResults, 5, 1, 25)
311
+ const maxDepth = clampNumber(normalized.maxDepth, 1, 0, 3)
312
+ const includeExternal = normalized.includeExternal === true || normalized.sameOrigin === false
313
+ const startOrigin = new URL(startUrl).origin
314
+ const queue: Array<{ url: string; depth: number }> = [{ url: startUrl, depth: 0 }]
315
+ const seen = new Set<string>()
316
+ const pages: ExtractedWebPage[] = []
317
+
318
+ while (queue.length > 0 && pages.length < maxPages) {
319
+ const next = queue.shift()
320
+ if (!next) break
321
+ if (seen.has(next.url)) continue
322
+ seen.add(next.url)
323
+
324
+ let page: ExtractedWebPage
325
+ try {
326
+ page = await extractReadablePage(next.url)
327
+ } catch (err: unknown) {
328
+ page = { url: next.url, title: next.url, text: `Error: ${errorMessage(err)}`, links: [] }
329
+ }
330
+ pages.push(page)
331
+
332
+ if (next.depth >= maxDepth) continue
333
+ for (const link of page.links) {
334
+ if (seen.has(link)) continue
335
+ if (!includeExternal && new URL(link).origin !== startOrigin) continue
336
+ if (queue.some((entry) => entry.url === link)) continue
337
+ queue.push({ url: link, depth: next.depth + 1 })
338
+ if (queue.length + seen.size >= maxPages * 4) break
339
+ }
340
+ }
341
+
342
+ return formatCrawlResults(startUrl, pages)
343
+ }
344
+
202
345
  async function executeWebAction(args: Record<string, unknown>) {
203
346
  const normalized = normalizeToolInputArgs(args)
204
347
  const { query, url, maxResults } = normalized as { query?: string; url?: string; maxResults?: number }
@@ -219,32 +362,13 @@ async function executeWebAction(args: Record<string, unknown>) {
219
362
  const results = await provider.search(searchQuery, limit)
220
363
  if (results.length === 0) return 'No results found.'
221
364
  return formatWebSearchResults(searchQuery, results)
222
- } else if (action === 'fetch') {
365
+ } else if (action === 'fetch' || action === 'extract') {
223
366
  const fetchUrl = url || query
224
- if (!fetchUrl) return 'Error: "url" is required for fetch action.'
225
- const res = await fetch(fetchUrl, {
226
- headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
227
- signal: AbortSignal.timeout(15000),
228
- })
229
- if (!res.ok) return `HTTP ${res.status}: ${res.statusText}`
230
- const contentType = res.headers.get('content-type') || ''
231
- if (contentType.includes('application/pdf')) {
232
- try {
233
- const pdfMod = await import(/* webpackIgnore: true */ 'pdf-parse')
234
- const pdfParse = ((pdfMod as Record<string, unknown>).default ?? pdfMod) as (buf: Buffer) => Promise<{ text: string }>
235
- const arrayBuffer = await res.arrayBuffer()
236
- const result = await pdfParse(Buffer.from(arrayBuffer))
237
- return truncate(result.text, MAX_OUTPUT)
238
- } catch (err: unknown) {
239
- return `Error parsing PDF: ${errorMessage(err)}`
240
- }
241
- }
242
- const html = await res.text()
243
- const $ = cheerio.load(html)
244
- $('script, style, noscript, nav, footer, header').remove()
245
- const main = $('article, main, [role="main"]').first()
246
- const text = (main.length ? main.text() : $('body').text()).replace(/\s+/g, ' ').trim()
247
- return truncate(text, MAX_OUTPUT)
367
+ if (!fetchUrl) return `Error: "url" is required for ${action} action.`
368
+ const page = await extractReadablePage(fetchUrl)
369
+ return action === 'extract' ? formatExtractedPage(page) : truncate(page.text, MAX_OUTPUT)
370
+ } else if (action === 'crawl') {
371
+ return executeWebCrawlAction(normalized)
248
372
  } else if (action === 'api') {
249
373
  return executeWebApiAction(normalized)
250
374
  }
@@ -259,21 +383,25 @@ async function executeWebAction(args: Record<string, unknown>) {
259
383
  */
260
384
  const WebExtension: Extension = {
261
385
  name: 'Core Web',
262
- description: 'Search the web, fetch content, and make HTTP API calls.',
386
+ description: 'Search the web, extract pages, crawl sites, and make HTTP API calls.',
263
387
  hooks: {
264
- getCapabilityDescription: () => 'I can use the unified `web` tool with action `search` for research, `fetch` for reading a URL, and `api` for raw HTTP API calls with full control over method/headers/body.',
388
+ getCapabilityDescription: () => 'I can use `web_search` for fresh research, `web_extract` for a specific URL, `web_crawl` for bounded multi-page site reads, and the unified `web` tool for search, fetch, crawl, and raw HTTP API calls.',
265
389
  } as ExtensionHooks,
266
390
  tools: [
267
391
  {
268
392
  name: 'web',
269
- description: 'Unified web access tool. Actions: search (web search), fetch (read URL content), api (raw HTTP request with method/headers/body).',
393
+ description: 'Unified web access tool. Actions: search (web search), fetch/extract (read URL content), crawl (bounded same-origin crawl), api (raw HTTP request with method/headers/body).',
270
394
  parameters: {
271
395
  type: 'object',
272
396
  properties: {
273
- action: { type: 'string', enum: ['search', 'fetch', 'api'] },
397
+ action: { type: 'string', enum: ['search', 'fetch', 'extract', 'crawl', 'api'] },
274
398
  query: { type: 'string' },
275
399
  url: { type: 'string' },
276
400
  maxResults: { type: 'number' },
401
+ maxPages: { type: 'number', description: 'Maximum pages for crawl action, default 5, max 25' },
402
+ maxDepth: { type: 'number', description: 'Maximum crawl depth, default 1, max 3' },
403
+ includeExternal: { type: 'boolean', description: 'Allow crawl to leave the starting origin, default false' },
404
+ sameOrigin: { type: 'boolean', description: 'Keep crawl on the starting origin when true, default true' },
277
405
  method: { type: 'string', enum: ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD', 'OPTIONS'], description: 'HTTP method (for api action)' },
278
406
  headers: { type: 'object', additionalProperties: { type: 'string' }, description: 'Request headers (for api action)' },
279
407
  body: { type: 'string', description: 'Request body (for api action)' },
@@ -283,6 +411,71 @@ const WebExtension: Extension = {
283
411
  required: ['action']
284
412
  },
285
413
  execute: async (args) => executeWebAction(args)
414
+ },
415
+ {
416
+ name: 'web_search',
417
+ description: 'Search the web and return ranked results with URLs and snippets.',
418
+ parameters: {
419
+ type: 'object',
420
+ properties: {
421
+ query: { type: 'string' },
422
+ maxResults: { type: 'number' },
423
+ },
424
+ required: ['query'],
425
+ },
426
+ planning: {
427
+ capabilities: ['research.search'],
428
+ disciplineGuidance: ['Use `web_search` for fresh information, then fetch or extract only the sources you need.'],
429
+ },
430
+ execute: async (args) => executeWebAction({ ...normalizeToolInputArgs(args), action: 'search' }),
431
+ },
432
+ {
433
+ name: 'web_fetch',
434
+ description: 'Read a specific URL and return readable page text.',
435
+ parameters: {
436
+ type: 'object',
437
+ properties: { url: { type: 'string' } },
438
+ required: ['url'],
439
+ },
440
+ planning: {
441
+ capabilities: ['research.fetch'],
442
+ disciplineGuidance: ['Use `web_fetch` when you already have a URL and only need the readable text.'],
443
+ },
444
+ execute: async (args) => executeWebAction({ ...normalizeToolInputArgs(args), action: 'fetch' }),
445
+ },
446
+ {
447
+ name: 'web_extract',
448
+ description: 'Extract readable content from a URL with title and source URL included.',
449
+ parameters: {
450
+ type: 'object',
451
+ properties: { url: { type: 'string' } },
452
+ required: ['url'],
453
+ },
454
+ planning: {
455
+ capabilities: ['research.fetch'],
456
+ disciplineGuidance: ['Use `web_extract` for source-grounded page reads where the title and URL should stay attached to the extracted text.'],
457
+ },
458
+ execute: async (args) => executeWebExtractAction(normalizeToolInputArgs(args)),
459
+ },
460
+ {
461
+ name: 'web_crawl',
462
+ description: 'Crawl a small set of pages starting from one URL. Same-origin by default, bounded by maxPages and maxDepth.',
463
+ parameters: {
464
+ type: 'object',
465
+ properties: {
466
+ url: { type: 'string' },
467
+ maxPages: { type: 'number' },
468
+ maxDepth: { type: 'number' },
469
+ includeExternal: { type: 'boolean' },
470
+ sameOrigin: { type: 'boolean' },
471
+ },
472
+ required: ['url'],
473
+ },
474
+ planning: {
475
+ capabilities: ['research.crawl'],
476
+ disciplineGuidance: ['Use `web_crawl` only when the task needs multiple pages from the same site. Keep maxPages low and summarize after one crawl.'],
477
+ },
478
+ execute: async (args) => executeWebCrawlAction(normalizeToolInputArgs(args)),
286
479
  }
287
480
  ]
288
481
  }
@@ -307,6 +500,40 @@ export function buildWebTools(bctx: ToolBuildContext): StructuredToolInterface[]
307
500
  }
308
501
  )
309
502
  )
503
+ tools.push(
504
+ tool(
505
+ async (args) => executeWebAction({ ...normalizeToolInputArgs((args ?? {}) as Record<string, unknown>), action: 'search' }),
506
+ {
507
+ name: 'web_search',
508
+ description: 'Search the web and return ranked results with URLs and snippets.',
509
+ schema: z.object({}).passthrough()
510
+ }
511
+ ),
512
+ tool(
513
+ async (args) => executeWebAction({ ...normalizeToolInputArgs((args ?? {}) as Record<string, unknown>), action: 'fetch' }),
514
+ {
515
+ name: 'web_fetch',
516
+ description: 'Read a specific URL and return readable page text.',
517
+ schema: z.object({}).passthrough()
518
+ }
519
+ ),
520
+ tool(
521
+ async (args) => executeWebExtractAction(normalizeToolInputArgs((args ?? {}) as Record<string, unknown>)),
522
+ {
523
+ name: 'web_extract',
524
+ description: 'Extract readable content from a URL with title and source URL included.',
525
+ schema: z.object({}).passthrough()
526
+ }
527
+ ),
528
+ tool(
529
+ async (args) => executeWebCrawlAction(normalizeToolInputArgs((args ?? {}) as Record<string, unknown>)),
530
+ {
531
+ name: 'web_crawl',
532
+ description: 'Crawl a small set of pages starting from one URL. Same-origin by default, bounded by maxPages and maxDepth.',
533
+ schema: z.object({}).passthrough()
534
+ }
535
+ )
536
+ )
310
537
  }
311
538
 
312
539
  // Browser tool (kept as direct injection for now due to complexity)
@@ -664,6 +664,8 @@ if (!IS_BUILD_BOOTSTRAP) {
664
664
  'files',
665
665
  'web_search',
666
666
  'web_fetch',
667
+ 'web_extract',
668
+ 'web_crawl',
667
669
  'browser',
668
670
  'manage_agents',
669
671
  'manage_tasks',
@@ -8,6 +8,9 @@ import {
8
8
  type TaskCompletionValidation,
9
9
  } from '@/lib/server/tasks/task-validation'
10
10
  import { syncTaskExecutionPolicyState } from '@/lib/server/tasks/task-execution-policy'
11
+ import { createMission, startMission } from '@/lib/server/missions/mission-service'
12
+ import { getMission } from '@/lib/server/missions/mission-repository'
13
+ import { loadSessions } from '@/lib/server/storage'
11
14
 
12
15
  export interface BuildBoardTaskInput {
13
16
  id?: string
@@ -84,6 +87,7 @@ export interface PrepareScheduledTaskRunOptions {
84
87
  | 'agentId'
85
88
  | 'taskPrompt'
86
89
  | 'linkedTaskId'
90
+ | 'linkedMissionId'
87
91
  | 'runNumber'
88
92
  | 'createdInSessionId'
89
93
  | 'createdByAgentId'
@@ -98,20 +102,45 @@ export interface PrepareScheduledTaskRunOptions {
98
102
  scheduleSignature?: string | null
99
103
  }
100
104
 
105
+ function ensureScheduleMission(schedule: PrepareScheduledTaskRunOptions['schedule']): string | null {
106
+ const existingMissionId = typeof schedule.linkedMissionId === 'string' ? schedule.linkedMissionId.trim() : ''
107
+ if (existingMissionId && getMission(existingMissionId)) return existingMissionId
108
+
109
+ const rootSessionId = typeof schedule.createdInSessionId === 'string' ? schedule.createdInSessionId.trim() : ''
110
+ if (!rootSessionId) return existingMissionId || null
111
+ const sessions = loadSessions()
112
+ if (!sessions[rootSessionId]) return existingMissionId || null
113
+
114
+ const mission = createMission({
115
+ title: `Scheduled task: ${schedule.name}`,
116
+ goal: schedule.taskPrompt || schedule.name,
117
+ successCriteria: ['Scheduled run is queued, executed, and reported back to the task board.'],
118
+ rootSessionId,
119
+ agentIds: [schedule.agentId].filter(Boolean),
120
+ reportSchedule: null,
121
+ })
122
+ startMission(mission.id)
123
+ schedule.linkedMissionId = mission.id
124
+ return mission.id
125
+ }
126
+
101
127
  export function prepareScheduledTaskRun(params: PrepareScheduledTaskRunOptions): { taskId: string; task: BoardTask } {
102
128
  const { schedule, tasks, now, scheduleSignature } = params
103
129
  const title = `[Sched] ${schedule.name} (run #${schedule.runNumber})`
104
130
  const existingTaskId = typeof schedule.linkedTaskId === 'string' ? schedule.linkedTaskId : ''
105
131
  const existingTask = existingTaskId ? tasks[existingTaskId] : null
132
+ const missionId = ensureScheduleMission(schedule)
106
133
 
107
134
  if (existingTask && existingTask.status !== 'queued' && existingTask.status !== 'running') {
135
+ const task = resetTaskForRerun(existingTask, {
136
+ title,
137
+ now,
138
+ runNumber: schedule.runNumber,
139
+ })
140
+ task.missionId = missionId
108
141
  return {
109
142
  taskId: existingTaskId,
110
- task: resetTaskForRerun(existingTask, {
111
- title,
112
- now,
113
- runNumber: schedule.runNumber,
114
- }),
143
+ task,
115
144
  }
116
145
  }
117
146
 
@@ -125,6 +154,7 @@ export function prepareScheduledTaskRun(params: PrepareScheduledTaskRunOptions):
125
154
  sourceScheduleId: schedule.id,
126
155
  sourceScheduleName: schedule.name,
127
156
  sourceScheduleKey: scheduleSignature || null,
157
+ missionId,
128
158
  createdInSessionId: schedule.createdInSessionId || null,
129
159
  createdByAgentId: schedule.createdByAgentId || null,
130
160
  followupConnectorId: schedule.followupConnectorId || null,
@@ -3,7 +3,7 @@ const EXTENSION_ALIAS_GROUPS: string[][] = [
3
3
  ['execute', 'sandbox'],
4
4
  ['files', 'read_file', 'write_file', 'list_files', 'copy_file', 'move_file', 'delete_file', 'send_file'],
5
5
  ['edit_file'],
6
- ['web', 'web_search', 'web_fetch', 'http_request', 'http'],
6
+ ['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'http_request', 'http'],
7
7
  ['browser', 'openclaw_browser'],
8
8
  ['delegate', 'claude_code', 'codex_cli', 'opencode_cli', 'gemini_cli', 'copilot_cli', 'droid_cli', 'cursor_cli', 'qwen_code_cli', 'delegate_to_claude_code', 'delegate_to_codex_cli', 'delegate_to_opencode_cli', 'delegate_to_gemini_cli', 'delegate_to_copilot_cli', 'delegate_to_droid_cli', 'delegate_to_cursor_cli', 'delegate_to_qwen_code_cli'],
9
9
  ['manage_platform'],
@@ -255,12 +255,12 @@ describe('explicit allows override mode blocks', () => {
255
255
  // Category blocks
256
256
  // ---------------------------------------------------------------------------
257
257
  describe('category blocks', () => {
258
- it('blocking network category blocks web, web_search, web_fetch', () => {
259
- const d = resolveSessionToolPolicy(['web', 'web_search', 'web_fetch', 'memory'], {
258
+ it('blocking network category blocks granular web tools', () => {
259
+ const d = resolveSessionToolPolicy(['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'memory'], {
260
260
  capabilityBlockedCategories: ['network'],
261
261
  })
262
262
  assert.deepStrictEqual(d.enabledExtensions, ['memory'])
263
- assert.equal(d.blockedExtensions.length, 3)
263
+ assert.equal(d.blockedExtensions.length, 5)
264
264
  for (const b of d.blockedExtensions) {
265
265
  assert.match(b.reason, /category "network"/)
266
266
  }
@@ -49,9 +49,11 @@ const TOOL_DESCRIPTORS: Record<string, ToolDescriptor> = {
49
49
  move_file: { categories: ['filesystem'], concreteTools: ['move_file'] },
50
50
  edit_file: { categories: ['filesystem'], concreteTools: ['edit_file'] },
51
51
  delete_file: { categories: ['filesystem'], concreteTools: ['delete_file'], destructive: true },
52
- web: { categories: ['network'], concreteTools: ['web', 'web_search', 'web_fetch'] },
52
+ web: { categories: ['network'], concreteTools: ['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl'] },
53
53
  web_search: { categories: ['network'], concreteTools: ['web_search'] },
54
54
  web_fetch: { categories: ['network'], concreteTools: ['web_fetch'] },
55
+ web_extract: { categories: ['network'], concreteTools: ['web_extract'] },
56
+ web_crawl: { categories: ['network'], concreteTools: ['web_crawl'] },
55
57
  browser: { categories: ['browser', 'network'], concreteTools: ['browser', 'openclaw_browser'] },
56
58
  delegate: { categories: ['delegation', 'execution'], concreteTools: ['delegate', 'delegate_to_claude_code', 'delegate_to_codex_cli', 'delegate_to_opencode_cli', 'delegate_to_gemini_cli', 'delegate_to_copilot_cli', 'delegate_to_droid_cli', 'delegate_to_cursor_cli', 'delegate_to_qwen_code_cli'] },
57
59
  claude_code: { categories: ['delegation', 'execution'], concreteTools: ['delegate_to_claude_code'] },
@@ -85,6 +87,7 @@ const TOOL_DESCRIPTORS: Record<string, ToolDescriptor> = {
85
87
  spawn_subagent: { categories: ['delegation', 'platform'], concreteTools: ['spawn_subagent', 'delegate_to_agent'] },
86
88
  context_mgmt: { categories: ['memory'], concreteTools: ['context_mgmt', 'context_status', 'context_summarize'] },
87
89
  extension_creator: { categories: ['filesystem', 'execution'], concreteTools: ['extension_creator', 'extension_creator_tool'] },
90
+ wallet: { categories: ['outbound'], concreteTools: ['wallet'] },
88
91
  mailbox: { categories: ['network', 'platform', 'outbound'], concreteTools: ['mailbox', 'inbox'] },
89
92
  ask_human: { categories: ['platform'], concreteTools: ['ask_human', 'human_loop'] },
90
93
  google_workspace: { categories: ['network'], concreteTools: ['google_workspace', 'gws'] },
@@ -12,10 +12,11 @@ function uniqueExtensionId(prefix: string): string {
12
12
 
13
13
  describe('tool-planning', () => {
14
14
  it('collects core planning metadata for aliased built-in tools', () => {
15
- const view = getEnabledToolPlanningView(['web_search', 'web_fetch', 'browser', 'manage_connectors'])
15
+ const view = getEnabledToolPlanningView(['web_search', 'web_fetch', 'web_extract', 'web_crawl', 'browser', 'manage_connectors'])
16
16
 
17
17
  assert.deepEqual(view.displayToolIds, ['browser', 'manage_connectors', 'web'])
18
18
  assert.deepEqual(getToolsForCapability(['web_search'], TOOL_CAPABILITY.researchSearch), ['web_search'])
19
+ assert.deepEqual(getToolsForCapability(['web_crawl'], TOOL_CAPABILITY.researchCrawl), ['web_crawl'])
19
20
  assert.deepEqual(getToolsForCapability(['manage_connectors'], TOOL_CAPABILITY.deliveryVoiceNote), ['connector_message_tool'])
20
21
  })
21
22
 
@@ -7,6 +7,7 @@ import { canonicalizeExtensionId, expandExtensionIds } from './tool-aliases'
7
7
  export const TOOL_CAPABILITY = {
8
8
  researchSearch: 'research.search',
9
9
  researchFetch: 'research.fetch',
10
+ researchCrawl: 'research.crawl',
10
11
  browserNavigate: 'browser.navigate',
11
12
  browserCapture: 'browser.capture',
12
13
  artifactPdf: 'artifact.pdf',
@@ -98,6 +99,36 @@ const CORE_TOOL_PLANNING: Record<string, LegacyToolPlanningEntry[]> = {
98
99
  },
99
100
  ],
100
101
  },
102
+ {
103
+ toolName: 'web_extract',
104
+ capabilities: [TOOL_CAPABILITY.researchFetch],
105
+ disciplineGuidance: [
106
+ 'For `web_extract`, use `{"url":"https://..."}` when source title and URL should remain attached to extracted page text.',
107
+ 'Extract the exact pages you need, then synthesize. Do not extract the same page repeatedly.',
108
+ ],
109
+ requestMatchers: [
110
+ {
111
+ capability: TOOL_CAPABILITY.researchFetch,
112
+ patterns: ['extract', 'readable content', 'page text', 'source text'],
113
+ requireLiteralUrl: true,
114
+ },
115
+ ],
116
+ },
117
+ {
118
+ toolName: 'web_crawl',
119
+ capabilities: [TOOL_CAPABILITY.researchCrawl],
120
+ disciplineGuidance: [
121
+ 'For `web_crawl`, use `{"url":"https://...","maxPages":5,"maxDepth":1}` only when a task needs several pages from the same site.',
122
+ 'Keep crawls bounded and summarize after one crawl. Prefer `web_extract` for a single known URL.',
123
+ ],
124
+ requestMatchers: [
125
+ {
126
+ capability: TOOL_CAPABILITY.researchCrawl,
127
+ patterns: ['crawl', 'site map', 'sitemap', 'multiple pages', 'whole site', 'scan site'],
128
+ requireLiteralUrl: true,
129
+ },
130
+ ],
131
+ },
101
132
  ],
102
133
  browser: [
103
134
  {
@@ -4,11 +4,11 @@ const INJECTION_PATTERNS: Array<{ code: string; re: RegExp; note: string }> = [
4
4
  { code: 'ignore_instructions', re: /\bignore (?:all |any |the )?(?:previous|prior|above|system|developer) instructions\b/i, note: 'tries to override existing instructions' },
5
5
  { code: 'reveal_prompt', re: /\b(?:reveal|show|print|dump)\b[\s\S]{0,40}\b(?:system prompt|developer prompt|hidden prompt)\b/i, note: 'asks for hidden prompt data' },
6
6
  { code: 'credential_theft', re: /\b(?:api key|token|password|secret|credential)s?\b[\s\S]{0,40}\b(?:send|share|reveal|print|dump|exfiltrat)/i, note: 'asks for secrets or credentials' },
7
- { code: 'tool_override', re: /\b(?:call|use|run)\b[\s\S]{0,40}\b(?:shell|terminal|browser|http_request|web_fetch|connector_message_tool)\b[\s\S]{0,40}\b(?:without|ignore)\b/i, note: 'tries to direct tool use by bypassing policy' },
7
+ { code: 'tool_override', re: /\b(?:call|use|run)\b[\s\S]{0,40}\b(?:shell|terminal|browser|http_request|web_fetch|web_extract|web_crawl|connector_message_tool)\b[\s\S]{0,40}\b(?:without|ignore)\b/i, note: 'tries to direct tool use by bypassing policy' },
8
8
  { code: 'workflow_override', re: /\b(?:act as|pretend to be)\b[\s\S]{0,40}\b(?:system|developer|administrator|operator)\b/i, note: 'tries to impersonate a higher-priority role' },
9
9
  ]
10
10
 
11
- const WEB_TOOL_NAMES = new Set(['browser', 'web_search', 'web_fetch', 'http_request'])
11
+ const WEB_TOOL_NAMES = new Set(['browser', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'http_request'])
12
12
 
13
13
  function normalizeMode(value: unknown): 'off' | 'warn' | 'block' {
14
14
  const normalized = typeof value === 'string' ? value.trim().toLowerCase() : ''
@@ -3,7 +3,7 @@ import type { ExtensionManagedResourceMarker } from './extension'
3
3
  export type ScheduleType = 'cron' | 'interval' | 'once'
4
4
  export type ScheduleStatus = 'active' | 'paused' | 'completed' | 'failed' | 'archived'
5
5
  export type ScheduleTaskMode = 'task' | 'wake_only' | 'protocol'
6
- export type ScheduleHistoryAction = 'created' | 'updated' | 'archived' | 'restored' | 'run_started' | 'skipped' | 'failed'
6
+ export type ScheduleHistoryAction = 'created' | 'updated' | 'archived' | 'restored' | 'run_started' | 'skipped' | 'failed' | 'repaired'
7
7
 
8
8
  export interface ScheduleHistoryChange {
9
9
  field: string
@@ -55,7 +55,7 @@ export interface Schedule {
55
55
  nextRunAt?: number
56
56
  /** IANA timezone for schedule evaluation (default: system local) */
57
57
  timezone?: string | null
58
- /** Random stagger window in seconds added to nextRunAt to avoid thundering herd */
58
+ /** Deterministic stagger window in seconds added to nextRunAt to avoid thundering herd */
59
59
  staggerSec?: number | null
60
60
  /** Last delivery status for this schedule */
61
61
  lastDeliveryStatus?: 'ok' | 'error' | null
@@ -218,6 +218,8 @@ export type SessionTool =
218
218
  | 'qwen_code_cli'
219
219
  | 'web_search'
220
220
  | 'web_fetch'
221
+ | 'web_extract'
222
+ | 'web_crawl'
221
223
  | 'edit_file'
222
224
  | 'process'
223
225
  | 'spawn_subagent'
package/src/types/task.ts CHANGED
@@ -182,6 +182,7 @@ export interface BoardTask {
182
182
  cwd?: string | null
183
183
  file?: string | null
184
184
  sessionId?: string | null
185
+ missionId?: string | null
185
186
  completionReportPath?: string | null
186
187
  result?: string | null
187
188
  error?: string | null