@swarmclawai/swarmclaw 1.9.21 → 1.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -5
- package/package.json +2 -2
- package/src/components/chat/activity-moment.tsx +4 -0
- package/src/components/chat/tool-call-bubble.tsx +6 -0
- package/src/components/schedules/schedule-console.tsx +3 -0
- package/src/lib/server/capability-router.test.ts +4 -4
- package/src/lib/server/capability-router.ts +1 -0
- package/src/lib/server/chat-execution/chat-execution-advanced.test.ts +27 -0
- package/src/lib/server/chat-execution/chat-execution-utils.ts +21 -0
- package/src/lib/server/chat-execution/iteration-event-handler.ts +1 -1
- package/src/lib/server/chat-execution/stream-continuation.ts +6 -2
- package/src/lib/server/plugins-advanced.test.ts +7 -3
- package/src/lib/server/runtime/scheduler.test.ts +129 -0
- package/src/lib/server/runtime/scheduler.ts +62 -35
- package/src/lib/server/schedules/schedule-history.test.ts +14 -0
- package/src/lib/server/schedules/schedule-history.ts +1 -0
- package/src/lib/server/schedules/schedule-lifecycle.ts +5 -28
- package/src/lib/server/schedules/schedule-normalization.ts +6 -28
- package/src/lib/server/schedules/schedule-timing.test.ts +80 -0
- package/src/lib/server/schedules/schedule-timing.ts +179 -0
- package/src/lib/server/session-tools/web-crawl.test.ts +106 -0
- package/src/lib/server/session-tools/web-inputs.test.ts +5 -0
- package/src/lib/server/session-tools/web-utils.ts +8 -2
- package/src/lib/server/session-tools/web.ts +256 -29
- package/src/lib/server/storage.ts +2 -0
- package/src/lib/server/tasks/task-lifecycle.ts +35 -5
- package/src/lib/server/tool-aliases.ts +1 -1
- package/src/lib/server/tool-capability-policy-advanced.test.ts +3 -3
- package/src/lib/server/tool-capability-policy.ts +4 -1
- package/src/lib/server/tool-planning.test.ts +2 -1
- package/src/lib/server/tool-planning.ts +31 -0
- package/src/lib/server/untrusted-content.ts +2 -2
- package/src/types/schedule.ts +2 -2
- package/src/types/session.ts +2 -0
- package/src/types/task.ts +1 -0
|
@@ -199,6 +199,149 @@ async function executeWebApiAction(normalized: Record<string, unknown>) {
|
|
|
199
199
|
}, requestArgs)
|
|
200
200
|
}
|
|
201
201
|
|
|
202
|
+
/**
 * Result of reading a single web resource, as produced by `extractReadablePage`
 * and consumed by the extract/crawl formatters.
 */
interface ExtractedWebPage {
  /** URL the page was requested with, after `normalizeHttpUrl` (fragment removed). */
  url: string
  /** Whitespace-collapsed `<title>` text; falls back to `url` when absent (and for PDFs). */
  title: string
  /** Readable body text (or parsed PDF text). The crawler stores an `Error: ...` string here for failed pages. */
  text: string
  /** Absolute http(s) links found on the page, de-duplicated; empty for PDFs and failed pages. */
  links: string[]
}
|
|
208
|
+
|
|
209
|
+
/**
 * Canonicalize a caller-supplied URL before it is fetched or used as a crawl key.
 *
 * The input is trimmed, parsed with the WHATWG `URL` parser, restricted to the
 * `http:` / `https:` schemes, and returned with its fragment removed.
 *
 * @param rawUrl URL text as supplied by the tool caller.
 * @returns The serialized URL without a `#fragment`.
 * @throws Error when the input is blank or uses a non-http(s) scheme; the `URL`
 *   constructor's own error propagates when the input cannot be parsed.
 */
function normalizeHttpUrl(rawUrl: string): string {
  const candidate = rawUrl.trim()
  if (candidate.length === 0) {
    throw new Error('URL is required.')
  }

  const target = new URL(candidate)
  const isWebScheme = target.protocol === 'http:' || target.protocol === 'https:'
  if (!isWebScheme) {
    throw new Error('Only http and https URLs are supported.')
  }

  // Fragments never reach the server, so drop them to keep one key per page.
  target.hash = ''
  return target.toString()
}
|
|
219
|
+
|
|
220
|
+
/**
 * Coerce a loosely-typed tool argument to an integer inside `[min, max]`.
 *
 * Numbers are used as-is, strings are parsed with `Number.parseInt(value, 10)`,
 * and every other type is treated as missing. Non-finite results yield
 * `fallback`; finite results are truncated toward zero and then clamped.
 *
 * @param value Raw argument (number, numeric string, or anything else).
 * @param fallback Returned unchanged when `value` is not a finite number.
 * @param min Inclusive lower bound applied after truncation.
 * @param max Inclusive upper bound applied after truncation.
 */
function clampNumber(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = Number.NaN
  if (typeof value === 'number') {
    candidate = value
  } else if (typeof value === 'string') {
    candidate = Number.parseInt(value, 10)
  }

  if (!Number.isFinite(candidate)) return fallback

  const whole = Math.trunc(candidate)
  const capped = Math.min(max, whole)
  return Math.max(min, capped)
}
|
|
229
|
+
|
|
230
|
+
/**
 * Collect the distinct absolute http(s) link targets of every `<a href>` in a
 * loaded document.
 *
 * Each `href` is resolved against `pageUrl`, non-http(s) schemes are skipped,
 * fragments are stripped, and duplicates are dropped while keeping first-seen
 * order.
 *
 * @param $ Cheerio document to scan.
 * @param pageUrl Base URL used to resolve relative hrefs.
 * @returns Unique absolute URLs in document order.
 */
function extractLinks($: ReturnType<typeof cheerio.load>, pageUrl: string): string[] {
  // A Set iterates in insertion order, so it de-duplicates in O(1) per link
  // without changing the resulting order.
  const unique = new Set<string>()
  $('a[href]').each((_index, element) => {
    const rawHref = $(element).attr('href') || ''
    try {
      const resolved = new URL(rawHref, pageUrl)
      if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') return
      resolved.hash = ''
      unique.add(resolved.toString())
    } catch {
      // Ignore malformed links from the crawled page.
    }
  })
  return Array.from(unique)
}
|
|
246
|
+
|
|
247
|
+
/**
 * Fetch a URL and reduce the response to readable text.
 *
 * The URL is first validated by `normalizeHttpUrl`. The request uses a fixed
 * User-Agent and a 15 second abort timeout. PDF responses are parsed with the
 * lazily imported `pdf-parse` module; anything else is loaded as HTML, its
 * links are collected, boilerplate elements are removed, and the text of the
 * first `article` / `main` / `[role="main"]` element (or `body`) is returned
 * with whitespace collapsed.
 *
 * @param fetchUrl URL to read.
 * @returns The normalized URL, a title, the extracted text and outgoing links.
 * @throws Error on an invalid URL or a non-2xx response; fetch, timeout and
 *   PDF-parsing errors propagate to the caller.
 */
async function extractReadablePage(fetchUrl: string): Promise<ExtractedWebPage> {
  const url = normalizeHttpUrl(fetchUrl)
  const res = await fetch(url, {
    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
    signal: AbortSignal.timeout(15000),
  })
  if (!res.ok) throw new Error(`HTTP ${res.status}: ${res.statusText}`)

  // Media types are case-insensitive, so normalize before matching.
  const contentType = (res.headers.get('content-type') || '').toLowerCase()
  if (contentType.includes('application/pdf')) {
    const pdfMod = await import(/* webpackIgnore: true */ 'pdf-parse')
    // The module may expose the parser as `default` or as the module itself.
    const pdfParse = ((pdfMod as Record<string, unknown>).default ?? pdfMod) as (buf: Buffer) => Promise<{ text: string }>
    const arrayBuffer = await res.arrayBuffer()
    const result = await pdfParse(Buffer.from(arrayBuffer))
    return { url, title: url, text: result.text, links: [] }
  }

  const html = await res.text()
  const $ = cheerio.load(html)
  const title = $('title').first().text().replace(/\s+/g, ' ').trim() || url
  // Collect links before stripping nav/header/footer so navigation links are kept.
  const links = extractLinks($, url)
  $('script, style, noscript, nav, footer, header').remove()
  const main = $('article, main, [role="main"]').first()
  const text = (main.length ? main.text() : $('body').text()).replace(/\s+/g, ' ').trim()
  return { url, title, text, links }
}
|
|
272
|
+
|
|
273
|
+
/**
 * Render one extracted page as a `Title:` line, a `URL:` line, a blank line
 * and the page text (or a placeholder when the text is empty), truncated to
 * `MAX_OUTPUT`.
 */
function formatExtractedPage(page: ExtractedWebPage): string {
  const body = page.text || '(no readable text found)'
  const rendered = `Title: ${page.title}\nURL: ${page.url}\n\n${body}`
  return truncate(rendered, MAX_OUTPUT)
}
|
|
277
|
+
|
|
278
|
+
/**
 * Render crawl output as a header followed by one numbered section per page.
 *
 * Each page's text is truncated to 1200 before being embedded, and the joined
 * report is truncated again to `MAX_OUTPUT`.
 *
 * @param startUrl URL the crawl started from (shown in the header).
 * @param pages Pages in the order they were crawled.
 */
function formatCrawlResults(startUrl: string, pages: ExtractedWebPage[]): string {
  if (pages.length === 0) return `No crawl results found for: ${startUrl}`

  const header = [`Crawl results for: ${startUrl}`, `Pages crawled: ${pages.length}`]
  const entries = pages.map((page, position) => {
    const excerpt = truncate(page.text || '(no readable text found)', 1200)
    return `${position + 1}. ${page.title}\nURL: ${page.url}\nText: ${excerpt}`
  })
  return truncate([...header, ...entries].join('\n\n'), MAX_OUTPUT)
}
|
|
288
|
+
|
|
289
|
+
/**
 * Handle the extract action: read one URL and return it formatted with its
 * title and source URL.
 *
 * The target is taken from `url`, falling back to `query`. Failures are
 * reported as an `Error: ...` string rather than thrown.
 */
async function executeWebExtractAction(normalized: Record<string, unknown>) {
  const target = String(normalized.url || normalized.query || '')
  if (target.trim() === '') return 'Error: "url" is required for extract action.'

  try {
    const page = await extractReadablePage(target)
    return formatExtractedPage(page)
  } catch (err: unknown) {
    return `Error: ${errorMessage(err)}`
  }
}
|
|
298
|
+
|
|
299
|
+
/**
 * Handle the crawl action: breadth-first read of pages reachable from a start URL.
 *
 * Inputs read from `normalized`:
 * - `url` (or `query`): start URL, validated by `normalizeHttpUrl`.
 * - `maxPages` (or `maxResults`): clamped to 1..25, default 5.
 * - `maxDepth`: clamped to 0..3, default 1.
 * - `includeExternal === true` or `sameOrigin === false`: allow links off the start origin.
 *
 * Pages that fail to load are still recorded, with the error message as their
 * text. Returns the formatted crawl report, or an `Error: ...` string when the
 * start URL is missing or invalid.
 */
async function executeWebCrawlAction(normalized: Record<string, unknown>) {
  const requested = String(normalized.url || normalized.query || '')
  if (requested.trim() === '') return 'Error: "url" is required for crawl action.'

  let startUrl: string
  try {
    startUrl = normalizeHttpUrl(requested)
  } catch (err: unknown) {
    return `Error: ${errorMessage(err)}`
  }

  const maxPages = clampNumber(normalized.maxPages ?? normalized.maxResults, 5, 1, 25)
  const maxDepth = clampNumber(normalized.maxDepth, 1, 0, 3)
  const stayOnOrigin = normalized.includeExternal !== true && normalized.sameOrigin !== false
  const startOrigin = new URL(startUrl).origin
  const frontier: Array<{ url: string; depth: number }> = [{ url: startUrl, depth: 0 }]
  const visited = new Set<string>()
  const collected: ExtractedWebPage[] = []

  while (frontier.length > 0 && collected.length < maxPages) {
    const current = frontier.shift()
    if (current === undefined) break
    if (visited.has(current.url)) continue
    visited.add(current.url)

    let page: ExtractedWebPage
    try {
      page = await extractReadablePage(current.url)
    } catch (err: unknown) {
      // Keep the failure in the report instead of aborting the whole crawl.
      page = { url: current.url, title: current.url, text: `Error: ${errorMessage(err)}`, links: [] }
    }
    collected.push(page)

    // Pages at the depth limit are reported but not expanded.
    if (current.depth < maxDepth) {
      for (const link of page.links) {
        if (visited.has(link)) continue
        if (stayOnOrigin && new URL(link).origin !== startOrigin) continue
        const alreadyQueued = frontier.some((entry) => entry.url === link)
        if (alreadyQueued) continue
        frontier.push({ url: link, depth: current.depth + 1 })
        // Cap queued + visited URLs at four times the page budget.
        if (frontier.length + visited.size >= maxPages * 4) break
      }
    }
  }

  return formatCrawlResults(startUrl, collected)
}
|
|
344
|
+
|
|
202
345
|
async function executeWebAction(args: Record<string, unknown>) {
|
|
203
346
|
const normalized = normalizeToolInputArgs(args)
|
|
204
347
|
const { query, url, maxResults } = normalized as { query?: string; url?: string; maxResults?: number }
|
|
@@ -219,32 +362,13 @@ async function executeWebAction(args: Record<string, unknown>) {
|
|
|
219
362
|
const results = await provider.search(searchQuery, limit)
|
|
220
363
|
if (results.length === 0) return 'No results found.'
|
|
221
364
|
return formatWebSearchResults(searchQuery, results)
|
|
222
|
-
} else if (action === 'fetch') {
|
|
365
|
+
} else if (action === 'fetch' || action === 'extract') {
|
|
223
366
|
const fetchUrl = url || query
|
|
224
|
-
if (!fetchUrl) return
|
|
225
|
-
const
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
if (!res.ok) return `HTTP ${res.status}: ${res.statusText}`
|
|
230
|
-
const contentType = res.headers.get('content-type') || ''
|
|
231
|
-
if (contentType.includes('application/pdf')) {
|
|
232
|
-
try {
|
|
233
|
-
const pdfMod = await import(/* webpackIgnore: true */ 'pdf-parse')
|
|
234
|
-
const pdfParse = ((pdfMod as Record<string, unknown>).default ?? pdfMod) as (buf: Buffer) => Promise<{ text: string }>
|
|
235
|
-
const arrayBuffer = await res.arrayBuffer()
|
|
236
|
-
const result = await pdfParse(Buffer.from(arrayBuffer))
|
|
237
|
-
return truncate(result.text, MAX_OUTPUT)
|
|
238
|
-
} catch (err: unknown) {
|
|
239
|
-
return `Error parsing PDF: ${errorMessage(err)}`
|
|
240
|
-
}
|
|
241
|
-
}
|
|
242
|
-
const html = await res.text()
|
|
243
|
-
const $ = cheerio.load(html)
|
|
244
|
-
$('script, style, noscript, nav, footer, header').remove()
|
|
245
|
-
const main = $('article, main, [role="main"]').first()
|
|
246
|
-
const text = (main.length ? main.text() : $('body').text()).replace(/\s+/g, ' ').trim()
|
|
247
|
-
return truncate(text, MAX_OUTPUT)
|
|
367
|
+
if (!fetchUrl) return `Error: "url" is required for ${action} action.`
|
|
368
|
+
const page = await extractReadablePage(fetchUrl)
|
|
369
|
+
return action === 'extract' ? formatExtractedPage(page) : truncate(page.text, MAX_OUTPUT)
|
|
370
|
+
} else if (action === 'crawl') {
|
|
371
|
+
return executeWebCrawlAction(normalized)
|
|
248
372
|
} else if (action === 'api') {
|
|
249
373
|
return executeWebApiAction(normalized)
|
|
250
374
|
}
|
|
@@ -259,21 +383,25 @@ async function executeWebAction(args: Record<string, unknown>) {
|
|
|
259
383
|
*/
|
|
260
384
|
const WebExtension: Extension = {
|
|
261
385
|
name: 'Core Web',
|
|
262
|
-
description: 'Search the web,
|
|
386
|
+
description: 'Search the web, extract pages, crawl sites, and make HTTP API calls.',
|
|
263
387
|
hooks: {
|
|
264
|
-
getCapabilityDescription: () => 'I can use
|
|
388
|
+
getCapabilityDescription: () => 'I can use `web_search` for fresh research, `web_extract` for a specific URL, `web_crawl` for bounded multi-page site reads, and the unified `web` tool for search, fetch, crawl, and raw HTTP API calls.',
|
|
265
389
|
} as ExtensionHooks,
|
|
266
390
|
tools: [
|
|
267
391
|
{
|
|
268
392
|
name: 'web',
|
|
269
|
-
description: 'Unified web access tool. Actions: search (web search), fetch (read URL content), api (raw HTTP request with method/headers/body).',
|
|
393
|
+
description: 'Unified web access tool. Actions: search (web search), fetch/extract (read URL content), crawl (bounded same-origin crawl), api (raw HTTP request with method/headers/body).',
|
|
270
394
|
parameters: {
|
|
271
395
|
type: 'object',
|
|
272
396
|
properties: {
|
|
273
|
-
action: { type: 'string', enum: ['search', 'fetch', 'api'] },
|
|
397
|
+
action: { type: 'string', enum: ['search', 'fetch', 'extract', 'crawl', 'api'] },
|
|
274
398
|
query: { type: 'string' },
|
|
275
399
|
url: { type: 'string' },
|
|
276
400
|
maxResults: { type: 'number' },
|
|
401
|
+
maxPages: { type: 'number', description: 'Maximum pages for crawl action, default 5, max 25' },
|
|
402
|
+
maxDepth: { type: 'number', description: 'Maximum crawl depth, default 1, max 3' },
|
|
403
|
+
includeExternal: { type: 'boolean', description: 'Allow crawl to leave the starting origin, default false' },
|
|
404
|
+
sameOrigin: { type: 'boolean', description: 'Keep crawl on the starting origin when true, default true' },
|
|
277
405
|
method: { type: 'string', enum: ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD', 'OPTIONS'], description: 'HTTP method (for api action)' },
|
|
278
406
|
headers: { type: 'object', additionalProperties: { type: 'string' }, description: 'Request headers (for api action)' },
|
|
279
407
|
body: { type: 'string', description: 'Request body (for api action)' },
|
|
@@ -283,6 +411,71 @@ const WebExtension: Extension = {
|
|
|
283
411
|
required: ['action']
|
|
284
412
|
},
|
|
285
413
|
execute: async (args) => executeWebAction(args)
|
|
414
|
+
},
|
|
415
|
+
{
|
|
416
|
+
name: 'web_search',
|
|
417
|
+
description: 'Search the web and return ranked results with URLs and snippets.',
|
|
418
|
+
parameters: {
|
|
419
|
+
type: 'object',
|
|
420
|
+
properties: {
|
|
421
|
+
query: { type: 'string' },
|
|
422
|
+
maxResults: { type: 'number' },
|
|
423
|
+
},
|
|
424
|
+
required: ['query'],
|
|
425
|
+
},
|
|
426
|
+
planning: {
|
|
427
|
+
capabilities: ['research.search'],
|
|
428
|
+
disciplineGuidance: ['Use `web_search` for fresh information, then fetch or extract only the sources you need.'],
|
|
429
|
+
},
|
|
430
|
+
execute: async (args) => executeWebAction({ ...normalizeToolInputArgs(args), action: 'search' }),
|
|
431
|
+
},
|
|
432
|
+
{
|
|
433
|
+
name: 'web_fetch',
|
|
434
|
+
description: 'Read a specific URL and return readable page text.',
|
|
435
|
+
parameters: {
|
|
436
|
+
type: 'object',
|
|
437
|
+
properties: { url: { type: 'string' } },
|
|
438
|
+
required: ['url'],
|
|
439
|
+
},
|
|
440
|
+
planning: {
|
|
441
|
+
capabilities: ['research.fetch'],
|
|
442
|
+
disciplineGuidance: ['Use `web_fetch` when you already have a URL and only need the readable text.'],
|
|
443
|
+
},
|
|
444
|
+
execute: async (args) => executeWebAction({ ...normalizeToolInputArgs(args), action: 'fetch' }),
|
|
445
|
+
},
|
|
446
|
+
{
|
|
447
|
+
name: 'web_extract',
|
|
448
|
+
description: 'Extract readable content from a URL with title and source URL included.',
|
|
449
|
+
parameters: {
|
|
450
|
+
type: 'object',
|
|
451
|
+
properties: { url: { type: 'string' } },
|
|
452
|
+
required: ['url'],
|
|
453
|
+
},
|
|
454
|
+
planning: {
|
|
455
|
+
capabilities: ['research.fetch'],
|
|
456
|
+
disciplineGuidance: ['Use `web_extract` for source-grounded page reads where the title and URL should stay attached to the extracted text.'],
|
|
457
|
+
},
|
|
458
|
+
execute: async (args) => executeWebExtractAction(normalizeToolInputArgs(args)),
|
|
459
|
+
},
|
|
460
|
+
{
|
|
461
|
+
name: 'web_crawl',
|
|
462
|
+
description: 'Crawl a small set of pages starting from one URL. Same-origin by default, bounded by maxPages and maxDepth.',
|
|
463
|
+
parameters: {
|
|
464
|
+
type: 'object',
|
|
465
|
+
properties: {
|
|
466
|
+
url: { type: 'string' },
|
|
467
|
+
maxPages: { type: 'number' },
|
|
468
|
+
maxDepth: { type: 'number' },
|
|
469
|
+
includeExternal: { type: 'boolean' },
|
|
470
|
+
sameOrigin: { type: 'boolean' },
|
|
471
|
+
},
|
|
472
|
+
required: ['url'],
|
|
473
|
+
},
|
|
474
|
+
planning: {
|
|
475
|
+
capabilities: ['research.crawl'],
|
|
476
|
+
disciplineGuidance: ['Use `web_crawl` only when the task needs multiple pages from the same site. Keep maxPages low and summarize after one crawl.'],
|
|
477
|
+
},
|
|
478
|
+
execute: async (args) => executeWebCrawlAction(normalizeToolInputArgs(args)),
|
|
286
479
|
}
|
|
287
480
|
]
|
|
288
481
|
}
|
|
@@ -307,6 +500,40 @@ export function buildWebTools(bctx: ToolBuildContext): StructuredToolInterface[]
|
|
|
307
500
|
}
|
|
308
501
|
)
|
|
309
502
|
)
|
|
503
|
+
tools.push(
|
|
504
|
+
tool(
|
|
505
|
+
async (args) => executeWebAction({ ...normalizeToolInputArgs((args ?? {}) as Record<string, unknown>), action: 'search' }),
|
|
506
|
+
{
|
|
507
|
+
name: 'web_search',
|
|
508
|
+
description: 'Search the web and return ranked results with URLs and snippets.',
|
|
509
|
+
schema: z.object({}).passthrough()
|
|
510
|
+
}
|
|
511
|
+
),
|
|
512
|
+
tool(
|
|
513
|
+
async (args) => executeWebAction({ ...normalizeToolInputArgs((args ?? {}) as Record<string, unknown>), action: 'fetch' }),
|
|
514
|
+
{
|
|
515
|
+
name: 'web_fetch',
|
|
516
|
+
description: 'Read a specific URL and return readable page text.',
|
|
517
|
+
schema: z.object({}).passthrough()
|
|
518
|
+
}
|
|
519
|
+
),
|
|
520
|
+
tool(
|
|
521
|
+
async (args) => executeWebExtractAction(normalizeToolInputArgs((args ?? {}) as Record<string, unknown>)),
|
|
522
|
+
{
|
|
523
|
+
name: 'web_extract',
|
|
524
|
+
description: 'Extract readable content from a URL with title and source URL included.',
|
|
525
|
+
schema: z.object({}).passthrough()
|
|
526
|
+
}
|
|
527
|
+
),
|
|
528
|
+
tool(
|
|
529
|
+
async (args) => executeWebCrawlAction(normalizeToolInputArgs((args ?? {}) as Record<string, unknown>)),
|
|
530
|
+
{
|
|
531
|
+
name: 'web_crawl',
|
|
532
|
+
description: 'Crawl a small set of pages starting from one URL. Same-origin by default, bounded by maxPages and maxDepth.',
|
|
533
|
+
schema: z.object({}).passthrough()
|
|
534
|
+
}
|
|
535
|
+
)
|
|
536
|
+
)
|
|
310
537
|
}
|
|
311
538
|
|
|
312
539
|
// Browser tool (kept as direct injection for now due to complexity)
|
|
@@ -8,6 +8,9 @@ import {
|
|
|
8
8
|
type TaskCompletionValidation,
|
|
9
9
|
} from '@/lib/server/tasks/task-validation'
|
|
10
10
|
import { syncTaskExecutionPolicyState } from '@/lib/server/tasks/task-execution-policy'
|
|
11
|
+
import { createMission, startMission } from '@/lib/server/missions/mission-service'
|
|
12
|
+
import { getMission } from '@/lib/server/missions/mission-repository'
|
|
13
|
+
import { loadSessions } from '@/lib/server/storage'
|
|
11
14
|
|
|
12
15
|
export interface BuildBoardTaskInput {
|
|
13
16
|
id?: string
|
|
@@ -84,6 +87,7 @@ export interface PrepareScheduledTaskRunOptions {
|
|
|
84
87
|
| 'agentId'
|
|
85
88
|
| 'taskPrompt'
|
|
86
89
|
| 'linkedTaskId'
|
|
90
|
+
| 'linkedMissionId'
|
|
87
91
|
| 'runNumber'
|
|
88
92
|
| 'createdInSessionId'
|
|
89
93
|
| 'createdByAgentId'
|
|
@@ -98,20 +102,45 @@ export interface PrepareScheduledTaskRunOptions {
|
|
|
98
102
|
scheduleSignature?: string | null
|
|
99
103
|
}
|
|
100
104
|
|
|
105
|
+
/**
 * Resolve the mission a scheduled run should be attached to, creating one when needed.
 *
 * - If the schedule has a `linkedMissionId` that `getMission` can find, that id is returned.
 * - Otherwise, if `createdInSessionId` is blank or not present in `loadSessions()`,
 *   no mission is created and the trimmed existing id (possibly stale) or `null` is returned.
 * - Otherwise a mission is created and started, its id is written back to
 *   `schedule.linkedMissionId` (the argument is mutated), and the id is returned.
 */
function ensureScheduleMission(schedule: PrepareScheduledTaskRunOptions['schedule']): string | null {
  const linkedId = typeof schedule.linkedMissionId === 'string' ? schedule.linkedMissionId.trim() : ''
  if (linkedId && getMission(linkedId)) return linkedId

  // Returned whenever a new mission cannot be rooted in a known session.
  const unresolved = linkedId || null
  const sessionId = typeof schedule.createdInSessionId === 'string' ? schedule.createdInSessionId.trim() : ''
  if (!sessionId) return unresolved

  const knownSessions = loadSessions()
  if (!knownSessions[sessionId]) return unresolved

  const created = createMission({
    title: `Scheduled task: ${schedule.name}`,
    goal: schedule.taskPrompt || schedule.name,
    successCriteria: ['Scheduled run is queued, executed, and reported back to the task board.'],
    rootSessionId: sessionId,
    agentIds: [schedule.agentId].filter(Boolean),
    reportSchedule: null,
  })
  startMission(created.id)
  schedule.linkedMissionId = created.id
  return created.id
}
|
|
126
|
+
|
|
101
127
|
export function prepareScheduledTaskRun(params: PrepareScheduledTaskRunOptions): { taskId: string; task: BoardTask } {
|
|
102
128
|
const { schedule, tasks, now, scheduleSignature } = params
|
|
103
129
|
const title = `[Sched] ${schedule.name} (run #${schedule.runNumber})`
|
|
104
130
|
const existingTaskId = typeof schedule.linkedTaskId === 'string' ? schedule.linkedTaskId : ''
|
|
105
131
|
const existingTask = existingTaskId ? tasks[existingTaskId] : null
|
|
132
|
+
const missionId = ensureScheduleMission(schedule)
|
|
106
133
|
|
|
107
134
|
if (existingTask && existingTask.status !== 'queued' && existingTask.status !== 'running') {
|
|
135
|
+
const task = resetTaskForRerun(existingTask, {
|
|
136
|
+
title,
|
|
137
|
+
now,
|
|
138
|
+
runNumber: schedule.runNumber,
|
|
139
|
+
})
|
|
140
|
+
task.missionId = missionId
|
|
108
141
|
return {
|
|
109
142
|
taskId: existingTaskId,
|
|
110
|
-
task
|
|
111
|
-
title,
|
|
112
|
-
now,
|
|
113
|
-
runNumber: schedule.runNumber,
|
|
114
|
-
}),
|
|
143
|
+
task,
|
|
115
144
|
}
|
|
116
145
|
}
|
|
117
146
|
|
|
@@ -125,6 +154,7 @@ export function prepareScheduledTaskRun(params: PrepareScheduledTaskRunOptions):
|
|
|
125
154
|
sourceScheduleId: schedule.id,
|
|
126
155
|
sourceScheduleName: schedule.name,
|
|
127
156
|
sourceScheduleKey: scheduleSignature || null,
|
|
157
|
+
missionId,
|
|
128
158
|
createdInSessionId: schedule.createdInSessionId || null,
|
|
129
159
|
createdByAgentId: schedule.createdByAgentId || null,
|
|
130
160
|
followupConnectorId: schedule.followupConnectorId || null,
|
|
@@ -3,7 +3,7 @@ const EXTENSION_ALIAS_GROUPS: string[][] = [
|
|
|
3
3
|
['execute', 'sandbox'],
|
|
4
4
|
['files', 'read_file', 'write_file', 'list_files', 'copy_file', 'move_file', 'delete_file', 'send_file'],
|
|
5
5
|
['edit_file'],
|
|
6
|
-
['web', 'web_search', 'web_fetch', 'http_request', 'http'],
|
|
6
|
+
['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'http_request', 'http'],
|
|
7
7
|
['browser', 'openclaw_browser'],
|
|
8
8
|
['delegate', 'claude_code', 'codex_cli', 'opencode_cli', 'gemini_cli', 'copilot_cli', 'droid_cli', 'cursor_cli', 'qwen_code_cli', 'delegate_to_claude_code', 'delegate_to_codex_cli', 'delegate_to_opencode_cli', 'delegate_to_gemini_cli', 'delegate_to_copilot_cli', 'delegate_to_droid_cli', 'delegate_to_cursor_cli', 'delegate_to_qwen_code_cli'],
|
|
9
9
|
['manage_platform'],
|
|
@@ -255,12 +255,12 @@ describe('explicit allows override mode blocks', () => {
|
|
|
255
255
|
// Category blocks
|
|
256
256
|
// ---------------------------------------------------------------------------
|
|
257
257
|
describe('category blocks', () => {
|
|
258
|
-
it('blocking network category blocks web
|
|
259
|
-
const d = resolveSessionToolPolicy(['web', 'web_search', 'web_fetch', 'memory'], {
|
|
258
|
+
it('blocking network category blocks granular web tools', () => {
|
|
259
|
+
const d = resolveSessionToolPolicy(['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'memory'], {
|
|
260
260
|
capabilityBlockedCategories: ['network'],
|
|
261
261
|
})
|
|
262
262
|
assert.deepStrictEqual(d.enabledExtensions, ['memory'])
|
|
263
|
-
assert.equal(d.blockedExtensions.length,
|
|
263
|
+
assert.equal(d.blockedExtensions.length, 5)
|
|
264
264
|
for (const b of d.blockedExtensions) {
|
|
265
265
|
assert.match(b.reason, /category "network"/)
|
|
266
266
|
}
|
|
@@ -49,9 +49,11 @@ const TOOL_DESCRIPTORS: Record<string, ToolDescriptor> = {
|
|
|
49
49
|
move_file: { categories: ['filesystem'], concreteTools: ['move_file'] },
|
|
50
50
|
edit_file: { categories: ['filesystem'], concreteTools: ['edit_file'] },
|
|
51
51
|
delete_file: { categories: ['filesystem'], concreteTools: ['delete_file'], destructive: true },
|
|
52
|
-
web: { categories: ['network'], concreteTools: ['web', 'web_search', 'web_fetch'] },
|
|
52
|
+
web: { categories: ['network'], concreteTools: ['web', 'web_search', 'web_fetch', 'web_extract', 'web_crawl'] },
|
|
53
53
|
web_search: { categories: ['network'], concreteTools: ['web_search'] },
|
|
54
54
|
web_fetch: { categories: ['network'], concreteTools: ['web_fetch'] },
|
|
55
|
+
web_extract: { categories: ['network'], concreteTools: ['web_extract'] },
|
|
56
|
+
web_crawl: { categories: ['network'], concreteTools: ['web_crawl'] },
|
|
55
57
|
browser: { categories: ['browser', 'network'], concreteTools: ['browser', 'openclaw_browser'] },
|
|
56
58
|
delegate: { categories: ['delegation', 'execution'], concreteTools: ['delegate', 'delegate_to_claude_code', 'delegate_to_codex_cli', 'delegate_to_opencode_cli', 'delegate_to_gemini_cli', 'delegate_to_copilot_cli', 'delegate_to_droid_cli', 'delegate_to_cursor_cli', 'delegate_to_qwen_code_cli'] },
|
|
57
59
|
claude_code: { categories: ['delegation', 'execution'], concreteTools: ['delegate_to_claude_code'] },
|
|
@@ -85,6 +87,7 @@ const TOOL_DESCRIPTORS: Record<string, ToolDescriptor> = {
|
|
|
85
87
|
spawn_subagent: { categories: ['delegation', 'platform'], concreteTools: ['spawn_subagent', 'delegate_to_agent'] },
|
|
86
88
|
context_mgmt: { categories: ['memory'], concreteTools: ['context_mgmt', 'context_status', 'context_summarize'] },
|
|
87
89
|
extension_creator: { categories: ['filesystem', 'execution'], concreteTools: ['extension_creator', 'extension_creator_tool'] },
|
|
90
|
+
wallet: { categories: ['outbound'], concreteTools: ['wallet'] },
|
|
88
91
|
mailbox: { categories: ['network', 'platform', 'outbound'], concreteTools: ['mailbox', 'inbox'] },
|
|
89
92
|
ask_human: { categories: ['platform'], concreteTools: ['ask_human', 'human_loop'] },
|
|
90
93
|
google_workspace: { categories: ['network'], concreteTools: ['google_workspace', 'gws'] },
|
|
@@ -12,10 +12,11 @@ function uniqueExtensionId(prefix: string): string {
|
|
|
12
12
|
|
|
13
13
|
describe('tool-planning', () => {
|
|
14
14
|
it('collects core planning metadata for aliased built-in tools', () => {
|
|
15
|
-
const view = getEnabledToolPlanningView(['web_search', 'web_fetch', 'browser', 'manage_connectors'])
|
|
15
|
+
const view = getEnabledToolPlanningView(['web_search', 'web_fetch', 'web_extract', 'web_crawl', 'browser', 'manage_connectors'])
|
|
16
16
|
|
|
17
17
|
assert.deepEqual(view.displayToolIds, ['browser', 'manage_connectors', 'web'])
|
|
18
18
|
assert.deepEqual(getToolsForCapability(['web_search'], TOOL_CAPABILITY.researchSearch), ['web_search'])
|
|
19
|
+
assert.deepEqual(getToolsForCapability(['web_crawl'], TOOL_CAPABILITY.researchCrawl), ['web_crawl'])
|
|
19
20
|
assert.deepEqual(getToolsForCapability(['manage_connectors'], TOOL_CAPABILITY.deliveryVoiceNote), ['connector_message_tool'])
|
|
20
21
|
})
|
|
21
22
|
|
|
@@ -7,6 +7,7 @@ import { canonicalizeExtensionId, expandExtensionIds } from './tool-aliases'
|
|
|
7
7
|
export const TOOL_CAPABILITY = {
|
|
8
8
|
researchSearch: 'research.search',
|
|
9
9
|
researchFetch: 'research.fetch',
|
|
10
|
+
researchCrawl: 'research.crawl',
|
|
10
11
|
browserNavigate: 'browser.navigate',
|
|
11
12
|
browserCapture: 'browser.capture',
|
|
12
13
|
artifactPdf: 'artifact.pdf',
|
|
@@ -98,6 +99,36 @@ const CORE_TOOL_PLANNING: Record<string, LegacyToolPlanningEntry[]> = {
|
|
|
98
99
|
},
|
|
99
100
|
],
|
|
100
101
|
},
|
|
102
|
+
{
|
|
103
|
+
toolName: 'web_extract',
|
|
104
|
+
capabilities: [TOOL_CAPABILITY.researchFetch],
|
|
105
|
+
disciplineGuidance: [
|
|
106
|
+
'For `web_extract`, use `{"url":"https://..."}` when source title and URL should remain attached to extracted page text.',
|
|
107
|
+
'Extract the exact pages you need, then synthesize. Do not extract the same page repeatedly.',
|
|
108
|
+
],
|
|
109
|
+
requestMatchers: [
|
|
110
|
+
{
|
|
111
|
+
capability: TOOL_CAPABILITY.researchFetch,
|
|
112
|
+
patterns: ['extract', 'readable content', 'page text', 'source text'],
|
|
113
|
+
requireLiteralUrl: true,
|
|
114
|
+
},
|
|
115
|
+
],
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
toolName: 'web_crawl',
|
|
119
|
+
capabilities: [TOOL_CAPABILITY.researchCrawl],
|
|
120
|
+
disciplineGuidance: [
|
|
121
|
+
'For `web_crawl`, use `{"url":"https://...","maxPages":5,"maxDepth":1}` only when a task needs several pages from the same site.',
|
|
122
|
+
'Keep crawls bounded and summarize after one crawl. Prefer `web_extract` for a single known URL.',
|
|
123
|
+
],
|
|
124
|
+
requestMatchers: [
|
|
125
|
+
{
|
|
126
|
+
capability: TOOL_CAPABILITY.researchCrawl,
|
|
127
|
+
patterns: ['crawl', 'site map', 'sitemap', 'multiple pages', 'whole site', 'scan site'],
|
|
128
|
+
requireLiteralUrl: true,
|
|
129
|
+
},
|
|
130
|
+
],
|
|
131
|
+
},
|
|
101
132
|
],
|
|
102
133
|
browser: [
|
|
103
134
|
{
|
|
@@ -4,11 +4,11 @@ const INJECTION_PATTERNS: Array<{ code: string; re: RegExp; note: string }> = [
|
|
|
4
4
|
{ code: 'ignore_instructions', re: /\bignore (?:all |any |the )?(?:previous|prior|above|system|developer) instructions\b/i, note: 'tries to override existing instructions' },
|
|
5
5
|
{ code: 'reveal_prompt', re: /\b(?:reveal|show|print|dump)\b[\s\S]{0,40}\b(?:system prompt|developer prompt|hidden prompt)\b/i, note: 'asks for hidden prompt data' },
|
|
6
6
|
{ code: 'credential_theft', re: /\b(?:api key|token|password|secret|credential)s?\b[\s\S]{0,40}\b(?:send|share|reveal|print|dump|exfiltrat)/i, note: 'asks for secrets or credentials' },
|
|
7
|
-
{ code: 'tool_override', re: /\b(?:call|use|run)\b[\s\S]{0,40}\b(?:shell|terminal|browser|http_request|web_fetch|connector_message_tool)\b[\s\S]{0,40}\b(?:without|ignore)\b/i, note: 'tries to direct tool use by bypassing policy' },
|
|
7
|
+
{ code: 'tool_override', re: /\b(?:call|use|run)\b[\s\S]{0,40}\b(?:shell|terminal|browser|http_request|web_fetch|web_extract|web_crawl|connector_message_tool)\b[\s\S]{0,40}\b(?:without|ignore)\b/i, note: 'tries to direct tool use by bypassing policy' },
|
|
8
8
|
{ code: 'workflow_override', re: /\b(?:act as|pretend to be)\b[\s\S]{0,40}\b(?:system|developer|administrator|operator)\b/i, note: 'tries to impersonate a higher-priority role' },
|
|
9
9
|
]
|
|
10
10
|
|
|
11
|
-
const WEB_TOOL_NAMES = new Set(['browser', 'web_search', 'web_fetch', 'http_request'])
|
|
11
|
+
const WEB_TOOL_NAMES = new Set(['browser', 'web_search', 'web_fetch', 'web_extract', 'web_crawl', 'http_request'])
|
|
12
12
|
|
|
13
13
|
function normalizeMode(value: unknown): 'off' | 'warn' | 'block' {
|
|
14
14
|
const normalized = typeof value === 'string' ? value.trim().toLowerCase() : ''
|
package/src/types/schedule.ts
CHANGED
|
@@ -3,7 +3,7 @@ import type { ExtensionManagedResourceMarker } from './extension'
|
|
|
3
3
|
export type ScheduleType = 'cron' | 'interval' | 'once'
|
|
4
4
|
export type ScheduleStatus = 'active' | 'paused' | 'completed' | 'failed' | 'archived'
|
|
5
5
|
export type ScheduleTaskMode = 'task' | 'wake_only' | 'protocol'
|
|
6
|
-
export type ScheduleHistoryAction = 'created' | 'updated' | 'archived' | 'restored' | 'run_started' | 'skipped' | 'failed'
|
|
6
|
+
export type ScheduleHistoryAction = 'created' | 'updated' | 'archived' | 'restored' | 'run_started' | 'skipped' | 'failed' | 'repaired'
|
|
7
7
|
|
|
8
8
|
export interface ScheduleHistoryChange {
|
|
9
9
|
field: string
|
|
@@ -55,7 +55,7 @@ export interface Schedule {
|
|
|
55
55
|
nextRunAt?: number
|
|
56
56
|
/** IANA timezone for schedule evaluation (default: system local) */
|
|
57
57
|
timezone?: string | null
|
|
58
|
-
/**
|
|
58
|
+
/** Deterministic stagger window in seconds added to nextRunAt to avoid thundering herd */
|
|
59
59
|
staggerSec?: number | null
|
|
60
60
|
/** Last delivery status for this schedule */
|
|
61
61
|
lastDeliveryStatus?: 'ok' | 'error' | null
|
package/src/types/session.ts
CHANGED
package/src/types/task.ts
CHANGED