@swarmclawai/swarmclaw 1.9.21 → 1.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -5
- package/package.json +2 -2
- package/src/components/chat/activity-moment.tsx +4 -0
- package/src/components/chat/tool-call-bubble.tsx +6 -0
- package/src/components/schedules/schedule-console.tsx +3 -0
- package/src/lib/server/capability-router.test.ts +4 -4
- package/src/lib/server/capability-router.ts +1 -0
- package/src/lib/server/chat-execution/chat-execution-advanced.test.ts +27 -0
- package/src/lib/server/chat-execution/chat-execution-utils.ts +21 -0
- package/src/lib/server/chat-execution/iteration-event-handler.ts +1 -1
- package/src/lib/server/chat-execution/stream-continuation.ts +6 -2
- package/src/lib/server/plugins-advanced.test.ts +7 -3
- package/src/lib/server/runtime/scheduler.test.ts +129 -0
- package/src/lib/server/runtime/scheduler.ts +62 -35
- package/src/lib/server/schedules/schedule-history.test.ts +14 -0
- package/src/lib/server/schedules/schedule-history.ts +1 -0
- package/src/lib/server/schedules/schedule-lifecycle.ts +5 -28
- package/src/lib/server/schedules/schedule-normalization.ts +6 -28
- package/src/lib/server/schedules/schedule-timing.test.ts +80 -0
- package/src/lib/server/schedules/schedule-timing.ts +179 -0
- package/src/lib/server/session-tools/web-crawl.test.ts +106 -0
- package/src/lib/server/session-tools/web-inputs.test.ts +5 -0
- package/src/lib/server/session-tools/web-utils.ts +8 -2
- package/src/lib/server/session-tools/web.ts +256 -29
- package/src/lib/server/storage.ts +2 -0
- package/src/lib/server/tasks/task-lifecycle.ts +35 -5
- package/src/lib/server/tool-aliases.ts +1 -1
- package/src/lib/server/tool-capability-policy-advanced.test.ts +3 -3
- package/src/lib/server/tool-capability-policy.ts +4 -1
- package/src/lib/server/tool-planning.test.ts +2 -1
- package/src/lib/server/tool-planning.ts +31 -0
- package/src/lib/server/untrusted-content.ts +2 -2
- package/src/types/schedule.ts +2 -2
- package/src/types/session.ts +2 -0
- package/src/types/task.ts +1 -0
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { CronExpressionParser } from 'cron-parser'
|
|
2
|
-
|
|
3
1
|
import { genId } from '@/lib/id'
|
|
4
2
|
import type { BoardTask, Schedule, ScheduleStatus, Session } from '@/types'
|
|
5
3
|
import { pushMainLoopEventToMainSessions } from '@/lib/server/agents/main-agent-loop'
|
|
@@ -19,6 +17,7 @@ import {
|
|
|
19
17
|
import { notify } from '@/lib/server/ws-hub'
|
|
20
18
|
import { getScheduleClusterIds } from '@/lib/server/schedules/schedule-service'
|
|
21
19
|
import { appendScheduleHistoryEntry } from '@/lib/server/schedules/schedule-history'
|
|
20
|
+
import { computeScheduleNextRunAt } from '@/lib/server/schedules/schedule-timing'
|
|
22
21
|
|
|
23
22
|
type RestorableScheduleStatus = Exclude<ScheduleStatus, 'archived'>
|
|
24
23
|
|
|
@@ -50,33 +49,11 @@ export interface SchedulePurgeResult {
|
|
|
50
49
|
}
|
|
51
50
|
|
|
52
51
|
function computeNextRunAt(schedule: Pick<Schedule, 'scheduleType' | 'cron' | 'intervalMs' | 'runAt' | 'timezone' | 'staggerSec'>, now: number): number | undefined {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
if (schedule.scheduleType === 'once') {
|
|
59
|
-
return typeof schedule.runAt === 'number' && Number.isFinite(schedule.runAt)
|
|
60
|
-
? applyStagger(schedule.runAt)
|
|
61
|
-
: undefined
|
|
62
|
-
}
|
|
63
|
-
if (schedule.scheduleType === 'interval') {
|
|
64
|
-
return typeof schedule.intervalMs === 'number' && Number.isFinite(schedule.intervalMs)
|
|
65
|
-
? applyStagger(now + schedule.intervalMs)
|
|
66
|
-
: undefined
|
|
67
|
-
}
|
|
68
|
-
if (schedule.scheduleType === 'cron' && typeof schedule.cron === 'string' && schedule.cron.trim()) {
|
|
69
|
-
try {
|
|
70
|
-
const interval = CronExpressionParser.parse(
|
|
71
|
-
schedule.cron,
|
|
72
|
-
schedule.timezone ? { tz: schedule.timezone } : undefined,
|
|
73
|
-
)
|
|
74
|
-
return applyStagger(interval.next().getTime())
|
|
75
|
-
} catch {
|
|
76
|
-
return undefined
|
|
77
|
-
}
|
|
52
|
+
try {
|
|
53
|
+
return computeScheduleNextRunAt(schedule, now)
|
|
54
|
+
} catch {
|
|
55
|
+
return undefined
|
|
78
56
|
}
|
|
79
|
-
return undefined
|
|
80
57
|
}
|
|
81
58
|
|
|
82
59
|
function cloneSchedule(schedule: Schedule): Schedule {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fs from 'node:fs'
|
|
2
2
|
import path from 'node:path'
|
|
3
|
-
import { CronExpressionParser } from 'cron-parser'
|
|
4
3
|
import { WORKSPACE_DIR } from '@/lib/server/data-dir'
|
|
4
|
+
import { computeScheduleNextRunAt } from '@/lib/server/schedules/schedule-timing'
|
|
5
5
|
|
|
6
6
|
type SchedulePayload = Record<string, unknown>
|
|
7
7
|
|
|
@@ -86,15 +86,6 @@ function parseAtTimeToCron(atTime: string): string | null {
|
|
|
86
86
|
return `${minutes} ${hours} * * *`
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
-
/**
|
|
90
|
-
* Apply a random stagger offset (in seconds) to a timestamp.
|
|
91
|
-
*/
|
|
92
|
-
function applyStagger(timestamp: number, staggerSec: number | null | undefined): number {
|
|
93
|
-
if (!staggerSec || staggerSec <= 0) return timestamp
|
|
94
|
-
const offset = Math.floor(Math.random() * staggerSec * 1000)
|
|
95
|
-
return timestamp + offset
|
|
96
|
-
}
|
|
97
|
-
|
|
98
89
|
function normalizePositiveInt(value: unknown): number | null {
|
|
99
90
|
const parsed = typeof value === 'number'
|
|
100
91
|
? value
|
|
@@ -326,24 +317,11 @@ export function normalizeSchedulePayload(payload: SchedulePayload, opts: Normali
|
|
|
326
317
|
}
|
|
327
318
|
|
|
328
319
|
if (normalized.status !== 'archived' && normalized.nextRunAt == null) {
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
try {
|
|
335
|
-
const cronTimezone = trimString(normalized.timezone)
|
|
336
|
-
const interval = CronExpressionParser.parse(
|
|
337
|
-
normalized.cron as string,
|
|
338
|
-
{
|
|
339
|
-
...(cronTimezone ? { tz: cronTimezone } : {}),
|
|
340
|
-
currentDate: new Date(now),
|
|
341
|
-
},
|
|
342
|
-
)
|
|
343
|
-
normalized.nextRunAt = applyStagger(interval.next().getTime(), normalized.staggerSec as number | null)
|
|
344
|
-
} catch {
|
|
345
|
-
return { ok: false, error: 'Error: invalid cron expression.' }
|
|
346
|
-
}
|
|
320
|
+
try {
|
|
321
|
+
const computedNextRunAt = computeScheduleNextRunAt(normalized, now)
|
|
322
|
+
if (computedNextRunAt != null) normalized.nextRunAt = computedNextRunAt
|
|
323
|
+
} catch {
|
|
324
|
+
return { ok: false, error: 'Error: invalid cron expression.' }
|
|
347
325
|
}
|
|
348
326
|
}
|
|
349
327
|
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { describe, it } from 'node:test'
|
|
3
|
+
|
|
4
|
+
import {
|
|
5
|
+
assessScheduleNextRunRepair,
|
|
6
|
+
computeScheduleNextRunAt,
|
|
7
|
+
stableScheduleStaggerMs,
|
|
8
|
+
} from '@/lib/server/schedules/schedule-timing'
|
|
9
|
+
|
|
10
|
+
describe('schedule timing', () => {
|
|
11
|
+
it('computes cron next runs from the provided scheduler time', () => {
|
|
12
|
+
const nextRunAt = computeScheduleNextRunAt({
|
|
13
|
+
id: 'sched-daily',
|
|
14
|
+
name: 'Daily status',
|
|
15
|
+
agentId: 'agent-1',
|
|
16
|
+
scheduleType: 'cron',
|
|
17
|
+
cron: '0 8 * * *',
|
|
18
|
+
timezone: 'UTC',
|
|
19
|
+
}, Date.parse('2030-01-01T08:00:30.000Z'))
|
|
20
|
+
|
|
21
|
+
assert.equal(nextRunAt, Date.parse('2030-01-02T08:00:00.000Z'))
|
|
22
|
+
})
|
|
23
|
+
|
|
24
|
+
it('uses deterministic schedule stagger inside the configured window', () => {
|
|
25
|
+
const schedule = {
|
|
26
|
+
id: 'sched-staggered',
|
|
27
|
+
name: 'Staggered status',
|
|
28
|
+
agentId: 'agent-1',
|
|
29
|
+
scheduleType: 'cron',
|
|
30
|
+
cron: '0 8 * * *',
|
|
31
|
+
timezone: 'UTC',
|
|
32
|
+
staggerSec: 30,
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const first = stableScheduleStaggerMs(schedule)
|
|
36
|
+
const second = stableScheduleStaggerMs(schedule)
|
|
37
|
+
|
|
38
|
+
assert.equal(first, second)
|
|
39
|
+
assert.ok(first >= 0)
|
|
40
|
+
assert.ok(first < 30_000)
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
it('repairs stale future cron slots to the earliest upcoming slot', () => {
|
|
44
|
+
const assessment = assessScheduleNextRunRepair({
|
|
45
|
+
id: 'sched-stale',
|
|
46
|
+
name: 'Daily status',
|
|
47
|
+
agentId: 'agent-1',
|
|
48
|
+
scheduleType: 'cron',
|
|
49
|
+
cron: '0 8 * * *',
|
|
50
|
+
timezone: 'UTC',
|
|
51
|
+
status: 'active',
|
|
52
|
+
nextRunAt: Date.parse('2026-05-12T08:00:00.000Z'),
|
|
53
|
+
}, Date.parse('2026-05-06T07:30:00.000Z'))
|
|
54
|
+
|
|
55
|
+
assert.equal(assessment.ok, true)
|
|
56
|
+
assert.equal(assessment.repair, true)
|
|
57
|
+
if (assessment.ok && assessment.repair) {
|
|
58
|
+
assert.equal(assessment.reason, 'stale_future')
|
|
59
|
+
assert.equal(assessment.nextRunAt, Date.parse('2026-05-06T08:00:00.000Z'))
|
|
60
|
+
}
|
|
61
|
+
})
|
|
62
|
+
|
|
63
|
+
it('flags invalid due cron schedules before they launch', () => {
|
|
64
|
+
const assessment = assessScheduleNextRunRepair({
|
|
65
|
+
id: 'sched-invalid',
|
|
66
|
+
name: 'Broken cron',
|
|
67
|
+
agentId: 'agent-1',
|
|
68
|
+
scheduleType: 'cron',
|
|
69
|
+
cron: 'not a cron',
|
|
70
|
+
status: 'active',
|
|
71
|
+
nextRunAt: Date.parse('2026-05-06T07:00:00.000Z'),
|
|
72
|
+
}, Date.parse('2026-05-06T07:30:00.000Z'))
|
|
73
|
+
|
|
74
|
+
assert.equal(assessment.ok, false)
|
|
75
|
+
if (!assessment.ok) {
|
|
76
|
+
assert.equal(assessment.reason, 'invalid_cron')
|
|
77
|
+
assert.equal(assessment.previousNextRunAt, Date.parse('2026-05-06T07:00:00.000Z'))
|
|
78
|
+
}
|
|
79
|
+
})
|
|
80
|
+
})
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto'
|
|
2
|
+
|
|
3
|
+
import { CronExpressionParser } from 'cron-parser'
|
|
4
|
+
|
|
5
|
+
import type { ScheduleType } from '@/types'
|
|
6
|
+
|
|
7
|
+
export type ScheduleTimingRepairReason = 'missing' | 'invalid' | 'stale_future'
|
|
8
|
+
|
|
9
|
+
export type ScheduleTimingInput = {
|
|
10
|
+
id?: string | null
|
|
11
|
+
name?: string | null
|
|
12
|
+
agentId?: string | null
|
|
13
|
+
taskPrompt?: string | null
|
|
14
|
+
scheduleType?: ScheduleType | string | null
|
|
15
|
+
cron?: string | null
|
|
16
|
+
intervalMs?: number | null
|
|
17
|
+
runAt?: number | null
|
|
18
|
+
timezone?: string | null
|
|
19
|
+
staggerSec?: number | null
|
|
20
|
+
nextRunAt?: number | null
|
|
21
|
+
status?: string | null
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export type ScheduleNextRunRepairAssessment =
|
|
25
|
+
| { ok: true; repair: false }
|
|
26
|
+
| {
|
|
27
|
+
ok: true
|
|
28
|
+
repair: true
|
|
29
|
+
reason: ScheduleTimingRepairReason
|
|
30
|
+
nextRunAt: number
|
|
31
|
+
previousNextRunAt: number | null
|
|
32
|
+
}
|
|
33
|
+
| {
|
|
34
|
+
ok: false
|
|
35
|
+
reason: 'invalid_cron'
|
|
36
|
+
previousNextRunAt: number | null
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
const CRON_REPAIR_TOLERANCE_MS = 1_000
|
|
40
|
+
|
|
41
|
+
/** Returns the whitespace-trimmed text when `value` is a string; every other input yields ''. */
function trimString(value: unknown): string {
  if (typeof value !== 'string') return ''
  return value.trim()
}
|
|
44
|
+
|
|
45
|
+
/**
 * Coerces a value to a positive whole number.
 *
 * Finite numbers are truncated toward zero; anything that is not a finite
 * number, or that truncates to zero or below, is reported as `null`.
 */
function normalizeTimestamp(value: unknown): number | null {
  if (typeof value === 'number' && Number.isFinite(value)) {
    const whole = Math.trunc(value)
    if (whole > 0) return whole
  }
  return null
}
|
|
50
|
+
|
|
51
|
+
/**
 * Produces a whole-millisecond "now" value: a finite input is truncated toward
 * zero, while a non-finite input falls back to the current wall-clock time.
 */
function normalizeNow(value: number): number {
  if (!Number.isFinite(value)) return Date.now()
  return Math.trunc(value)
}
|
|
54
|
+
|
|
55
|
+
/**
 * Converts a stagger setting in seconds into a whole-millisecond window.
 *
 * Returns 0 unless `staggerSec` is a finite number greater than zero; the
 * result is capped at `Number.MAX_SAFE_INTEGER`.
 */
function normalizeStaggerWindowMs(staggerSec: unknown): number {
  if (typeof staggerSec === 'number' && Number.isFinite(staggerSec) && staggerSec > 0) {
    const windowMs = Math.trunc(staggerSec * 1000)
    return windowMs > Number.MAX_SAFE_INTEGER ? Number.MAX_SAFE_INTEGER : windowMs
  }
  return 0
}
|
|
59
|
+
|
|
60
|
+
/**
 * Serializes a schedule's identifying and timing fields into one string.
 *
 * Text fields are trimmed, numeric fields are truncated to whole numbers (or
 * left empty when not a finite number), and the parts are joined with NUL.
 */
function stableScheduleKey(schedule: ScheduleTimingInput): string {
  const wholeNumberOrEmpty = (value: number | null | undefined): number | '' =>
    typeof value === 'number' && Number.isFinite(value) ? Math.trunc(value) : ''

  const parts: Array<string | number> = [
    trimString(schedule.id),
    trimString(schedule.agentId),
    trimString(schedule.name),
    trimString(schedule.taskPrompt),
    trimString(schedule.scheduleType),
    trimString(schedule.cron),
    wholeNumberOrEmpty(schedule.intervalMs),
    wholeNumberOrEmpty(schedule.runAt),
    trimString(schedule.timezone),
  ]
  return parts.join('\0')
}
|
|
73
|
+
|
|
74
|
+
/**
 * Derives a repeatable stagger offset, in milliseconds, for a schedule.
 *
 * The offset is the first 8 bytes of the SHA-256 digest of the schedule's
 * stable key, read as a big-endian unsigned integer and reduced modulo the
 * stagger window, so it always lies in `[0, windowMs)`.
 *
 * @returns 0 when the schedule has no usable stagger window.
 */
export function stableScheduleStaggerMs(schedule: ScheduleTimingInput): number {
  const windowMs = normalizeStaggerWindowMs(schedule.staggerSec)
  if (windowMs <= 0) return 0

  const hasher = createHash('sha256')
  hasher.update(stableScheduleKey(schedule))
  const leadingWord = hasher.digest().readBigUInt64BE(0)
  return Number(leadingWord % BigInt(windowMs))
}
|
|
81
|
+
|
|
82
|
+
/** Shifts `timestamp` forward by the schedule's deterministic stagger offset and truncates to a whole millisecond. */
function applyStableStagger(timestamp: number, schedule: ScheduleTimingInput): number {
  const offsetMs = stableScheduleStaggerMs(schedule)
  return Math.trunc(timestamp + offsetMs)
}
|
|
85
|
+
|
|
86
|
+
/**
 * Parses the schedule's cron expression with iteration anchored at `now`.
 *
 * The schedule's timezone is passed to the parser only when it is a non-blank
 * string. Returns `null` when the schedule has no cron text; errors raised by
 * the parser are not caught here.
 */
function parseCron(schedule: ScheduleTimingInput, now: number) {
  const expression = trimString(schedule.cron)
  if (expression === '') return null

  const tz = trimString(schedule.timezone)
  const currentDate = new Date(normalizeNow(now))
  const options = tz ? { tz, currentDate } : { currentDate }
  return CronExpressionParser.parse(expression, options)
}
|
|
95
|
+
|
|
96
|
+
/**
 * Computes when a schedule should next fire, including its stable stagger.
 *
 * - `once`: the configured `runAt`.
 * - `interval`: `now` plus `intervalMs`.
 * - `cron`: the first cron occurrence the parser yields from `now`.
 *
 * @returns `undefined` when the schedule type is unrecognized or the field the
 *   type depends on is missing or not a positive finite number.
 * @throws whatever the cron parser throws for an unparseable expression.
 */
export function computeScheduleNextRunAt(schedule: ScheduleTimingInput, now: number): number | undefined {
  switch (trimString(schedule.scheduleType)) {
    case 'once': {
      const runAt = normalizeTimestamp(schedule.runAt)
      if (runAt == null) return undefined
      return applyStableStagger(runAt, schedule)
    }
    case 'interval': {
      const intervalMs = normalizeTimestamp(schedule.intervalMs)
      if (intervalMs == null) return undefined
      return applyStableStagger(normalizeNow(now) + intervalMs, schedule)
    }
    case 'cron': {
      const expression = parseCron(schedule, now)
      if (!expression) return undefined
      return applyStableStagger(expression.next().getTime(), schedule)
    }
    default:
      return undefined
  }
}
|
|
113
|
+
|
|
114
|
+
/**
 * Describes the acceptable firing window for the next cron occurrence from `now`.
 *
 * `earliest` is the raw cron occurrence, `latest` adds the full stagger window,
 * and `nextRunAt` is the occurrence shifted by the schedule's stable stagger.
 * Returns `null` when the schedule has no cron text.
 */
function computeCronWindow(schedule: ScheduleTimingInput, now: number): { earliest: number; latest: number; nextRunAt: number } | null {
  const expression = parseCron(schedule, now)
  if (!expression) return null

  const earliest = expression.next().getTime()
  const nextRunAt = applyStableStagger(earliest, schedule)
  const latest = earliest + normalizeStaggerWindowMs(schedule.staggerSec)
  return { earliest, latest, nextRunAt }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Decides whether a schedule's stored `nextRunAt` needs to be recomputed.
 *
 * Outcomes:
 * - `{ ok: true, repair: false }` — nothing to change: the schedule has a
 *   non-empty status other than 'active', it is already due, its future run
 *   time is plausible, or no next run can be computed for it.
 * - `{ ok: true, repair: true, ... }` — `nextRunAt` is absent ('missing'),
 *   present but not a positive finite number ('invalid'), or a future cron
 *   time that matches neither the upcoming slot nor the slot whose stagger
 *   window is still open ('stale_future').
 * - `{ ok: false, reason: 'invalid_cron', ... }` — the cron expression is
 *   empty for a due cron schedule, or parsing/iterating it threw.
 *
 * @param schedule Schedule fields relevant to timing.
 * @param now Reference time in epoch milliseconds.
 */
export function assessScheduleNextRunRepair(
  schedule: ScheduleTimingInput,
  now: number,
): ScheduleNextRunRepairAssessment {
  const status = trimString(schedule.status)
  if (status && status !== 'active') return { ok: true, repair: false }

  const previousNextRunAt = normalizeTimestamp(schedule.nextRunAt)
  const hasNextRunAt = schedule.nextRunAt != null
  const isCron = trimString(schedule.scheduleType) === 'cron'

  // Already due: leave the timestamp alone, but surface a broken cron expression.
  if (previousNextRunAt != null && previousNextRunAt <= normalizeNow(now)) {
    if (isCron) {
      try {
        if (!parseCron(schedule, now)) return { ok: false, reason: 'invalid_cron', previousNextRunAt }
      } catch {
        return { ok: false, reason: 'invalid_cron', previousNextRunAt }
      }
    }
    return { ok: true, repair: false }
  }

  // Nothing usable stored: compute a fresh value when the schedule allows it.
  if (previousNextRunAt == null) {
    try {
      const nextRunAt = computeScheduleNextRunAt(schedule, now)
      if (nextRunAt == null) return { ok: true, repair: false }
      return {
        ok: true,
        repair: true,
        reason: hasNextRunAt ? 'invalid' : 'missing',
        nextRunAt,
        previousNextRunAt: null,
      }
    } catch {
      return { ok: false, reason: 'invalid_cron', previousNextRunAt: null }
    }
  }

  // Only cron schedules have a derivable "expected" future slot to compare against.
  if (!isCron) return { ok: true, repair: false }

  try {
    const window = computeCronWindow(schedule, now)
    if (!window) return { ok: true, repair: false }
    const tooEarly = previousNextRunAt < window.earliest - CRON_REPAIR_TOLERANCE_MS
    const tooLate = previousNextRunAt > window.latest + CRON_REPAIR_TOLERANCE_MS
    if (!tooEarly && !tooLate) return { ok: true, repair: false }

    if (tooEarly) {
      // `window` describes the first slot after `now`. When `now` lies between a
      // slot and that slot's staggered fire time, the stored value still belongs
      // to the slot that just elapsed; rewriting it would skip that pending run.
      const staggerWindowMs = normalizeStaggerWindowMs(schedule.staggerSec)
      const cursor = staggerWindowMs > 0 ? parseCron(schedule, now) : null
      if (cursor) {
        cursor.next() // advance to the upcoming slot so prev() yields the latest slot at or before `now`
        const elapsedSlot = cursor.prev().getTime()
        const insideElapsedWindow =
          previousNextRunAt >= elapsedSlot - CRON_REPAIR_TOLERANCE_MS
          && previousNextRunAt <= elapsedSlot + staggerWindowMs + CRON_REPAIR_TOLERANCE_MS
        if (insideElapsedWindow) return { ok: true, repair: false }
      }
    }

    return {
      ok: true,
      repair: true,
      reason: 'stale_future',
      nextRunAt: window.nextRunAt,
      previousNextRunAt,
    }
  } catch {
    return { ok: false, reason: 'invalid_cron', previousNextRunAt }
  }
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { afterEach, describe, it } from 'node:test'
|
|
3
|
+
import { buildWebTools } from './web'
|
|
4
|
+
import type { ToolBuildContext } from './context'
|
|
5
|
+
|
|
6
|
+
const originalFetch = globalThis.fetch
|
|
7
|
+
|
|
8
|
+
function createContext(): ToolBuildContext {
|
|
9
|
+
return {
|
|
10
|
+
cwd: process.cwd(),
|
|
11
|
+
ctx: undefined,
|
|
12
|
+
hasExtension: (name: string) => name === 'web',
|
|
13
|
+
hasTool: (name: string) => name === 'web',
|
|
14
|
+
cleanupFns: [],
|
|
15
|
+
commandTimeoutMs: 1000,
|
|
16
|
+
claudeTimeoutMs: 1000,
|
|
17
|
+
cliProcessTimeoutMs: 1000,
|
|
18
|
+
persistDelegateResumeId: () => {},
|
|
19
|
+
readStoredDelegateResumeId: () => null,
|
|
20
|
+
resolveCurrentSession: () => null,
|
|
21
|
+
activeExtensions: ['web'],
|
|
22
|
+
} as ToolBuildContext
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
function mockFetch(pages: Record<string, string>, calls: string[] = []): void {
|
|
26
|
+
globalThis.fetch = (async (input: RequestInfo | URL) => {
|
|
27
|
+
const url = input instanceof Request ? input.url : String(input)
|
|
28
|
+
calls.push(url)
|
|
29
|
+
const html = pages[url]
|
|
30
|
+
if (!html) {
|
|
31
|
+
return new Response('missing', { status: 404, statusText: 'Not Found' })
|
|
32
|
+
}
|
|
33
|
+
return new Response(html, {
|
|
34
|
+
status: 200,
|
|
35
|
+
headers: { 'content-type': 'text/html; charset=utf-8' },
|
|
36
|
+
})
|
|
37
|
+
}) as typeof fetch
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
afterEach(() => {
|
|
41
|
+
globalThis.fetch = originalFetch
|
|
42
|
+
})
|
|
43
|
+
|
|
44
|
+
describe('web extract and crawl tools', () => {
|
|
45
|
+
it('registers direct granular web tools when web is enabled', () => {
|
|
46
|
+
const names = buildWebTools(createContext()).map((entry) => entry.name).sort()
|
|
47
|
+
|
|
48
|
+
assert.deepEqual(names.filter((name) => name.startsWith('web')), [
|
|
49
|
+
'web',
|
|
50
|
+
'web_crawl',
|
|
51
|
+
'web_extract',
|
|
52
|
+
'web_fetch',
|
|
53
|
+
'web_search',
|
|
54
|
+
])
|
|
55
|
+
})
|
|
56
|
+
|
|
57
|
+
it('extracts readable page content with title and source URL', async () => {
|
|
58
|
+
mockFetch({
|
|
59
|
+
'https://example.test/article': `
|
|
60
|
+
<!doctype html>
|
|
61
|
+
<title>Feature Page</title>
|
|
62
|
+
<header>Ignore navigation</header>
|
|
63
|
+
<main>
|
|
64
|
+
<h1>Feature Page</h1>
|
|
65
|
+
<p>Readable body text for the agent.</p>
|
|
66
|
+
</main>
|
|
67
|
+
<script>console.log('hidden')</script>
|
|
68
|
+
`,
|
|
69
|
+
})
|
|
70
|
+
const tool = buildWebTools(createContext()).find((entry) => entry.name === 'web_extract')
|
|
71
|
+
assert.ok(tool)
|
|
72
|
+
|
|
73
|
+
const output = String(await tool.invoke({ url: 'https://example.test/article#section' }))
|
|
74
|
+
|
|
75
|
+
assert.match(output, /Title: Feature Page/)
|
|
76
|
+
assert.match(output, /URL: https:\/\/example\.test\/article/)
|
|
77
|
+
assert.match(output, /Readable body text for the agent\./)
|
|
78
|
+
assert.doesNotMatch(output, /Ignore navigation/)
|
|
79
|
+
assert.doesNotMatch(output, /console\.log/)
|
|
80
|
+
})
|
|
81
|
+
|
|
82
|
+
it('crawls same-origin pages within the requested page and depth bounds', async () => {
|
|
83
|
+
const calls: string[] = []
|
|
84
|
+
mockFetch({
|
|
85
|
+
'https://site.test/': `
|
|
86
|
+
<title>Start</title>
|
|
87
|
+
<main>Start page <a href="/a">A</a> <a href="/b">B</a> <a href="https://external.test/x">External</a></main>
|
|
88
|
+
`,
|
|
89
|
+
'https://site.test/a': '<title>A page</title><main>Alpha content</main>',
|
|
90
|
+
'https://site.test/b': '<title>B page</title><main>Beta content</main>',
|
|
91
|
+
'https://external.test/x': '<title>External</title><main>Should not be fetched</main>',
|
|
92
|
+
}, calls)
|
|
93
|
+
const tool = buildWebTools(createContext()).find((entry) => entry.name === 'web_crawl')
|
|
94
|
+
assert.ok(tool)
|
|
95
|
+
|
|
96
|
+
const output = String(await tool.invoke({ url: 'https://site.test/', maxPages: 3, maxDepth: 1 }))
|
|
97
|
+
|
|
98
|
+
assert.match(output, /Crawl results for: https:\/\/site\.test\//)
|
|
99
|
+
assert.match(output, /Pages crawled: 3/)
|
|
100
|
+
assert.match(output, /Start page/)
|
|
101
|
+
assert.match(output, /Alpha content/)
|
|
102
|
+
assert.match(output, /Beta content/)
|
|
103
|
+
assert.doesNotMatch(output, /Should not be fetched/)
|
|
104
|
+
assert.deepEqual(calls, ['https://site.test/', 'https://site.test/a', 'https://site.test/b'])
|
|
105
|
+
})
|
|
106
|
+
})
|
|
@@ -20,6 +20,11 @@ describe('inferWebActionFromArgs', () => {
|
|
|
20
20
|
assert.equal(inferWebActionFromArgs({ action: 'search', url: 'https://example.com/article' }), 'search')
|
|
21
21
|
})
|
|
22
22
|
|
|
23
|
+
it('preserves explicit extract and crawl actions', () => {
|
|
24
|
+
assert.equal(inferWebActionFromArgs({ action: 'extract', url: 'https://example.com/article' }), 'extract')
|
|
25
|
+
assert.equal(inferWebActionFromArgs({ action: 'crawl', url: 'https://example.com/' }), 'crawl')
|
|
26
|
+
})
|
|
27
|
+
|
|
23
28
|
it('normalizes stringified browser form payloads', () => {
|
|
24
29
|
const normalized = normalizeBrowserActionParams({
|
|
25
30
|
input: JSON.stringify({
|
|
@@ -176,8 +176,14 @@ export function inferWebActionFromArgs(params: {
|
|
|
176
176
|
query?: string
|
|
177
177
|
url?: string
|
|
178
178
|
method?: string
|
|
179
|
-
}): 'search' | 'fetch' | 'api' | undefined {
|
|
180
|
-
if (
|
|
179
|
+
}): 'search' | 'fetch' | 'extract' | 'crawl' | 'api' | undefined {
|
|
180
|
+
if (
|
|
181
|
+
params.action === 'search'
|
|
182
|
+
|| params.action === 'fetch'
|
|
183
|
+
|| params.action === 'extract'
|
|
184
|
+
|| params.action === 'crawl'
|
|
185
|
+
|| params.action === 'api'
|
|
186
|
+
) return params.action
|
|
181
187
|
if (typeof params.method === 'string' && params.method.trim()) return 'api'
|
|
182
188
|
if (typeof params.url === 'string' && /^https?:\/\//i.test(params.url.trim())) return 'fetch'
|
|
183
189
|
if (typeof params.query === 'string' && params.query.trim()) return 'search'
|