@selextract/mcp-selextract 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -6
- package/dist/config.js +3 -1
- package/dist/resources.js +3 -3
- package/dist/tools.js +196 -121
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,11 +6,12 @@ If you can run `npx`, you can add it to your MCP client with one config entry.
|
|
|
6
6
|
|
|
7
7
|
## Env
|
|
8
8
|
|
|
9
|
-
- `SELEXTRACT_API_URL`
|
|
10
|
-
- Examples: `http://localhost:8246`, `https://api.selextract.com`, `https://api.selextract.com/api`
|
|
11
|
-
- This server normalizes it to end with `/api` (if you pass `/api/v1`, it will trim back to `/api`).
|
|
12
9
|
- `SELEXTRACT_API_KEY` (your `sk_...` user API key)
|
|
13
10
|
- Legacy alias: `SELEXTRACT_API_TOKEN`
|
|
11
|
+
- Optional: `SELEXTRACT_API_URL` (defaults to `https://app.selextract.com/api`)
|
|
12
|
+
- Use this only if you are pointing at a local dev server or a self-hosted endpoint.
|
|
13
|
+
- Examples: `http://localhost:8246`, `https://app.selextract.com/api`
|
|
14
|
+
- This server normalizes it to end with `/api` (if you pass `/api/v1`, it will trim back to `/api`).
|
|
14
15
|
- Optional: `SELEXTRACT_TIMEOUT_MS` (default: `30000`)
|
|
15
16
|
- Optional: `SELEXTRACT_MAX_RESPONSE_CHARS` (default: `30000`)
|
|
16
17
|
- Optional: `SELEXTRACT_ENV_FILE` (default: `.env`)
|
|
@@ -32,7 +33,6 @@ Add this to your MCP config (keep keys in env vars if possible):
|
|
|
32
33
|
"command": "npx",
|
|
33
34
|
"args": ["-y", "--package", "@selextract/mcp-selextract", "mcp-selextract"],
|
|
34
35
|
"env": {
|
|
35
|
-
"SELEXTRACT_API_URL": "http://localhost:8246",
|
|
36
36
|
"SELEXTRACT_API_KEY": "sk_REPLACE_ME"
|
|
37
37
|
}
|
|
38
38
|
}
|
|
@@ -40,6 +40,14 @@ Add this to your MCP config (keep keys in env vars if possible):
|
|
|
40
40
|
}
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
+
If you want to point at a local dev server, add:
|
|
44
|
+
|
|
45
|
+
```json
|
|
46
|
+
{
|
|
47
|
+
"SELEXTRACT_API_URL": "http://localhost:8246"
|
|
48
|
+
}
|
|
49
|
+
```
|
|
50
|
+
|
|
43
51
|
## What you can do
|
|
44
52
|
|
|
45
53
|
Typical flow:
|
|
@@ -53,11 +61,30 @@ Typical flow:
|
|
|
53
61
|
Useful extras:
|
|
54
62
|
|
|
55
63
|
- Draft cleanup: `task_draft_delete`
|
|
56
|
-
- Task repair (self-healing rebuild): `task_repair`
|
|
57
|
-
- Recipe versioning (rollback/switch): `task_recipe_versions`, `task_set_recipe_version`
|
|
58
64
|
- Authenticated scraping: access profile tools (create/list/update/delete/build-session)
|
|
59
65
|
- Run lifecycle: `run_get`, `run_list`, `run_stop`, `run_delete`
|
|
60
66
|
|
|
67
|
+
## Locale / region controls
|
|
68
|
+
|
|
69
|
+
You can control language/region in a general way (works across many sites):
|
|
70
|
+
|
|
71
|
+
- `task_create`
|
|
72
|
+
- `url_params`: adds or overrides query params on the URL (example: `{"hl":"en","gl":"US"}`)
|
|
73
|
+
- `options.acceptLanguage`: sets the `Accept-Language` request header
|
|
74
|
+
- `options.locale`: sets Playwright locale (example: `en-US`)
|
|
75
|
+
- `options.timezoneId`: sets Playwright timezoneId (example: `America/Los_Angeles`)
|
|
76
|
+
- `run_create`
|
|
77
|
+
- `options`: same as above (applies to the run)
|
|
78
|
+
- `budgets.maxRows`: stop after N rows (example: `100`)
|
|
79
|
+
|
|
80
|
+
## Field types
|
|
81
|
+
|
|
82
|
+
For `task_create.fields`, `type` can be one of:
|
|
83
|
+
|
|
84
|
+
- `text`, `number`, `money`, `url`, `image_url`, `html`, `unknown`
|
|
85
|
+
|
|
86
|
+
Common aliases like `string`, `int`, `price`, and `link` are accepted and mapped.
|
|
87
|
+
|
|
61
88
|
## Resources (read-only)
|
|
62
89
|
|
|
63
90
|
- `selextract://help` (usage guide)
|
package/dist/config.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
+
const DEFAULT_SELEXTRACT_API_URL = 'https://app.selextract.com/api';
|
|
2
3
|
const EnvSchema = z.object({
|
|
3
|
-
|
|
4
|
+
// Optional because Selextract Cloud is the default.
|
|
5
|
+
SELEXTRACT_API_URL: z.string().min(1).optional().default(DEFAULT_SELEXTRACT_API_URL),
|
|
4
6
|
SELEXTRACT_API_KEY: z.string().min(1).optional(),
|
|
5
7
|
// Legacy alias (kept for older configs).
|
|
6
8
|
SELEXTRACT_API_TOKEN: z.string().min(1).optional(),
|
package/dist/resources.js
CHANGED
|
@@ -31,16 +31,16 @@ export async function readResource(opts) {
|
|
|
31
31
|
'',
|
|
32
32
|
'This MCP server exposes only **basic** operations for:',
|
|
33
33
|
'- Creating AI-built tasks (draft → publish)',
|
|
34
|
-
'- Repairing tasks (rebuild recipe)',
|
|
35
|
-
'- Switching task recipe versions (rollback)',
|
|
36
34
|
'- Running tasks (create/get/list/stop/delete runs)',
|
|
37
35
|
'- Managing access profiles (for logged-in/session scraping)',
|
|
38
36
|
'',
|
|
39
37
|
'It intentionally does **not** expose trace/scratchpad or other deep debugging data.',
|
|
40
38
|
'',
|
|
41
39
|
'Required env vars:',
|
|
42
|
-
'- SELEXTRACT_API_URL',
|
|
43
40
|
'- SELEXTRACT_API_KEY (or legacy SELEXTRACT_API_TOKEN)',
|
|
41
|
+
'',
|
|
42
|
+
'Optional env vars:',
|
|
43
|
+
'- SELEXTRACT_API_URL (defaults to https://app.selextract.com/api)',
|
|
44
44
|
].join('\n');
|
|
45
45
|
return {
|
|
46
46
|
contents: [{ uri: opts.uri, mimeType: 'text/markdown', text: value }],
|
package/dist/tools.js
CHANGED
|
@@ -11,44 +11,157 @@ function asTextResult(value, maxChars) {
|
|
|
11
11
|
],
|
|
12
12
|
};
|
|
13
13
|
}
|
|
14
|
+
const AgentFieldTypeSchema = z.enum(['text', 'number', 'money', 'url', 'image_url', 'html', 'unknown']);
|
|
15
|
+
function coerceAgentFieldType(raw) {
|
|
16
|
+
if (raw == null)
|
|
17
|
+
return undefined;
|
|
18
|
+
if (typeof raw !== 'string')
|
|
19
|
+
return undefined;
|
|
20
|
+
const cleaned = raw.trim().toLowerCase();
|
|
21
|
+
if (!cleaned)
|
|
22
|
+
return undefined;
|
|
23
|
+
const direct = AgentFieldTypeSchema.safeParse(cleaned);
|
|
24
|
+
if (direct.success)
|
|
25
|
+
return direct.data;
|
|
26
|
+
const map = {
|
|
27
|
+
string: 'text',
|
|
28
|
+
str: 'text',
|
|
29
|
+
text: 'text',
|
|
30
|
+
number: 'number',
|
|
31
|
+
int: 'number',
|
|
32
|
+
integer: 'number',
|
|
33
|
+
float: 'number',
|
|
34
|
+
double: 'number',
|
|
35
|
+
decimal: 'number',
|
|
36
|
+
currency: 'money',
|
|
37
|
+
price: 'money',
|
|
38
|
+
money: 'money',
|
|
39
|
+
link: 'url',
|
|
40
|
+
href: 'url',
|
|
41
|
+
uri: 'url',
|
|
42
|
+
url: 'url',
|
|
43
|
+
image: 'image_url',
|
|
44
|
+
img: 'image_url',
|
|
45
|
+
'image-url': 'image_url',
|
|
46
|
+
imageurl: 'image_url',
|
|
47
|
+
html: 'html',
|
|
48
|
+
markup: 'html',
|
|
49
|
+
any: 'unknown',
|
|
50
|
+
json: 'unknown',
|
|
51
|
+
unknown: 'unknown',
|
|
52
|
+
};
|
|
53
|
+
return map[cleaned];
|
|
54
|
+
}
|
|
14
55
|
const FieldSchema = z
|
|
15
|
-
.
|
|
56
|
+
.preprocess((raw) => {
|
|
57
|
+
if (!raw || typeof raw !== 'object')
|
|
58
|
+
return raw;
|
|
59
|
+
const value = raw;
|
|
60
|
+
return {
|
|
61
|
+
...value,
|
|
62
|
+
value: value.value ?? value.description,
|
|
63
|
+
type: coerceAgentFieldType(value.type) ?? value.type,
|
|
64
|
+
};
|
|
65
|
+
}, z.object({
|
|
16
66
|
name: z.string().min(1).optional(),
|
|
17
67
|
value: z.string().min(1).optional(),
|
|
18
|
-
type:
|
|
68
|
+
type: AgentFieldTypeSchema.optional(),
|
|
19
69
|
required: z.boolean().optional(),
|
|
20
70
|
examples: z.array(z.string()).optional(),
|
|
21
|
-
})
|
|
71
|
+
}))
|
|
22
72
|
.refine((data) => Boolean(data.name?.trim() || data.value?.trim()), {
|
|
23
73
|
message: 'Provide a field name (name) or a short description (value).',
|
|
24
74
|
path: ['value'],
|
|
25
75
|
});
|
|
76
|
+
const UrlParamsSchema = z.record(z.union([z.string(), z.number(), z.boolean(), z.null()]));
|
|
77
|
+
const TaskBuildOptionsSchema = z.preprocess((raw) => {
|
|
78
|
+
if (!raw || typeof raw !== 'object')
|
|
79
|
+
return raw;
|
|
80
|
+
const value = raw;
|
|
81
|
+
return {
|
|
82
|
+
...value,
|
|
83
|
+
userAgent: value.userAgent ?? value.user_agent,
|
|
84
|
+
acceptLanguage: value.acceptLanguage ?? value.accept_language,
|
|
85
|
+
timezoneId: value.timezoneId ?? value.timezone_id,
|
|
86
|
+
pageTimeoutMs: value.pageTimeoutMs ?? value.page_timeout_ms,
|
|
87
|
+
};
|
|
88
|
+
}, z
|
|
89
|
+
.object({
|
|
90
|
+
userAgent: z.string().min(1).max(500).optional(),
|
|
91
|
+
acceptLanguage: z.string().min(1).max(500).optional(),
|
|
92
|
+
locale: z.string().min(1).max(64).optional(),
|
|
93
|
+
timezoneId: z.string().min(1).max(64).optional(),
|
|
94
|
+
pageTimeoutMs: z.number().int().min(1000).max(120000).optional(),
|
|
95
|
+
})
|
|
96
|
+
.passthrough());
|
|
97
|
+
const RunOptionsSchema = z.preprocess((raw) => {
|
|
98
|
+
if (!raw || typeof raw !== 'object')
|
|
99
|
+
return raw;
|
|
100
|
+
const value = raw;
|
|
101
|
+
return {
|
|
102
|
+
...value,
|
|
103
|
+
userAgent: value.userAgent ?? value.user_agent,
|
|
104
|
+
acceptLanguage: value.acceptLanguage ?? value.accept_language,
|
|
105
|
+
timezoneId: value.timezoneId ?? value.timezone_id,
|
|
106
|
+
waitForSelector: value.waitForSelector ?? value.wait_for_selector ?? value.wait_for,
|
|
107
|
+
delay: value.delay ?? value.delay_ms,
|
|
108
|
+
};
|
|
109
|
+
}, z
|
|
110
|
+
.object({
|
|
111
|
+
timeout: z.number().int().min(1000).max(120000).optional(),
|
|
112
|
+
userAgent: z.string().min(1).max(500).optional(),
|
|
113
|
+
acceptLanguage: z.string().min(1).max(500).optional(),
|
|
114
|
+
locale: z.string().min(1).max(64).optional(),
|
|
115
|
+
timezoneId: z.string().min(1).max(64).optional(),
|
|
116
|
+
waitForSelector: z.string().min(1).max(500).optional(),
|
|
117
|
+
delay: z.number().int().min(0).max(60000).optional(),
|
|
118
|
+
retries: z.number().int().min(0).max(10).optional(),
|
|
119
|
+
})
|
|
120
|
+
.passthrough());
|
|
121
|
+
const RunBudgetsSchema = z
|
|
122
|
+
.object({
|
|
123
|
+
maxPages: z.number().int().min(1).max(1000).optional(),
|
|
124
|
+
maxScrolls: z.number().int().min(1).max(500).optional(),
|
|
125
|
+
maxTimeMs: z.number().int().min(1000).max(3600000).optional(),
|
|
126
|
+
maxRows: z.number().int().min(1).max(200000).optional(),
|
|
127
|
+
maxSteps: z.number().int().min(1).max(500).optional(),
|
|
128
|
+
maxRowBytes: z.number().int().min(100).max(1000000).optional(),
|
|
129
|
+
})
|
|
130
|
+
.passthrough();
|
|
26
131
|
const TaskCreateInputSchema = z.preprocess((raw) => {
|
|
27
132
|
if (!raw || typeof raw !== 'object')
|
|
28
133
|
return raw;
|
|
29
134
|
const value = raw;
|
|
135
|
+
const options = value.options && typeof value.options === 'object' ? { ...value.options } : {};
|
|
136
|
+
if (value.userAgent ?? value.user_agent)
|
|
137
|
+
options.userAgent = value.userAgent ?? value.user_agent;
|
|
138
|
+
if (value.acceptLanguage ?? value.accept_language)
|
|
139
|
+
options.acceptLanguage = value.acceptLanguage ?? value.accept_language;
|
|
140
|
+
if (value.locale)
|
|
141
|
+
options.locale = value.locale;
|
|
142
|
+
if (value.timezoneId ?? value.timezone_id)
|
|
143
|
+
options.timezoneId = value.timezoneId ?? value.timezone_id;
|
|
144
|
+
if (value.pageTimeoutMs ?? value.page_timeout_ms)
|
|
145
|
+
options.pageTimeoutMs = value.pageTimeoutMs ?? value.page_timeout_ms;
|
|
30
146
|
return {
|
|
31
147
|
...value,
|
|
32
148
|
access_profile_id: value.access_profile_id ?? value.accessProfileId,
|
|
33
149
|
field_mode: value.field_mode ?? value.fieldMode ?? value.mode,
|
|
34
|
-
build_mode: value.build_mode ?? value.buildMode,
|
|
35
|
-
script_source: value.script_source ?? value.scriptSource,
|
|
36
|
-
recipe_override: value.recipe_override ?? value.recipeOverride,
|
|
37
150
|
max_preview_rows: value.max_preview_rows ?? value.maxPreviewRows,
|
|
38
151
|
goal: value.goal ?? value.description,
|
|
152
|
+
url_params: value.url_params ?? value.urlParams,
|
|
153
|
+
options: Object.keys(options).length ? options : undefined,
|
|
39
154
|
};
|
|
40
155
|
}, z
|
|
41
156
|
.object({
|
|
42
157
|
url: z.string().url(),
|
|
158
|
+
url_params: UrlParamsSchema.optional(),
|
|
43
159
|
access_profile_id: z.string().uuid().optional(),
|
|
44
160
|
field_mode: z.enum(['auto', 'manual']).optional(),
|
|
45
|
-
build_mode: z.enum(['selectors', 'flow', 'code', 'auto']).optional().default('auto'),
|
|
46
|
-
script_source: z.string().min(1).optional(),
|
|
47
|
-
recipe_override: z.any().optional(),
|
|
48
161
|
goal: z.string().min(1).optional(),
|
|
49
162
|
fields: z.array(FieldSchema).optional(),
|
|
50
163
|
max_preview_rows: z.number().int().positive().optional().default(10),
|
|
51
|
-
|
|
164
|
+
options: TaskBuildOptionsSchema.optional(),
|
|
52
165
|
})
|
|
53
166
|
.superRefine((data, ctx) => {
|
|
54
167
|
const effectiveFieldMode = data.field_mode ?? (data.fields?.length ? 'manual' : 'auto');
|
|
@@ -79,39 +192,6 @@ const TaskPublishInputSchema = z.preprocess((raw) => {
|
|
|
79
192
|
const TaskDraftDeleteInputSchema = z.object({
|
|
80
193
|
preview_id: z.string().uuid(),
|
|
81
194
|
});
|
|
82
|
-
const TaskRepairInputSchema = z.preprocess((raw) => {
|
|
83
|
-
if (!raw || typeof raw !== 'object')
|
|
84
|
-
return raw;
|
|
85
|
-
const value = raw;
|
|
86
|
-
return {
|
|
87
|
-
...value,
|
|
88
|
-
hint: value.hint ?? value.what_is_wrong ?? value.whatIsWrong ?? value.issue,
|
|
89
|
-
};
|
|
90
|
-
}, z
|
|
91
|
-
.object({
|
|
92
|
-
task_id: z.string().uuid(),
|
|
93
|
-
hint: z.string().min(1).max(2000).optional(),
|
|
94
|
-
force: z.boolean().optional().default(false),
|
|
95
|
-
})
|
|
96
|
-
.passthrough());
|
|
97
|
-
const TaskRecipeVersionsInputSchema = z.object({
|
|
98
|
-
task_id: z.string().uuid(),
|
|
99
|
-
});
|
|
100
|
-
const TaskSetRecipeVersionInputSchema = z
|
|
101
|
-
.object({
|
|
102
|
-
task_id: z.string().uuid(),
|
|
103
|
-
recipe_version_id: z.string().uuid().optional(),
|
|
104
|
-
version: z.coerce.number().int().positive().optional(),
|
|
105
|
-
})
|
|
106
|
-
.superRefine((data, ctx) => {
|
|
107
|
-
if (!data.recipe_version_id && !data.version) {
|
|
108
|
-
ctx.addIssue({
|
|
109
|
-
code: z.ZodIssueCode.custom,
|
|
110
|
-
message: 'Provide recipe_version_id or version.',
|
|
111
|
-
path: ['recipe_version_id'],
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
});
|
|
115
195
|
const AccessProfileHeaderSchema = z.object({
|
|
116
196
|
name: z.string().min(1),
|
|
117
197
|
value: z.string().min(1),
|
|
@@ -185,14 +265,43 @@ export const ToolInputs = {
|
|
|
185
265
|
task_build_status: TaskBuildStatusInputSchema,
|
|
186
266
|
task_publish: TaskPublishInputSchema,
|
|
187
267
|
task_draft_delete: TaskDraftDeleteInputSchema,
|
|
188
|
-
task_repair: TaskRepairInputSchema,
|
|
189
|
-
task_recipe_versions: TaskRecipeVersionsInputSchema,
|
|
190
|
-
task_set_recipe_version: TaskSetRecipeVersionInputSchema,
|
|
191
268
|
// Runs
|
|
192
|
-
run_create: z.
|
|
269
|
+
run_create: z.preprocess((raw) => {
|
|
270
|
+
if (!raw || typeof raw !== 'object')
|
|
271
|
+
return raw;
|
|
272
|
+
const value = raw;
|
|
273
|
+
const options = value.options && typeof value.options === 'object' ? { ...value.options } : {};
|
|
274
|
+
if (value.userAgent ?? value.user_agent)
|
|
275
|
+
options.userAgent = value.userAgent ?? value.user_agent;
|
|
276
|
+
if (value.acceptLanguage ?? value.accept_language)
|
|
277
|
+
options.acceptLanguage = value.acceptLanguage ?? value.accept_language;
|
|
278
|
+
if (value.locale)
|
|
279
|
+
options.locale = value.locale;
|
|
280
|
+
if (value.timezoneId ?? value.timezone_id)
|
|
281
|
+
options.timezoneId = value.timezoneId ?? value.timezone_id;
|
|
282
|
+
if (value.timeout)
|
|
283
|
+
options.timeout = value.timeout;
|
|
284
|
+
if (value.waitForSelector ?? value.wait_for_selector ?? value.wait_for)
|
|
285
|
+
options.waitForSelector = value.waitForSelector ?? value.wait_for_selector ?? value.wait_for;
|
|
286
|
+
if (value.delay ?? value.delay_ms)
|
|
287
|
+
options.delay = value.delay ?? value.delay_ms;
|
|
288
|
+
if (value.retries)
|
|
289
|
+
options.retries = value.retries;
|
|
290
|
+
return {
|
|
291
|
+
...value,
|
|
292
|
+
pagination: value.pagination ?? value.page,
|
|
293
|
+
options: Object.keys(options).length ? options : undefined,
|
|
294
|
+
};
|
|
295
|
+
}, z
|
|
296
|
+
.object({
|
|
193
297
|
task_id: z.string().uuid(),
|
|
194
298
|
max_runtime_seconds: z.number().int().min(30).max(3600).optional(),
|
|
195
|
-
|
|
299
|
+
pagination: z.record(z.any()).optional(),
|
|
300
|
+
budgets: RunBudgetsSchema.optional(),
|
|
301
|
+
options: RunOptionsSchema.optional(),
|
|
302
|
+
test: z.boolean().optional(),
|
|
303
|
+
})
|
|
304
|
+
.passthrough()),
|
|
196
305
|
run_get: z.object({
|
|
197
306
|
run_id: z.string().uuid(),
|
|
198
307
|
}),
|
|
@@ -239,15 +348,23 @@ export function toolDefinitions() {
|
|
|
239
348
|
type: 'object',
|
|
240
349
|
properties: {
|
|
241
350
|
url: { type: 'string', description: 'The page to analyze.' },
|
|
351
|
+
url_params: { type: 'object', description: 'Optional query params to add/override on the URL (ex: {"hl":"en","gl":"US"}).' },
|
|
242
352
|
access_profile_id: { type: 'string', description: 'Optional access profile ID for logged-in/session scraping.' },
|
|
243
353
|
field_mode: { type: 'string', enum: ['auto', 'manual'], description: 'auto = infer fields; manual = use provided fields.' },
|
|
244
|
-
build_mode: { type: 'string', enum: ['selectors', 'flow', 'code', 'auto'], description: 'How to build the draft (default: auto).' },
|
|
245
|
-
script_source: { type: 'string', description: 'Optional Playwright script source (code mode). If provided, the server previews this exact script.' },
|
|
246
|
-
recipe_override: { type: 'object', description: 'Optional base recipe to reuse when previewing custom scripts (keeps flow/dom settings).' },
|
|
247
354
|
goal: { type: 'string', description: 'Optional short description of what to extract (helps in auto mode).' },
|
|
248
355
|
fields: { type: 'array', items: { type: 'object' }, description: 'Fields to extract (required in manual mode).' },
|
|
249
356
|
max_preview_rows: { type: 'number', description: 'How many sample rows to generate in the preview (default: 10).' },
|
|
250
|
-
|
|
357
|
+
options: {
|
|
358
|
+
type: 'object',
|
|
359
|
+
description: 'Optional browsing controls (language/region, time zone, user agent, timeouts).',
|
|
360
|
+
properties: {
|
|
361
|
+
userAgent: { type: 'string' },
|
|
362
|
+
acceptLanguage: { type: 'string', description: 'Sets the Accept-Language request header.' },
|
|
363
|
+
locale: { type: 'string', description: 'Sets Playwright locale (ex: en-US).' },
|
|
364
|
+
timezoneId: { type: 'string', description: 'Sets Playwright timezoneId (IANA, ex: America/Los_Angeles).' },
|
|
365
|
+
pageTimeoutMs: { type: 'number', description: 'Page timeout (ms) used during the build.' },
|
|
366
|
+
},
|
|
367
|
+
},
|
|
251
368
|
},
|
|
252
369
|
required: ['url'],
|
|
253
370
|
},
|
|
@@ -283,41 +400,6 @@ export function toolDefinitions() {
|
|
|
283
400
|
required: ['preview_id'],
|
|
284
401
|
},
|
|
285
402
|
},
|
|
286
|
-
{
|
|
287
|
-
name: 'task_repair',
|
|
288
|
-
description: 'Repair a saved task by re-running the agent builder (creates a new recipe version and updates the task in place). Returns build_job_id.',
|
|
289
|
-
inputSchema: {
|
|
290
|
-
type: 'object',
|
|
291
|
-
properties: {
|
|
292
|
-
task_id: { type: 'string', description: 'Task ID.' },
|
|
293
|
-
hint: { type: 'string', description: 'Optional note about what is wrong (helps steer the repair).' },
|
|
294
|
-
force: { type: 'boolean', description: 'If true, queue even if a build is already in progress.' },
|
|
295
|
-
},
|
|
296
|
-
required: ['task_id'],
|
|
297
|
-
},
|
|
298
|
-
},
|
|
299
|
-
{
|
|
300
|
-
name: 'task_recipe_versions',
|
|
301
|
-
description: 'List recipe versions for a task (for rollback/version switching).',
|
|
302
|
-
inputSchema: {
|
|
303
|
-
type: 'object',
|
|
304
|
-
properties: { task_id: { type: 'string', description: 'Task ID.' } },
|
|
305
|
-
required: ['task_id'],
|
|
306
|
-
},
|
|
307
|
-
},
|
|
308
|
-
{
|
|
309
|
-
name: 'task_set_recipe_version',
|
|
310
|
-
description: 'Switch a task to use a specific recipe version (by recipe_version_id or by version number).',
|
|
311
|
-
inputSchema: {
|
|
312
|
-
type: 'object',
|
|
313
|
-
properties: {
|
|
314
|
-
task_id: { type: 'string', description: 'Task ID.' },
|
|
315
|
-
recipe_version_id: { type: 'string', description: 'Recipe version ID.' },
|
|
316
|
-
version: { type: 'number', description: 'Recipe version number (1, 2, 3, ...).' },
|
|
317
|
-
},
|
|
318
|
-
required: ['task_id'],
|
|
319
|
-
},
|
|
320
|
-
},
|
|
321
403
|
// Runs
|
|
322
404
|
{
|
|
323
405
|
name: 'run_create',
|
|
@@ -327,6 +409,23 @@ export function toolDefinitions() {
|
|
|
327
409
|
properties: {
|
|
328
410
|
task_id: { type: 'string', description: 'Task ID.' },
|
|
329
411
|
max_runtime_seconds: { type: 'number', description: 'Optional hard limit for run time (seconds).' },
|
|
412
|
+
pagination: { type: 'object', description: 'Optional pagination override for this run only.' },
|
|
413
|
+
budgets: { type: 'object', description: 'Optional safety limits for this run only (ex: {"maxRows":100}).' },
|
|
414
|
+
options: {
|
|
415
|
+
type: 'object',
|
|
416
|
+
description: 'Optional browsing controls (language/region, time zone, user agent, timeouts).',
|
|
417
|
+
properties: {
|
|
418
|
+
timeout: { type: 'number', description: 'Navigation/step timeout (ms).' },
|
|
419
|
+
userAgent: { type: 'string' },
|
|
420
|
+
acceptLanguage: { type: 'string', description: 'Sets the Accept-Language request header.' },
|
|
421
|
+
locale: { type: 'string', description: 'Sets Playwright locale (ex: en-US).' },
|
|
422
|
+
timezoneId: { type: 'string', description: 'Sets Playwright timezoneId (IANA, ex: America/Los_Angeles).' },
|
|
423
|
+
waitForSelector: { type: 'string', description: 'Wait for a selector after navigation.' },
|
|
424
|
+
delay: { type: 'number', description: 'Extra delay (ms) after navigation.' },
|
|
425
|
+
retries: { type: 'number', description: 'Retries for certain flow steps (0-10).' },
|
|
426
|
+
},
|
|
427
|
+
},
|
|
428
|
+
test: { type: 'boolean', description: 'If true, reduces pagination (best-effort) for a quick smoke run.' },
|
|
330
429
|
},
|
|
331
430
|
required: ['task_id'],
|
|
332
431
|
},
|
|
@@ -470,15 +569,13 @@ export function toolHandlers(api, maxChars) {
|
|
|
470
569
|
path: '/v1/agent/extractions/build',
|
|
471
570
|
body: {
|
|
472
571
|
url: input.url,
|
|
572
|
+
...(input.url_params ? { urlParams: input.url_params } : {}),
|
|
573
|
+
...(input.options ? { options: input.options } : {}),
|
|
473
574
|
access_profile_id: input.access_profile_id,
|
|
474
575
|
field_mode: effectiveFieldMode,
|
|
475
|
-
build_mode: input.build_mode,
|
|
476
|
-
...(input.script_source ? { script_source: input.script_source } : {}),
|
|
477
|
-
...(input.recipe_override ? { recipe_override: input.recipe_override } : {}),
|
|
478
576
|
goal: input.goal,
|
|
479
577
|
fields: input.fields,
|
|
480
578
|
maxPreviewRows: input.max_preview_rows,
|
|
481
|
-
advanced: input.advanced,
|
|
482
579
|
debug: false,
|
|
483
580
|
},
|
|
484
581
|
});
|
|
@@ -523,41 +620,19 @@ export function toolHandlers(api, maxChars) {
|
|
|
523
620
|
});
|
|
524
621
|
return asTextResult(result, maxChars);
|
|
525
622
|
},
|
|
526
|
-
task_repair: async (raw) => {
|
|
527
|
-
const input = ToolInputs.task_repair.parse(raw);
|
|
528
|
-
const result = await api.request({
|
|
529
|
-
method: 'POST',
|
|
530
|
-
path: `/v1/agent/tasks/${input.task_id}/repair`,
|
|
531
|
-
body: input.hint || input.force ? { hint: input.hint, force: input.force } : {},
|
|
532
|
-
});
|
|
533
|
-
return asTextResult(result, maxChars);
|
|
534
|
-
},
|
|
535
|
-
task_recipe_versions: async (raw) => {
|
|
536
|
-
const input = ToolInputs.task_recipe_versions.parse(raw);
|
|
537
|
-
const result = await api.request({
|
|
538
|
-
method: 'GET',
|
|
539
|
-
path: `/v1/tasks/${input.task_id}/recipe-versions`,
|
|
540
|
-
});
|
|
541
|
-
return asTextResult(result, maxChars);
|
|
542
|
-
},
|
|
543
|
-
task_set_recipe_version: async (raw) => {
|
|
544
|
-
const input = ToolInputs.task_set_recipe_version.parse(raw);
|
|
545
|
-
const result = await api.request({
|
|
546
|
-
method: 'POST',
|
|
547
|
-
path: `/v1/tasks/${input.task_id}/recipe-version`,
|
|
548
|
-
body: {
|
|
549
|
-
recipe_version_id: input.recipe_version_id,
|
|
550
|
-
version: input.version,
|
|
551
|
-
},
|
|
552
|
-
});
|
|
553
|
-
return asTextResult(result, maxChars);
|
|
554
|
-
},
|
|
555
623
|
run_create: async (raw) => {
|
|
556
624
|
const input = ToolInputs.run_create.parse(raw);
|
|
625
|
+
const body = {
|
|
626
|
+
...(input.max_runtime_seconds ? { max_runtime_seconds: input.max_runtime_seconds } : {}),
|
|
627
|
+
...(input.pagination ? { pagination: input.pagination } : {}),
|
|
628
|
+
...(input.budgets ? { budgets: input.budgets } : {}),
|
|
629
|
+
...(input.options ? { options: input.options } : {}),
|
|
630
|
+
...(input.test === true ? { test: true } : {}),
|
|
631
|
+
};
|
|
557
632
|
const result = await api.request({
|
|
558
633
|
method: 'POST',
|
|
559
634
|
path: `/v1/tasks/${input.task_id}/runs`,
|
|
560
|
-
body:
|
|
635
|
+
body: Object.keys(body).length ? body : {},
|
|
561
636
|
});
|
|
562
637
|
return asTextResult(result, maxChars);
|
|
563
638
|
},
|