@selextract/mcp-selextract 0.4.0 → 0.5.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/README.md +12 -6
- package/dist/config.js +3 -1
- package/dist/resources.js +3 -3
- package/dist/tools.js +0 -115
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,11 +6,12 @@ If you can run `npx`, you can add it to your MCP client with one config entry.
|
|
|
6
6
|
|
|
7
7
|
## Env
|
|
8
8
|
|
|
9
|
-
- `SELEXTRACT_API_URL`
|
|
10
|
-
- Examples: `http://localhost:8246`, `https://api.selextract.com`, `https://api.selextract.com/api`
|
|
11
|
-
- This server normalizes it to end with `/api` (if you pass `/api/v1`, it will trim back to `/api`).
|
|
12
9
|
- `SELEXTRACT_API_KEY` (your `sk_...` user API key)
|
|
13
10
|
- Legacy alias: `SELEXTRACT_API_TOKEN`
|
|
11
|
+
- Optional: `SELEXTRACT_API_URL` (defaults to `https://app.selextract.com/api`)
|
|
12
|
+
- Use this only if you are pointing at a local dev server or a self-hosted endpoint.
|
|
13
|
+
- Examples: `http://localhost:8246`, `https://app.selextract.com/api`
|
|
14
|
+
- This server normalizes it to end with `/api` (if you pass `/api/v1`, it will trim back to `/api`).
|
|
14
15
|
- Optional: `SELEXTRACT_TIMEOUT_MS` (default: `30000`)
|
|
15
16
|
- Optional: `SELEXTRACT_MAX_RESPONSE_CHARS` (default: `30000`)
|
|
16
17
|
- Optional: `SELEXTRACT_ENV_FILE` (default: `.env`)
|
|
@@ -32,7 +33,6 @@ Add this to your MCP config (keep keys in env vars if possible):
|
|
|
32
33
|
"command": "npx",
|
|
33
34
|
"args": ["-y", "--package", "@selextract/mcp-selextract", "mcp-selextract"],
|
|
34
35
|
"env": {
|
|
35
|
-
"SELEXTRACT_API_URL": "http://localhost:8246",
|
|
36
36
|
"SELEXTRACT_API_KEY": "sk_REPLACE_ME"
|
|
37
37
|
}
|
|
38
38
|
}
|
|
@@ -40,6 +40,14 @@ Add this to your MCP config (keep keys in env vars if possible):
|
|
|
40
40
|
}
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
+
If you want to point at a local dev server, add:
|
|
44
|
+
|
|
45
|
+
```json
|
|
46
|
+
{
|
|
47
|
+
"SELEXTRACT_API_URL": "http://localhost:8246"
|
|
48
|
+
}
|
|
49
|
+
```
|
|
50
|
+
|
|
43
51
|
## What you can do
|
|
44
52
|
|
|
45
53
|
Typical flow:
|
|
@@ -53,8 +61,6 @@ Typical flow:
|
|
|
53
61
|
Useful extras:
|
|
54
62
|
|
|
55
63
|
- Draft cleanup: `task_draft_delete`
|
|
56
|
-
- Task repair (self-healing rebuild): `task_repair`
|
|
57
|
-
- Recipe versioning (rollback/switch): `task_recipe_versions`, `task_set_recipe_version`
|
|
58
64
|
- Authenticated scraping: access profile tools (create/list/update/delete/build-session)
|
|
59
65
|
- Run lifecycle: `run_get`, `run_list`, `run_stop`, `run_delete`
|
|
60
66
|
|
package/dist/config.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
+
const DEFAULT_SELEXTRACT_API_URL = 'https://app.selextract.com/api';
|
|
2
3
|
const EnvSchema = z.object({
|
|
3
|
-
|
|
4
|
+
// Optional because Selextract Cloud is the default.
|
|
5
|
+
SELEXTRACT_API_URL: z.string().min(1).optional().default(DEFAULT_SELEXTRACT_API_URL),
|
|
4
6
|
SELEXTRACT_API_KEY: z.string().min(1).optional(),
|
|
5
7
|
// Legacy alias (kept for older configs).
|
|
6
8
|
SELEXTRACT_API_TOKEN: z.string().min(1).optional(),
|
package/dist/resources.js
CHANGED
|
@@ -31,16 +31,16 @@ export async function readResource(opts) {
|
|
|
31
31
|
'',
|
|
32
32
|
'This MCP server exposes only **basic** operations for:',
|
|
33
33
|
'- Creating AI-built tasks (draft → publish)',
|
|
34
|
-
'- Repairing tasks (rebuild recipe)',
|
|
35
|
-
'- Switching task recipe versions (rollback)',
|
|
36
34
|
'- Running tasks (create/get/list/stop/delete runs)',
|
|
37
35
|
'- Managing access profiles (for logged-in/session scraping)',
|
|
38
36
|
'',
|
|
39
37
|
'It intentionally does **not** expose trace/scratchpad or other deep debugging data.',
|
|
40
38
|
'',
|
|
41
39
|
'Required env vars:',
|
|
42
|
-
'- SELEXTRACT_API_URL',
|
|
43
40
|
'- SELEXTRACT_API_KEY (or legacy SELEXTRACT_API_TOKEN)',
|
|
41
|
+
'',
|
|
42
|
+
'Optional env vars:',
|
|
43
|
+
'- SELEXTRACT_API_URL (defaults to https://app.selextract.com/api)',
|
|
44
44
|
].join('\n');
|
|
45
45
|
return {
|
|
46
46
|
contents: [{ uri: opts.uri, mimeType: 'text/markdown', text: value }],
|
package/dist/tools.js
CHANGED
|
@@ -31,9 +31,6 @@ const TaskCreateInputSchema = z.preprocess((raw) => {
|
|
|
31
31
|
...value,
|
|
32
32
|
access_profile_id: value.access_profile_id ?? value.accessProfileId,
|
|
33
33
|
field_mode: value.field_mode ?? value.fieldMode ?? value.mode,
|
|
34
|
-
build_mode: value.build_mode ?? value.buildMode,
|
|
35
|
-
script_source: value.script_source ?? value.scriptSource,
|
|
36
|
-
recipe_override: value.recipe_override ?? value.recipeOverride,
|
|
37
34
|
max_preview_rows: value.max_preview_rows ?? value.maxPreviewRows,
|
|
38
35
|
goal: value.goal ?? value.description,
|
|
39
36
|
};
|
|
@@ -42,13 +39,9 @@ const TaskCreateInputSchema = z.preprocess((raw) => {
|
|
|
42
39
|
url: z.string().url(),
|
|
43
40
|
access_profile_id: z.string().uuid().optional(),
|
|
44
41
|
field_mode: z.enum(['auto', 'manual']).optional(),
|
|
45
|
-
build_mode: z.enum(['selectors', 'flow', 'code', 'auto']).optional().default('auto'),
|
|
46
|
-
script_source: z.string().min(1).optional(),
|
|
47
|
-
recipe_override: z.any().optional(),
|
|
48
42
|
goal: z.string().min(1).optional(),
|
|
49
43
|
fields: z.array(FieldSchema).optional(),
|
|
50
44
|
max_preview_rows: z.number().int().positive().optional().default(10),
|
|
51
|
-
advanced: z.boolean().optional().default(false),
|
|
52
45
|
})
|
|
53
46
|
.superRefine((data, ctx) => {
|
|
54
47
|
const effectiveFieldMode = data.field_mode ?? (data.fields?.length ? 'manual' : 'auto');
|
|
@@ -79,39 +72,6 @@ const TaskPublishInputSchema = z.preprocess((raw) => {
|
|
|
79
72
|
const TaskDraftDeleteInputSchema = z.object({
|
|
80
73
|
preview_id: z.string().uuid(),
|
|
81
74
|
});
|
|
82
|
-
const TaskRepairInputSchema = z.preprocess((raw) => {
|
|
83
|
-
if (!raw || typeof raw !== 'object')
|
|
84
|
-
return raw;
|
|
85
|
-
const value = raw;
|
|
86
|
-
return {
|
|
87
|
-
...value,
|
|
88
|
-
hint: value.hint ?? value.what_is_wrong ?? value.whatIsWrong ?? value.issue,
|
|
89
|
-
};
|
|
90
|
-
}, z
|
|
91
|
-
.object({
|
|
92
|
-
task_id: z.string().uuid(),
|
|
93
|
-
hint: z.string().min(1).max(2000).optional(),
|
|
94
|
-
force: z.boolean().optional().default(false),
|
|
95
|
-
})
|
|
96
|
-
.passthrough());
|
|
97
|
-
const TaskRecipeVersionsInputSchema = z.object({
|
|
98
|
-
task_id: z.string().uuid(),
|
|
99
|
-
});
|
|
100
|
-
const TaskSetRecipeVersionInputSchema = z
|
|
101
|
-
.object({
|
|
102
|
-
task_id: z.string().uuid(),
|
|
103
|
-
recipe_version_id: z.string().uuid().optional(),
|
|
104
|
-
version: z.coerce.number().int().positive().optional(),
|
|
105
|
-
})
|
|
106
|
-
.superRefine((data, ctx) => {
|
|
107
|
-
if (!data.recipe_version_id && !data.version) {
|
|
108
|
-
ctx.addIssue({
|
|
109
|
-
code: z.ZodIssueCode.custom,
|
|
110
|
-
message: 'Provide recipe_version_id or version.',
|
|
111
|
-
path: ['recipe_version_id'],
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
});
|
|
115
75
|
const AccessProfileHeaderSchema = z.object({
|
|
116
76
|
name: z.string().min(1),
|
|
117
77
|
value: z.string().min(1),
|
|
@@ -185,9 +145,6 @@ export const ToolInputs = {
|
|
|
185
145
|
task_build_status: TaskBuildStatusInputSchema,
|
|
186
146
|
task_publish: TaskPublishInputSchema,
|
|
187
147
|
task_draft_delete: TaskDraftDeleteInputSchema,
|
|
188
|
-
task_repair: TaskRepairInputSchema,
|
|
189
|
-
task_recipe_versions: TaskRecipeVersionsInputSchema,
|
|
190
|
-
task_set_recipe_version: TaskSetRecipeVersionInputSchema,
|
|
191
148
|
// Runs
|
|
192
149
|
run_create: z.object({
|
|
193
150
|
task_id: z.string().uuid(),
|
|
@@ -241,13 +198,9 @@ export function toolDefinitions() {
|
|
|
241
198
|
url: { type: 'string', description: 'The page to analyze.' },
|
|
242
199
|
access_profile_id: { type: 'string', description: 'Optional access profile ID for logged-in/session scraping.' },
|
|
243
200
|
field_mode: { type: 'string', enum: ['auto', 'manual'], description: 'auto = infer fields; manual = use provided fields.' },
|
|
244
|
-
build_mode: { type: 'string', enum: ['selectors', 'flow', 'code', 'auto'], description: 'How to build the draft (default: auto).' },
|
|
245
|
-
script_source: { type: 'string', description: 'Optional Playwright script source (code mode). If provided, the server previews this exact script.' },
|
|
246
|
-
recipe_override: { type: 'object', description: 'Optional base recipe to reuse when previewing custom scripts (keeps flow/dom settings).' },
|
|
247
201
|
goal: { type: 'string', description: 'Optional short description of what to extract (helps in auto mode).' },
|
|
248
202
|
fields: { type: 'array', items: { type: 'object' }, description: 'Fields to extract (required in manual mode).' },
|
|
249
203
|
max_preview_rows: { type: 'number', description: 'How many sample rows to generate in the preview (default: 10).' },
|
|
250
|
-
advanced: { type: 'boolean', description: 'Allow more complex extraction strategies (may take longer).' },
|
|
251
204
|
},
|
|
252
205
|
required: ['url'],
|
|
253
206
|
},
|
|
@@ -283,41 +236,6 @@ export function toolDefinitions() {
|
|
|
283
236
|
required: ['preview_id'],
|
|
284
237
|
},
|
|
285
238
|
},
|
|
286
|
-
{
|
|
287
|
-
name: 'task_repair',
|
|
288
|
-
description: 'Repair a saved task by re-running the agent builder (creates a new recipe version and updates the task in place). Returns build_job_id.',
|
|
289
|
-
inputSchema: {
|
|
290
|
-
type: 'object',
|
|
291
|
-
properties: {
|
|
292
|
-
task_id: { type: 'string', description: 'Task ID.' },
|
|
293
|
-
hint: { type: 'string', description: 'Optional note about what is wrong (helps steer the repair).' },
|
|
294
|
-
force: { type: 'boolean', description: 'If true, queue even if a build is already in progress.' },
|
|
295
|
-
},
|
|
296
|
-
required: ['task_id'],
|
|
297
|
-
},
|
|
298
|
-
},
|
|
299
|
-
{
|
|
300
|
-
name: 'task_recipe_versions',
|
|
301
|
-
description: 'List recipe versions for a task (for rollback/version switching).',
|
|
302
|
-
inputSchema: {
|
|
303
|
-
type: 'object',
|
|
304
|
-
properties: { task_id: { type: 'string', description: 'Task ID.' } },
|
|
305
|
-
required: ['task_id'],
|
|
306
|
-
},
|
|
307
|
-
},
|
|
308
|
-
{
|
|
309
|
-
name: 'task_set_recipe_version',
|
|
310
|
-
description: 'Switch a task to use a specific recipe version (by recipe_version_id or by version number).',
|
|
311
|
-
inputSchema: {
|
|
312
|
-
type: 'object',
|
|
313
|
-
properties: {
|
|
314
|
-
task_id: { type: 'string', description: 'Task ID.' },
|
|
315
|
-
recipe_version_id: { type: 'string', description: 'Recipe version ID.' },
|
|
316
|
-
version: { type: 'number', description: 'Recipe version number (1, 2, 3, ...).' },
|
|
317
|
-
},
|
|
318
|
-
required: ['task_id'],
|
|
319
|
-
},
|
|
320
|
-
},
|
|
321
239
|
// Runs
|
|
322
240
|
{
|
|
323
241
|
name: 'run_create',
|
|
@@ -472,13 +390,9 @@ export function toolHandlers(api, maxChars) {
|
|
|
472
390
|
url: input.url,
|
|
473
391
|
access_profile_id: input.access_profile_id,
|
|
474
392
|
field_mode: effectiveFieldMode,
|
|
475
|
-
build_mode: input.build_mode,
|
|
476
|
-
...(input.script_source ? { script_source: input.script_source } : {}),
|
|
477
|
-
...(input.recipe_override ? { recipe_override: input.recipe_override } : {}),
|
|
478
393
|
goal: input.goal,
|
|
479
394
|
fields: input.fields,
|
|
480
395
|
maxPreviewRows: input.max_preview_rows,
|
|
481
|
-
advanced: input.advanced,
|
|
482
396
|
debug: false,
|
|
483
397
|
},
|
|
484
398
|
});
|
|
@@ -523,35 +437,6 @@ export function toolHandlers(api, maxChars) {
|
|
|
523
437
|
});
|
|
524
438
|
return asTextResult(result, maxChars);
|
|
525
439
|
},
|
|
526
|
-
task_repair: async (raw) => {
|
|
527
|
-
const input = ToolInputs.task_repair.parse(raw);
|
|
528
|
-
const result = await api.request({
|
|
529
|
-
method: 'POST',
|
|
530
|
-
path: `/v1/agent/tasks/${input.task_id}/repair`,
|
|
531
|
-
body: input.hint || input.force ? { hint: input.hint, force: input.force } : {},
|
|
532
|
-
});
|
|
533
|
-
return asTextResult(result, maxChars);
|
|
534
|
-
},
|
|
535
|
-
task_recipe_versions: async (raw) => {
|
|
536
|
-
const input = ToolInputs.task_recipe_versions.parse(raw);
|
|
537
|
-
const result = await api.request({
|
|
538
|
-
method: 'GET',
|
|
539
|
-
path: `/v1/tasks/${input.task_id}/recipe-versions`,
|
|
540
|
-
});
|
|
541
|
-
return asTextResult(result, maxChars);
|
|
542
|
-
},
|
|
543
|
-
task_set_recipe_version: async (raw) => {
|
|
544
|
-
const input = ToolInputs.task_set_recipe_version.parse(raw);
|
|
545
|
-
const result = await api.request({
|
|
546
|
-
method: 'POST',
|
|
547
|
-
path: `/v1/tasks/${input.task_id}/recipe-version`,
|
|
548
|
-
body: {
|
|
549
|
-
recipe_version_id: input.recipe_version_id,
|
|
550
|
-
version: input.version,
|
|
551
|
-
},
|
|
552
|
-
});
|
|
553
|
-
return asTextResult(result, maxChars);
|
|
554
|
-
},
|
|
555
440
|
run_create: async (raw) => {
|
|
556
441
|
const input = ToolInputs.run_create.parse(raw);
|
|
557
442
|
const result = await api.request({
|