webpeel 0.18.0 → 0.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,361 +62,130 @@ function extractFontsFromContent(content) {
62
62
  return [...new Set(fonts)].slice(0, 5);
63
63
  }
64
64
  // ---------------------------------------------------------------------------
65
- // Tool definitions (subset of the full MCP server tools, used for hosted mode)
65
+ // Tool definitions — 7 consolidated tools (matches src/mcp/server.ts)
66
66
  // ---------------------------------------------------------------------------
67
67
  function getTools() {
68
68
  return [
69
69
  {
70
- name: 'webpeel_fetch',
71
- description: 'Fetch any URL and return clean markdown content. Use budget=4000 to get token-efficient output (strips boilerplate, compresses tables). Handles JavaScript rendering and bot detection automatically. Use readable=true for article-only content, question="..." for instant Q&A.',
72
- annotations: { title: 'Fetch Web Page', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
70
+ name: 'webpeel',
71
+ description: "Your complete web toolkit. Describe what you want in plain language. " +
72
+ "Examples: 'read https://stripe.com', 'screenshot bbc.com on mobile', " +
73
+ "'find best AI frameworks', 'extract prices from stripe.com/pricing', " +
74
+ "'watch stripe.com/pricing for changes'",
75
+ annotations: { title: 'WebPeel Smart Web Tool', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
73
76
  inputSchema: {
74
77
  type: 'object',
75
78
  properties: {
76
- url: { type: 'string', description: 'URL to fetch' },
77
- format: { type: 'string', enum: ['markdown', 'html', 'text'], description: 'Output format (default: markdown)', default: 'markdown' },
78
- render: { type: 'boolean', description: 'Use browser rendering for JavaScript-heavy sites', default: false },
79
- stealth: { type: 'boolean', description: 'Stealth mode for bot-protected sites (Amazon, LinkedIn, etc.)', default: false },
80
- readable: { type: 'boolean', description: 'Reader mode — extract only article content, strip all noise', default: false },
81
- question: { type: 'string', description: 'Ask a question about the content (BM25, no LLM needed). Returns the most relevant passages.' },
82
- budget: { type: 'number', description: 'Smart token budget — distill content to N tokens' },
83
- lite: { type: 'boolean', description: 'Lite mode — minimal processing, maximum speed', default: false },
84
- selector: { type: 'string', description: 'CSS selector to extract specific content' },
85
- screenshot: { type: 'boolean', description: 'Also take a screenshot', default: false },
86
- wait: { type: 'number', description: 'Milliseconds to wait for dynamic content', default: 0 },
87
- maxTokens: { type: 'number', description: 'Maximum token count for output' },
88
- images: { type: 'boolean', description: 'Extract image URLs', default: false },
89
- inlineExtract: {
90
- type: 'object',
91
- description: 'Inline LLM-powered JSON extraction (BYOK). Provide schema and/or prompt.',
92
- properties: {
93
- schema: { type: 'object', description: 'JSON Schema for desired output' },
94
- prompt: { type: 'string', description: 'Extraction prompt' },
95
- },
96
- },
97
- llmProvider: { type: 'string', enum: ['openai', 'anthropic', 'google'], description: 'LLM provider for inline extraction' },
98
- llmApiKey: { type: 'string', description: 'LLM API key (BYOK) for inline extraction' },
99
- llmModel: { type: 'string', description: 'LLM model name (optional)' },
100
- actions: {
101
- type: 'array',
102
- items: {
103
- type: 'object',
104
- properties: {
105
- type: { type: 'string', enum: ['click', 'type', 'fill', 'scroll', 'wait', 'press', 'hover', 'select', 'waitForSelector', 'screenshot'] },
106
- selector: { type: 'string' },
107
- value: { type: 'string' },
108
- text: { type: 'string' },
109
- key: { type: 'string' },
110
- milliseconds: { type: 'number' },
111
- ms: { type: 'number' },
112
- direction: { type: 'string', enum: ['up', 'down', 'left', 'right'] },
113
- amount: { type: 'number' },
114
- timeout: { type: 'number' },
115
- },
116
- required: ['type'],
117
- },
118
- description: 'Page actions to execute before extraction (auto-enables browser rendering)',
119
- },
79
+ task: { type: 'string', description: 'Plain English description of what you want to do with the web.' },
120
80
  },
121
- required: ['url'],
81
+ required: ['task'],
122
82
  },
123
83
  },
124
84
  {
125
- name: 'webpeel_search',
126
- description: 'Search the web and return structured results with titles, URLs, and snippets. No API key needed.',
127
- annotations: { title: 'Search the Web', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
85
+ name: 'webpeel_read',
86
+ description: 'Read any URL and return clean markdown. Handles web pages, YouTube videos, and PDFs automatically. Use question= for Q&A about the page, summary=true for a summary.',
87
+ annotations: { title: 'Read Web Page', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
128
88
  inputSchema: {
129
89
  type: 'object',
130
90
  properties: {
131
- query: { type: 'string', description: 'Search query' },
132
- count: { type: 'number', description: 'Number of results (1-10)', default: 5 },
91
+ url: { type: 'string', description: 'URL to fetch' },
92
+ format: { type: 'string', enum: ['markdown', 'text', 'html'], description: 'Output format (default: markdown)', default: 'markdown' },
93
+ render: { type: 'boolean', description: 'Force browser rendering for JS-heavy sites', default: false },
94
+ question: { type: 'string', description: 'Ask a question about the page content (BM25, no LLM needed)' },
95
+ summary: { type: 'boolean', description: 'Return a summary instead of full content', default: false },
96
+ budget: { type: 'number', description: 'Smart token budget — distill content to N tokens' },
97
+ readable: { type: 'boolean', description: 'Reader mode — extract only article content', default: false },
133
98
  },
134
- required: ['query'],
99
+ required: ['url'],
135
100
  },
136
101
  },
137
102
  {
138
- name: 'webpeel_crawl',
139
- description: 'Crawl a website starting from a URL. Returns content for all discovered pages up to the specified depth/limit.',
140
- annotations: { title: 'Crawl Website', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
103
+ name: 'webpeel_see',
104
+ description: "See any page visually. Returns a screenshot. Use mode='design' for design analysis, mode='compare' with compare_url for visual comparison.",
105
+ annotations: { title: 'See Page Visually', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
141
106
  inputSchema: {
142
107
  type: 'object',
143
108
  properties: {
144
- url: { type: 'string', description: 'Starting URL' },
145
- maxPages: { type: 'number', default: 10, minimum: 1, maximum: 100 },
146
- maxDepth: { type: 'number', default: 2, minimum: 1, maximum: 5 },
147
- render: { type: 'boolean', default: false },
109
+ url: { type: 'string', description: 'URL to screenshot' },
110
+ mode: { type: 'string', enum: ['screenshot', 'design', 'compare'], description: "Mode: 'screenshot' (default), 'design' (analysis), 'compare' (visual diff)", default: 'screenshot' },
111
+ compare_url: { type: 'string', description: "Second URL to compare against (for mode='compare')" },
112
+ viewport: { type: 'string', enum: ['mobile', 'tablet', 'desktop'], description: 'Viewport size preset' },
113
+ full_page: { type: 'boolean', description: 'Capture the full scrollable page', default: false },
148
114
  },
149
115
  required: ['url'],
150
116
  },
151
117
  },
152
118
  {
153
- name: 'webpeel_map',
154
- description: 'Discover all URLs on a domain via sitemap and link crawling. Returns a structured URL list.',
155
- annotations: { title: 'Map Website URLs', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
119
+ name: 'webpeel_find',
120
+ description: "Find anything on the web. Pass a query to search, or a url to discover all pages on that domain. Use depth='deep' for multi-source research.",
121
+ annotations: { title: 'Find on the Web', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
156
122
  inputSchema: {
157
123
  type: 'object',
158
124
  properties: {
159
- url: { type: 'string', description: 'Starting URL or domain' },
160
- maxUrls: { type: 'number', default: 5000, minimum: 1, maximum: 10000 },
125
+ query: { type: 'string', description: 'Search query' },
126
+ url: { type: 'string', description: 'Domain URL to map/discover all pages' },
127
+ depth: { type: 'string', enum: ['quick', 'deep'], description: "Search depth: 'quick' = single search, 'deep' = multi-source research", default: 'quick' },
128
+ limit: { type: 'number', description: 'Max results to return (default: 5)', default: 5 },
161
129
  },
162
- required: ['url'],
163
130
  },
164
131
  },
165
132
  {
166
133
  name: 'webpeel_extract',
167
- description: 'Extract structured data from a URL using CSS selectors, JSON Schema, or LLM. Returns typed key-value pairs.',
134
+ description: "Extract structured data from any URL. Pass fields=['price','title'] for specific data, or omit for auto-detection. Returns typed JSON.",
168
135
  annotations: { title: 'Extract Structured Data', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
169
136
  inputSchema: {
170
137
  type: 'object',
171
138
  properties: {
172
139
  url: { type: 'string', description: 'URL to extract from' },
173
- selectors: { type: 'object', description: 'Map of field names to CSS selectors' },
174
- prompt: { type: 'string', description: 'Natural language prompt for AI extraction' },
175
- llmApiKey: { type: 'string', description: 'API key for LLM extraction' },
176
- },
177
- required: ['url'],
178
- },
179
- },
180
- {
181
- name: 'webpeel_batch',
182
- description: 'Fetch multiple URLs concurrently. Pass an array of URLs, get back an array of results.',
183
- annotations: { title: 'Batch Fetch URLs', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
184
- inputSchema: {
185
- type: 'object',
186
- properties: {
187
- urls: { type: 'array', items: { type: 'string' }, description: 'URLs to fetch' },
188
- concurrency: { type: 'number', default: 3, minimum: 1, maximum: 10 },
189
- format: { type: 'string', enum: ['markdown', 'text', 'html'], default: 'markdown' },
190
- },
191
- required: ['urls'],
192
- },
193
- },
194
- {
195
- name: 'webpeel_research',
196
- description: 'Multi-step web research: searches the web, fetches top sources, follows leads, and synthesizes findings into a report with citations.',
197
- annotations: { title: 'Deep Research Agent', readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
198
- inputSchema: {
199
- type: 'object',
200
- properties: {
201
- query: { type: 'string', description: 'Research question or topic to investigate' },
202
- maxSources: { type: 'number', description: 'Maximum number of sources to consult (default: 5)', default: 5, minimum: 1, maximum: 20 },
203
- maxDepth: { type: 'number', description: 'Link-following depth: 1 = search results only, 2+ = follow links within top sources (default: 1)', default: 1, minimum: 1, maximum: 3 },
204
- llmApiKey: { type: 'string', description: 'LLM API key for synthesis (falls back to OPENAI_API_KEY env var)' },
205
- llmModel: { type: 'string', description: 'LLM model to use for synthesis (default: gpt-4o-mini)' },
206
- llmBaseUrl: { type: 'string', description: 'LLM API base URL (default: https://api.openai.com/v1)' },
207
- outputFormat: { type: 'string', enum: ['report', 'sources'], description: 'Output format: "report" = synthesized markdown report (needs LLM key), "sources" = raw extracted source content', default: 'report' },
208
- timeout: { type: 'number', description: 'Maximum research time in milliseconds (default: 60000)', default: 60000 },
209
- },
210
- required: ['query'],
211
- },
212
- },
213
- {
214
- name: 'webpeel_screenshot',
215
- description: 'Take a screenshot of any URL. Returns a PNG image. Supports full-page capture and viewport sizing.',
216
- annotations: { title: 'Take Screenshot', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
217
- inputSchema: {
218
- type: 'object',
219
- properties: {
220
- url: { type: 'string', description: 'The URL to screenshot' },
221
- fullPage: { type: 'boolean', description: 'Capture the full scrollable page (default: viewport only)', default: false },
222
- width: { type: 'number', description: 'Viewport width in pixels (default: 1280)', default: 1280, minimum: 100, maximum: 5000 },
223
- height: { type: 'number', description: 'Viewport height in pixels (default: 720)', default: 720, minimum: 100, maximum: 5000 },
224
- format: { type: 'string', enum: ['png', 'jpeg'], description: 'Image format (default: png)', default: 'png' },
225
- quality: { type: 'number', description: 'JPEG quality 1-100 (ignored for PNG)', minimum: 1, maximum: 100 },
226
- waitFor: { type: 'number', description: 'Milliseconds to wait after page load before screenshot', default: 0 },
227
- stealth: { type: 'boolean', description: 'Use stealth mode to bypass bot detection', default: false },
228
- actions: { type: 'array', items: { type: 'object' }, description: 'Page actions to execute before screenshot' },
229
- },
230
- required: ['url'],
231
- },
232
- },
233
- {
234
- name: 'webpeel_design_analysis',
235
- description: 'Extract visual design intelligence from a URL — palette, typography, layout, quality signals.',
236
- annotations: { title: 'Analyze Visual Design', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
237
- inputSchema: {
238
- type: 'object',
239
- properties: {
240
- url: { type: 'string', description: 'URL to analyze' },
241
- },
242
- required: ['url'],
243
- },
244
- },
245
- {
246
- name: 'webpeel_design_compare',
247
- description: 'Compare the visual design of two URLs — returns structured gap analysis.',
248
- annotations: { title: 'Compare Visual Design', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
249
- inputSchema: {
250
- type: 'object',
251
- properties: {
252
- url1: { type: 'string', description: 'First URL to compare (subject)' },
253
- url2: { type: 'string', description: 'Second URL to compare (reference)' },
254
- },
255
- required: ['url1', 'url2'],
256
- },
257
- },
258
- {
259
- name: 'webpeel_summarize',
260
- description: 'Generate an AI summary of a URL\'s content. Requires an LLM API key (BYOK).',
261
- annotations: { title: 'Summarize Page', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
262
- inputSchema: {
263
- type: 'object',
264
- properties: {
265
- url: { type: 'string', description: 'URL to summarize' },
266
- llmApiKey: { type: 'string', description: 'API key for LLM (OpenAI-compatible)' },
267
- prompt: { type: 'string', description: 'Custom summary prompt (default: "Summarize this webpage in 2-3 sentences.")', default: 'Summarize this webpage in 2-3 sentences.' },
268
- llmModel: { type: 'string', description: 'LLM model to use (default: gpt-4o-mini)', default: 'gpt-4o-mini' },
269
- llmBaseUrl: { type: 'string', description: 'LLM API base URL (default: https://api.openai.com/v1)', default: 'https://api.openai.com/v1' },
270
- render: { type: 'boolean', description: 'Use browser rendering', default: false },
271
- },
272
- required: ['url', 'llmApiKey'],
273
- },
274
- },
275
- {
276
- name: 'webpeel_answer',
277
- description: 'Ask a question about a URL and get an AI-generated answer with citations. Requires an LLM API key (BYOK). For LLM-free Q&A, use webpeel_quick_answer instead.',
278
- annotations: { title: 'Answer a Question', readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
279
- inputSchema: {
280
- type: 'object',
281
- properties: {
282
- question: { type: 'string', description: 'The question to answer' },
283
- searchProvider: { type: 'string', enum: ['duckduckgo', 'brave', 'stealth', 'google'], description: 'Search provider (default: duckduckgo)', default: 'duckduckgo' },
284
- searchApiKey: { type: 'string', description: 'API key for Brave Search (required when searchProvider is "brave")' },
285
- llmProvider: { type: 'string', enum: ['openai', 'anthropic', 'google'], description: 'LLM provider to use for answer generation' },
286
- llmApiKey: { type: 'string', description: 'API key for the LLM provider (BYOK)' },
287
- llmModel: { type: 'string', description: 'LLM model name (optional, uses provider default)' },
288
- maxSources: { type: 'number', description: 'Maximum number of sources to fetch (1-10, default 5)', default: 5, minimum: 1, maximum: 10 },
289
- },
290
- required: ['question', 'llmProvider', 'llmApiKey'],
291
- },
292
- },
293
- {
294
- name: 'webpeel_brand',
295
- description: 'Extract branding assets from a URL: logo, colors, fonts, and social links.',
296
- annotations: { title: 'Extract Branding', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
297
- inputSchema: {
298
- type: 'object',
299
- properties: {
300
- url: { type: 'string', description: 'URL to extract branding from' },
301
- render: { type: 'boolean', description: 'Use browser rendering', default: false },
302
- },
303
- required: ['url'],
304
- },
305
- },
306
- {
307
- name: 'webpeel_change_track',
308
- description: 'Track content changes on a URL. First call saves a snapshot, subsequent calls show what changed.',
309
- annotations: { title: 'Track Page Changes', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
310
- inputSchema: {
311
- type: 'object',
312
- properties: {
313
- url: { type: 'string', description: 'URL to track for changes' },
314
- render: { type: 'boolean', description: 'Use browser rendering', default: false },
315
- },
316
- required: ['url'],
317
- },
318
- },
319
- {
320
- name: 'webpeel_deep_fetch',
321
- description: 'Search + fetch + analyze in one call. Fetches multiple sources for a query, scores by relevance, deduplicates facts, and merges into structured intelligence. No LLM key needed. Supports \'comparison\' format for vs-queries.',
322
- annotations: { title: 'Deep Fetch Research', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
323
- inputSchema: {
324
- type: 'object',
325
- properties: {
326
- query: { type: 'string', description: 'Search query to research' },
327
- count: { type: 'number', description: 'Number of top results to fetch (default: 5, max: 10)', default: 5, minimum: 1, maximum: 10 },
328
- format: { type: 'string', enum: ['markdown', 'text', 'comparison'], description: 'Content format (default: markdown). Use "comparison" for vs-queries to get a side-by-side structure.', default: 'markdown' },
329
- },
330
- required: ['query'],
331
- },
332
- },
333
- {
334
- name: 'webpeel_youtube',
335
- description: 'Extract the full transcript from a YouTube video. Returns timestamped segments and video metadata. No API key needed. Supports all YouTube URL formats.',
336
- annotations: { title: 'Extract YouTube Transcript', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
337
- inputSchema: {
338
- type: 'object',
339
- properties: {
340
- url: { type: 'string', description: 'YouTube video URL (supports youtube.com/watch, youtu.be, embed, shorts, and mobile URLs)' },
341
- language: { type: 'string', description: 'Preferred transcript language code (default: en). Falls back to any available language if not found.' },
140
+ schema: { type: 'object', description: 'JSON schema describing desired output structure' },
141
+ fields: { type: 'array', items: { type: 'string' }, description: "Specific fields to extract, e.g. ['price', 'title', 'description']" },
142
+ format: { type: 'string', enum: ['json', 'markdown'], description: 'Output format (default: json)', default: 'json' },
342
143
  },
343
144
  required: ['url'],
344
145
  },
345
146
  },
346
147
  {
347
- name: 'webpeel_auto_extract',
348
- description: 'Detect page type and extract structured JSON automatically. Supports pricing pages, product listings, contact info, articles, and API documentation. No LLM needed.',
148
+ name: 'webpeel_monitor',
149
+ description: 'Watch a URL for changes. Returns diff on subsequent calls. Add webhook= for persistent monitoring with notifications.',
150
+ annotations: { title: 'Monitor URL for Changes', readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
349
151
  inputSchema: {
350
152
  type: 'object',
351
153
  properties: {
352
- url: { type: 'string', description: 'URL to fetch and auto-extract structured data from' },
154
+ url: { type: 'string', description: 'URL to monitor' },
155
+ webhook: { type: 'string', description: 'Webhook URL to notify when content changes' },
156
+ interval: { type: 'string', description: "Check interval, e.g. '1h', '30m', '1d'", default: '1h' },
157
+ selector: { type: 'string', description: 'CSS selector to monitor a specific part of the page' },
353
158
  },
354
159
  required: ['url'],
355
160
  },
356
161
  },
357
162
  {
358
- name: 'webpeel_quick_answer',
359
- description: 'Ask a question about a URL\'s content no LLM key needed. Uses BM25 relevance scoring to find and return the most relevant passages. Returns answer text with confidence score.',
360
- annotations: { title: 'Quick Answer (No LLM)', readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true },
361
- inputSchema: {
362
- type: 'object',
363
- properties: {
364
- url: { type: 'string', description: 'URL to fetch and search' },
365
- question: { type: 'string', description: 'Question to answer from the page content' },
366
- maxPassages: { type: 'number', description: 'Maximum number of relevant passages to return (default: 3)', default: 3, minimum: 1, maximum: 10 },
367
- render: { type: 'boolean', description: 'Use browser rendering', default: false },
368
- },
369
- required: ['url', 'question'],
370
- },
371
- },
372
- {
373
- name: 'webpeel_watch',
374
- description: 'Monitor a URL for changes with webhook notifications. Create persistent watchers that check on a schedule and alert when content changes.',
375
- inputSchema: {
376
- type: 'object',
377
- properties: {
378
- action: { type: 'string', enum: ['create', 'list', 'check', 'delete'], description: 'Watch action to perform' },
379
- url: { type: 'string', description: 'URL to monitor (for create)' },
380
- id: { type: 'string', description: 'Watch ID (for check/delete)' },
381
- webhookUrl: { type: 'string', description: 'Webhook URL to notify on changes (for create)' },
382
- intervalMinutes: { type: 'number', description: 'Check interval in minutes (default: 60)' },
383
- selector: { type: 'string', description: 'CSS selector to monitor specific content (optional)' },
384
- },
385
- required: ['action'],
386
- },
387
- },
388
- {
389
- name: 'webpeel_hotels',
390
- description: 'Search multiple travel sites for hotels in parallel. Returns sorted results from Kayak, Booking.com, Google Travel, and Expedia.',
163
+ name: 'webpeel_act',
164
+ description: 'Interact with a web page. Click buttons, fill forms, navigate. Returns screenshot + extracted content after actions complete.',
165
+ annotations: { title: 'Act on Web Page', readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
391
166
  inputSchema: {
392
167
  type: 'object',
393
168
  properties: {
394
- destination: { type: 'string', description: 'Destination city or area (e.g., "Manhattan", "Paris")' },
395
- checkin: { type: 'string', description: 'Check-in date (ISO or natural language like "tomorrow")' },
396
- checkout: { type: 'string', description: 'Check-out date. Defaults to day after checkin.' },
397
- sort: { type: 'string', enum: ['price', 'rating', 'value'], description: 'Sort order (default: price)' },
398
- limit: { type: 'number', description: 'Max results (default: 20)' },
399
- },
400
- required: ['destination'],
401
- },
402
- },
403
- {
404
- name: 'agent',
405
- description: 'Web data agent — search, fetch, and extract structured data in one call. Give it a prompt and URLs or search query, get back clean structured results. Works without an LLM key using BM25 extraction.',
406
- annotations: { title: 'Web Data Agent', readOnlyHint: true, destructiveHint: false, idempotentHint: false, openWorldHint: true },
407
- inputSchema: {
408
- type: 'object',
409
- properties: {
410
- prompt: { type: 'string', description: 'What data do you want? e.g. "Find the CEO and revenue for each company"' },
411
- urls: { type: 'array', items: { type: 'string' }, description: 'URLs to fetch and extract from' },
412
- search: { type: 'string', description: 'Search query to find relevant pages' },
413
- schema: { type: 'object', description: 'Output schema as an object of field names to type strings (e.g. {"company":"string","ceo":"string"})' },
414
- maxResults: { type: 'number', description: 'Max pages to process (default: 5)' },
415
- budget: { type: 'number', description: 'Token budget per page (default: 4000)' },
416
- llmApiKey: { type: 'string', description: 'Your LLM API key for AI extraction (optional — works without it using BM25)' },
417
- llmProvider: { type: 'string', description: 'LLM provider: openai, anthropic, etc. (default: openai)' },
169
+ url: { type: 'string', description: 'URL to interact with' },
170
+ actions: {
171
+ type: 'array',
172
+ description: 'Actions to perform, e.g. [{type:"click",selector:".btn"}, {type:"type",selector:"#q",value:"hello"}]',
173
+ items: {
174
+ type: 'object',
175
+ properties: {
176
+ type: { type: 'string', enum: ['click', 'type', 'fill', 'scroll', 'wait', 'press', 'hover', 'select'] },
177
+ selector: { type: 'string' },
178
+ value: { type: 'string' },
179
+ key: { type: 'string' },
180
+ milliseconds: { type: 'number' },
181
+ },
182
+ required: ['type'],
183
+ },
184
+ },
185
+ extract_after: { type: 'boolean', description: 'Extract content after actions complete', default: true },
186
+ screenshot_after: { type: 'boolean', description: 'Take screenshot after actions complete', default: false },
418
187
  },
419
- required: [],
188
+ required: ['url', 'actions'],
420
189
  },
421
190
  },
422
191
  ];