recker 1.0.29 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/README.md +28 -1
  2. package/dist/ai/client-ai.d.ts +41 -0
  3. package/dist/ai/client-ai.js +391 -0
  4. package/dist/ai/index.d.ts +2 -0
  5. package/dist/ai/index.js +2 -0
  6. package/dist/ai/memory.d.ts +35 -0
  7. package/dist/ai/memory.js +136 -0
  8. package/dist/browser/ai/client-ai.d.ts +41 -0
  9. package/dist/browser/ai/client-ai.js +391 -0
  10. package/dist/browser/ai/memory.d.ts +35 -0
  11. package/dist/browser/ai/memory.js +136 -0
  12. package/dist/browser/core/client.d.ts +6 -1
  13. package/dist/browser/core/client.js +18 -0
  14. package/dist/browser/transport/undici.js +11 -2
  15. package/dist/browser/types/ai-client.d.ts +32 -0
  16. package/dist/browser/types/ai-client.js +1 -0
  17. package/dist/browser/types/ai.d.ts +1 -1
  18. package/dist/cli/index.js +402 -1
  19. package/dist/cli/tui/scroll-buffer.js +4 -4
  20. package/dist/cli/tui/shell.d.ts +3 -0
  21. package/dist/cli/tui/shell.js +166 -19
  22. package/dist/core/client.d.ts +6 -1
  23. package/dist/core/client.js +18 -0
  24. package/dist/mcp/server.js +15 -0
  25. package/dist/mcp/tools/scrape.d.ts +3 -0
  26. package/dist/mcp/tools/scrape.js +156 -0
  27. package/dist/mcp/tools/security.d.ts +3 -0
  28. package/dist/mcp/tools/security.js +471 -0
  29. package/dist/mcp/tools/seo.d.ts +3 -0
  30. package/dist/mcp/tools/seo.js +427 -0
  31. package/dist/presets/anthropic.d.ts +3 -1
  32. package/dist/presets/anthropic.js +11 -1
  33. package/dist/presets/azure-openai.d.ts +3 -1
  34. package/dist/presets/azure-openai.js +11 -1
  35. package/dist/presets/cohere.d.ts +3 -1
  36. package/dist/presets/cohere.js +8 -2
  37. package/dist/presets/deepseek.d.ts +3 -1
  38. package/dist/presets/deepseek.js +8 -2
  39. package/dist/presets/fireworks.d.ts +3 -1
  40. package/dist/presets/fireworks.js +8 -2
  41. package/dist/presets/gemini.d.ts +3 -1
  42. package/dist/presets/gemini.js +8 -1
  43. package/dist/presets/groq.d.ts +3 -1
  44. package/dist/presets/groq.js +8 -2
  45. package/dist/presets/huggingface.d.ts +3 -1
  46. package/dist/presets/huggingface.js +8 -1
  47. package/dist/presets/mistral.d.ts +3 -1
  48. package/dist/presets/mistral.js +8 -2
  49. package/dist/presets/openai.d.ts +3 -1
  50. package/dist/presets/openai.js +9 -2
  51. package/dist/presets/perplexity.d.ts +3 -1
  52. package/dist/presets/perplexity.js +8 -2
  53. package/dist/presets/registry.d.ts +4 -0
  54. package/dist/presets/registry.js +48 -0
  55. package/dist/presets/replicate.d.ts +3 -1
  56. package/dist/presets/replicate.js +8 -1
  57. package/dist/presets/together.d.ts +3 -1
  58. package/dist/presets/together.js +8 -2
  59. package/dist/presets/xai.d.ts +3 -1
  60. package/dist/presets/xai.js +8 -2
  61. package/dist/scrape/spider.js +1 -1
  62. package/dist/transport/undici.js +11 -2
  63. package/dist/types/ai-client.d.ts +32 -0
  64. package/dist/types/ai-client.js +1 -0
  65. package/dist/types/ai.d.ts +1 -1
  66. package/dist/utils/colors.d.ts +2 -0
  67. package/dist/utils/colors.js +4 -0
  68. package/package.json +1 -1
@@ -16,6 +16,7 @@ import { getShellSearch } from './shell-search.js';
16
16
  import { openSearchPanel } from './search-panel.js';
17
17
  import { ScrollBuffer, parseScrollKey, parseMouseScroll, disableMouseReporting } from './scroll-buffer.js';
18
18
  import { analyzeSeo, SeoSpider } from '../../seo/index.js';
19
+ import { resolvePreset } from '../presets.js';
19
20
  let highlight;
20
21
  async function initDependencies() {
21
22
  if (!highlight) {
@@ -43,6 +44,7 @@ export class RekShell {
43
44
  scrollBuffer;
44
45
  originalStdoutWrite = null;
45
46
  inScrollMode = false;
47
+ aiClients = new Map();
46
48
  constructor() {
47
49
  this.client = createClient({
48
50
  baseUrl: 'http://localhost',
@@ -94,8 +96,10 @@ export class RekShell {
94
96
  const commands = [
95
97
  'get', 'post', 'put', 'delete', 'patch', 'head', 'options',
96
98
  'ws', 'udp', 'load', 'chat', 'ai',
99
+ '@openai', '@anthropic', '@groq', '@google', '@xai', '@mistral', '@cohere', '@deepseek', '@fireworks', '@together', '@perplexity',
100
+ 'ai:clear',
97
101
  'whois', 'tls', 'ssl', 'security', 'ip', 'dns', 'dns:propagate', 'dns:email', 'rdap', 'ping',
98
- 'scrap', 'spider', '$', '$text', '$attr', '$html', '$links', '$images', '$scripts', '$css', '$sourcemaps', '$unmap', '$unmap:view', '$unmap:save', '$beautify', '$beautify:save', '$table',
102
+ 'scrap', 'spider', 'seo', '$', '$text', '$attr', '$html', '$links', '$images', '$scripts', '$css', '$sourcemaps', '$unmap', '$unmap:view', '$unmap:save', '$beautify', '$beautify:save', '$table',
99
103
  '?', 'search', 'suggest', 'example',
100
104
  'help', 'clear', 'exit', 'set', 'url', 'vars', 'env'
101
105
  ];
@@ -173,20 +177,36 @@ export class RekShell {
173
177
  }
174
178
  return true;
175
179
  }
176
- const scrollKey = parseScrollKey(data);
177
- if (scrollKey) {
178
- if (scrollKey === 'quit') {
179
- if (self.inScrollMode) {
180
+ try {
181
+ const scrollKey = parseScrollKey(data);
182
+ if (scrollKey) {
183
+ if (scrollKey === 'quit') {
184
+ if (self.inScrollMode) {
185
+ self.exitScrollMode();
186
+ return true;
187
+ }
188
+ return originalEmit(event, ...args);
189
+ }
190
+ self.handleScrollKey(scrollKey);
191
+ return true;
192
+ }
193
+ if (self.inScrollMode) {
194
+ if (str === '\x1b[A') {
195
+ self.handleScrollKey('scrollUp');
196
+ return true;
197
+ }
198
+ if (str === '\x1b[B') {
199
+ self.handleScrollKey('scrollDown');
200
+ return true;
201
+ }
202
+ if (str === '\x1b' || str === '\x1b\x1b') {
180
203
  self.exitScrollMode();
181
204
  return true;
182
205
  }
183
- return originalEmit(event, ...args);
206
+ return true;
184
207
  }
185
- self.handleScrollKey(scrollKey);
186
- return true;
187
208
  }
188
- if (self.inScrollMode) {
189
- return true;
209
+ catch {
190
210
  }
191
211
  }
192
212
  return originalEmit(event, ...args);
@@ -194,6 +214,9 @@ export class RekShell {
194
214
  }
195
215
  }
196
216
  handleScrollKey(key) {
217
+ if (!this.originalStdoutWrite) {
218
+ return;
219
+ }
197
220
  let needsRedraw = false;
198
221
  switch (key) {
199
222
  case 'pageUp':
@@ -250,11 +273,15 @@ export class RekShell {
250
273
  enterScrollMode() {
251
274
  if (this.inScrollMode)
252
275
  return;
276
+ if (!this.originalStdoutWrite)
277
+ return;
253
278
  this.inScrollMode = true;
254
- this.rl.pause();
255
- if (this.originalStdoutWrite) {
256
- this.originalStdoutWrite('\x1b[?25l');
279
+ try {
280
+ this.rl.pause();
281
+ }
282
+ catch {
257
283
  }
284
+ this.originalStdoutWrite('\x1b[?25l');
258
285
  this.renderScrollView();
259
286
  }
260
287
  exitScrollMode() {
@@ -288,7 +315,7 @@ export class RekShell {
288
315
  const scrollInfo = this.scrollBuffer.isScrolledUp
289
316
  ? colors.yellow(`↑ ${this.scrollBuffer.position} lines | ${info.percent}% | `)
290
317
  : '';
291
- const helpText = colors.gray('Page Up/Down • Home/End • Q to exit');
318
+ const helpText = colors.gray('↑↓/PgUp/PgDn • Home/End • Esc/Q to exit');
292
319
  const statusBar = `\x1b[${rows};1H\x1b[7m ${scrollInfo}${helpText} \x1b[0m`;
293
320
  this.originalStdoutWrite(statusBar);
294
321
  }
@@ -297,6 +324,21 @@ export class RekShell {
297
324
  this.rl.prompt();
298
325
  }
299
326
  async handleCommand(input) {
327
+ if (input.startsWith('@')) {
328
+ const spaceIdx = input.indexOf(' ');
329
+ if (spaceIdx > 1) {
330
+ const presetName = input.slice(1, spaceIdx).toLowerCase();
331
+ const message = input.slice(spaceIdx + 1).trim();
332
+ if (message) {
333
+ await this.runAIPresetChat(presetName, message);
334
+ return;
335
+ }
336
+ }
337
+ console.log(colors.yellow('Usage: @<preset> <message>'));
338
+ console.log(colors.gray('Example: @openai Hello, how are you?'));
339
+ console.log(colors.gray('Available AI presets: openai, anthropic, groq, google, xai, mistral, cohere'));
340
+ return;
341
+ }
300
342
  if (input.endsWith('?') && !input.startsWith('?') && input.length > 1) {
301
343
  await this.runSearch(input.slice(0, -1).trim());
302
344
  return;
@@ -312,6 +354,9 @@ export class RekShell {
312
354
  case 'clear':
313
355
  console.clear();
314
356
  return;
357
+ case 'ai:clear':
358
+ this.clearAIMemory(parts[1]);
359
+ return;
315
360
  case 'exit':
316
361
  case 'quit':
317
362
  this.rl.close();
@@ -507,6 +552,96 @@ export class RekShell {
507
552
  await startAIChat(rl, provider, apiKey, model);
508
553
  });
509
554
  }
555
+ async runAIPresetChat(presetName, message) {
556
+ try {
557
+ let client = this.aiClients.get(presetName);
558
+ if (!client) {
559
+ const presetConfig = resolvePreset(presetName);
560
+ if (!presetConfig) {
561
+ console.log(colors.red(`Unknown AI preset: @${presetName}`));
562
+ console.log(colors.gray('Available AI presets: openai, anthropic, groq, google, xai, mistral, cohere, deepseek, fireworks, together, perplexity'));
563
+ return;
564
+ }
565
+ if (!presetConfig._aiConfig) {
566
+ console.log(colors.red(`Preset @${presetName} does not support AI features.`));
567
+ console.log(colors.gray('Use an AI preset like @openai, @anthropic, @groq, etc.'));
568
+ return;
569
+ }
570
+ client = createClient(presetConfig);
571
+ this.aiClients.set(presetName, client);
572
+ }
573
+ if (!client.hasAI) {
574
+ console.log(colors.red(`Preset @${presetName} does not have AI capabilities.`));
575
+ return;
576
+ }
577
+ const model = client._aiConfig?.model || presetName;
578
+ console.log(colors.gray(`\n${presetName} (${model}) is thinking...`));
579
+ const stream = await client.ai.chatStream(message);
580
+ process.stdout.write('\n');
581
+ for await (const event of stream) {
582
+ if (event.type === 'text') {
583
+ process.stdout.write(colors.orange(event.content));
584
+ }
585
+ else if (event.type === 'error') {
586
+ console.log(colors.red(`\nError: ${event.error}`));
587
+ }
588
+ }
589
+ const memory = client.ai.getMemory();
590
+ const pairs = Math.floor(memory.length / 2);
591
+ console.log(colors.reset(''));
592
+ console.log(colors.gray(`Memory: ${pairs}/12 pairs (${memory.length} messages)`));
593
+ }
594
+ catch (error) {
595
+ if (error.message?.includes('API key')) {
596
+ console.log(colors.red(`\nMissing API key for @${presetName}`));
597
+ const envVarMap = {
598
+ openai: 'OPENAI_API_KEY',
599
+ anthropic: 'ANTHROPIC_API_KEY',
600
+ google: 'GOOGLE_API_KEY',
601
+ groq: 'GROQ_API_KEY',
602
+ xai: 'XAI_API_KEY',
603
+ mistral: 'MISTRAL_API_KEY',
604
+ cohere: 'COHERE_API_KEY',
605
+ deepseek: 'DEEPSEEK_API_KEY',
606
+ fireworks: 'FIREWORKS_API_KEY',
607
+ together: 'TOGETHER_API_KEY',
608
+ perplexity: 'PERPLEXITY_API_KEY',
609
+ };
610
+ const envVar = envVarMap[presetName] || `${presetName.toUpperCase()}_API_KEY`;
611
+ console.log(colors.gray(`Set ${envVar} environment variable to use this preset.`));
612
+ }
613
+ else {
614
+ console.log(colors.red(`\nError: ${error.message || error}`));
615
+ }
616
+ }
617
+ }
618
+ clearAIMemory(presetName) {
619
+ if (presetName) {
620
+ const client = this.aiClients.get(presetName);
621
+ if (client && client.hasAI) {
622
+ client.ai.clearMemory();
623
+ console.log(colors.green(`Cleared AI memory for @${presetName}`));
624
+ }
625
+ else {
626
+ console.log(colors.yellow(`No active AI session for @${presetName}`));
627
+ }
628
+ }
629
+ else {
630
+ let cleared = 0;
631
+ for (const [name, client] of this.aiClients) {
632
+ if (client.hasAI) {
633
+ client.ai.clearMemory();
634
+ cleared++;
635
+ }
636
+ }
637
+ if (cleared > 0) {
638
+ console.log(colors.green(`Cleared AI memory for ${cleared} preset(s)`));
639
+ }
640
+ else {
641
+ console.log(colors.yellow('No active AI sessions to clear'));
642
+ }
643
+ }
644
+ }
510
645
  async runLoadTest(args) {
511
646
  let targetUrl = '';
512
647
  let users = 50;
@@ -1494,7 +1629,7 @@ ${colors.bold('Network:')}
1494
1629
  }
1495
1630
  async runSpider(args) {
1496
1631
  let url = '';
1497
- let maxDepth = 3;
1632
+ let maxDepth = 5;
1498
1633
  let maxPages = 100;
1499
1634
  let concurrency = 5;
1500
1635
  let seoEnabled = false;
@@ -1502,7 +1637,7 @@ ${colors.bold('Network:')}
1502
1637
  for (let i = 0; i < args.length; i++) {
1503
1638
  const arg = args[i];
1504
1639
  if (arg.startsWith('depth=')) {
1505
- maxDepth = parseInt(arg.split('=')[1]) || 4;
1640
+ maxDepth = parseInt(arg.split('=')[1]) || 5;
1506
1641
  }
1507
1642
  else if (arg.startsWith('limit=')) {
1508
1643
  maxPages = parseInt(arg.split('=')[1]) || 100;
@@ -1524,7 +1659,7 @@ ${colors.bold('Network:')}
1524
1659
  if (!this.baseUrl) {
1525
1660
  console.log(colors.yellow('Usage: spider <url> [options]'));
1526
1661
  console.log(colors.gray(' Options:'));
1527
- console.log(colors.gray(' depth=4 Max crawl depth'));
1662
+ console.log(colors.gray(' depth=5 Max crawl depth'));
1528
1663
  console.log(colors.gray(' limit=100 Max pages to crawl'));
1529
1664
  console.log(colors.gray(' concurrency=5 Concurrent requests'));
1530
1665
  console.log(colors.gray(' seo Enable SEO analysis'));
@@ -2656,6 +2791,17 @@ ${colors.bold('Network:')}
2656
2791
  ${colors.green('ws <url>')} Start interactive WebSocket session.
2657
2792
  ${colors.green('udp <url>')} Send UDP packet.
2658
2793
 
2794
+ ${colors.bold('AI Chat:')}
2795
+ ${colors.green('@openai <message>')} Chat with OpenAI (GPT) with memory.
2796
+ ${colors.green('@anthropic <msg>')} Chat with Anthropic (Claude) with memory.
2797
+ ${colors.green('@groq <message>')} Chat with Groq (fast inference).
2798
+ ${colors.green('@google <message>')} Chat with Google (Gemini).
2799
+ ${colors.green('@xai <message>')} Chat with xAI (Grok).
2800
+ ${colors.green('@mistral <message>')} Chat with Mistral AI.
2801
+ ${colors.gray('Memory:')} ${colors.white('12 pairs (24 messages)')} preserved per preset.
2802
+ ${colors.gray('Env:')} Set ${colors.white('OPENAI_API_KEY')}, ${colors.white('ANTHROPIC_API_KEY')}, etc.
2803
+ ${colors.green('ai:clear [preset]')} Clear AI memory (all or specific preset).
2804
+
2659
2805
  ${colors.bold('Network Tools:')}
2660
2806
  ${colors.green('whois <domain>')} WHOIS lookup (domain or IP).
2661
2807
  ${colors.green('tls <host> [port]')} Inspect TLS/SSL certificate.
@@ -2687,7 +2833,7 @@ ${colors.bold('Network:')}
2687
2833
  ${colors.bold('Web Crawler:')}
2688
2834
  ${colors.green('spider <url>')} Crawl website following internal links.
2689
2835
  ${colors.gray('Options:')}
2690
- ${colors.white('--depth=4')} ${colors.gray('Maximum depth to crawl')}
2836
+ ${colors.white('--depth=5')} ${colors.gray('Maximum depth to crawl')}
2691
2837
  ${colors.white('--limit=100')} ${colors.gray('Maximum pages to crawl')}
2692
2838
  ${colors.white('--concurrency=5')} ${colors.gray('Parallel requests')}
2693
2839
 
@@ -2707,7 +2853,8 @@ ${colors.bold('Network:')}
2707
2853
  › get /json
2708
2854
  › post /post name="Neo" active:=true role:Admin
2709
2855
  › load /heavy-endpoint users=100 mode=stress
2710
- chat openai gpt-5.1
2856
+ @openai What is the capital of France?
2857
+ › @anthropic Explain quantum computing
2711
2858
  › spider example.com depth=2 limit=50
2712
2859
  `);
2713
2860
  }
@@ -1,4 +1,5 @@
1
1
  import { ClientOptions, Middleware, ReckerRequest, ReckerResponse, RequestOptions, CacheStorage, PageResult } from '../types/index.js';
2
+ import type { ClientAI, ClientOptionsWithAI } from '../types/ai-client.js';
2
3
  import { RequestPromise } from './request-promise.js';
3
4
  import { PaginationOptions } from '../plugins/pagination.js';
4
5
  import { RetryOptions } from '../plugins/retry.js';
@@ -36,7 +37,9 @@ export declare class Client {
36
37
  private cookieJar?;
37
38
  private cookieIgnoreInvalid;
38
39
  private defaultTimeout?;
39
- constructor(options?: ExtendedClientOptions);
40
+ private _aiConfig?;
41
+ private _ai?;
42
+ constructor(options?: ExtendedClientOptions & Partial<ClientOptionsWithAI>);
40
43
  private createLoggingMiddleware;
41
44
  private createMaxSizeMiddleware;
42
45
  private setupCookieJar;
@@ -113,6 +116,8 @@ export declare class Client {
113
116
  whois(query: string, options?: WhoisOptions): Promise<WhoisResult>;
114
117
  isDomainAvailable(domain: string, options?: WhoisOptions): Promise<boolean>;
115
118
  hls(manifestUrl: string, options?: HlsOptions): HlsPromise;
119
+ get ai(): ClientAI;
120
+ get hasAI(): boolean;
116
121
  }
117
122
  export declare function createClient(options?: ExtendedClientOptions): Client;
118
123
  export {};
@@ -1,4 +1,5 @@
1
1
  import { consoleLogger } from '../types/index.js';
2
+ import { ClientAIImpl } from '../ai/client-ai.js';
2
3
  import { HttpRequest } from './request.js';
3
4
  import { UndiciTransport } from '../transport/undici.js';
4
5
  import { RequestPromise } from './request-promise.js';
@@ -41,6 +42,8 @@ export class Client {
41
42
  cookieJar;
42
43
  cookieIgnoreInvalid = false;
43
44
  defaultTimeout;
45
+ _aiConfig;
46
+ _ai;
44
47
  constructor(options = {}) {
45
48
  this.baseUrl = options.baseUrl || '';
46
49
  this.middlewares = options.middlewares || [];
@@ -150,6 +153,9 @@ export class Client {
150
153
  if (options.cookies) {
151
154
  this.setupCookieJar(options.cookies);
152
155
  }
156
+ if (options._aiConfig) {
157
+ this._aiConfig = options._aiConfig;
158
+ }
153
159
  if (this.maxResponseSize !== undefined) {
154
160
  this.middlewares.push(this.createMaxSizeMiddleware(this.maxResponseSize));
155
161
  }
@@ -661,6 +667,18 @@ export class Client {
661
667
  hls(manifestUrl, options = {}) {
662
668
  return new HlsPromise(this, manifestUrl, options);
663
669
  }
670
+ get ai() {
671
+ if (!this._ai) {
672
+ if (!this._aiConfig) {
673
+ throw new ConfigurationError('AI features require an AI-enabled preset. Use createClient(openai({...})), createClient(anthropic({...})), etc.', { configKey: '_aiConfig' });
674
+ }
675
+ this._ai = new ClientAIImpl(this, this._aiConfig);
676
+ }
677
+ return this._ai;
678
+ }
679
+ get hasAI() {
680
+ return this._aiConfig !== undefined;
681
+ }
664
682
  }
665
683
  export function createClient(options = {}) {
666
684
  return new Client(options);
@@ -8,6 +8,9 @@ import { createHybridSearch } from './search/index.js';
8
8
  import { UnsupportedError } from '../core/errors.js';
9
9
  import { getIpInfo, isValidIP, isGeoIPAvailable, isBogon, isIPv6 } from './ip-intel.js';
10
10
  import { networkTools, networkToolHandlers } from './tools/network.js';
11
+ import { seoTools, seoToolHandlers } from './tools/seo.js';
12
+ import { scrapeTools, scrapeToolHandlers } from './tools/scrape.js';
13
+ import { securityTools, securityToolHandlers } from './tools/security.js';
11
14
  import { ToolRegistry } from './tools/registry.js';
12
15
  import { loadToolModules } from './tools/loader.js';
13
16
  export class MCPServer {
@@ -45,6 +48,18 @@ export class MCPServer {
45
48
  tools: networkTools,
46
49
  handlers: networkToolHandlers
47
50
  });
51
+ this.toolRegistry.registerModule({
52
+ tools: seoTools,
53
+ handlers: seoToolHandlers
54
+ });
55
+ this.toolRegistry.registerModule({
56
+ tools: scrapeTools,
57
+ handlers: scrapeToolHandlers
58
+ });
59
+ this.toolRegistry.registerModule({
60
+ tools: securityTools,
61
+ handlers: securityToolHandlers
62
+ });
48
63
  }
49
64
  indexReady = null;
50
65
  async ensureIndexReady() {
@@ -0,0 +1,3 @@
1
+ import type { MCPTool, MCPToolResult } from '../types.js';
2
/** MCP tool definitions contributed by the scrape module. */
export declare const scrapeTools: Array<MCPTool>;
/**
 * Handlers for the scrape tools, keyed by tool name. Each handler receives the
 * raw argument map and resolves to an MCP tool result.
 */
export declare const scrapeToolHandlers: Record<string, (args: Record<string, unknown>) => Promise<MCPToolResult>>;
@@ -0,0 +1,156 @@
1
+ import { createClient } from '../../core/client.js';
2
+ import { ScrapeDocument } from '../../scrape/document.js';
3
/**
 * Interpret one entry of the `selectors` map.
 *
 * The `[]` "collect every match" marker is accepted on the selector
 * (`"a[]"`) and on the field name (`"links[]"`); in the latter case the
 * marker is stripped from the reported field name. A selector that is not a
 * non-empty string yields `selector: null` so the caller can report the
 * field as missing instead of failing the whole scrape.
 */
function parseSelectorEntry(rawField, rawSelector) {
    const MARKER = '[]';
    const fieldMarked = rawField.length > MARKER.length && rawField.endsWith(MARKER);
    const field = fieldMarked ? rawField.slice(0, -MARKER.length) : rawField;
    if (typeof rawSelector !== 'string') {
        return { field, selector: null, multiple: fieldMarked };
    }
    const selectorMarked = rawSelector.endsWith(MARKER);
    const selector = selectorMarked ? rawSelector.slice(0, -MARKER.length) : rawSelector;
    return {
        field,
        selector: selector.trim() ? selector : null,
        multiple: fieldMarked || selectorMarked,
    };
}
/**
 * Handler for the `rek_scrape` MCP tool: fetch a page and extract data from it.
 *
 * Recognised arguments:
 * - `url` (required): page to fetch.
 * - `selector`: single CSS selector; every match is returned with text, html, tag and attrs.
 * - `selectors`: map of field name to CSS selector; a `[]` suffix on either side collects all matches.
 * - `extract`: built-in extractor name(s): links, images, meta, og, twitter, jsonld,
 *   tables, forms, headings, or `all` for every one of them.
 *
 * @param args Raw tool arguments.
 * @returns An MCP tool result whose text content is the JSON-encoded output, or an
 *          `isError` result describing why the scrape failed. Never rejects.
 */
async function scrapeUrl(args) {
    const input = args ?? {};
    const url = String(input.url || '');
    const { selector, selectors, extract } = input;
    if (!url) {
        return {
            content: [{ type: 'text', text: 'Error: url is required' }],
            isError: true,
        };
    }
    try {
        const client = createClient({ timeout: 30000 });
        const response = await client.get(url);
        const html = await response.text();
        const doc = await ScrapeDocument.create(html, { baseUrl: url });
        const output = {
            url,
            title: doc.title(),
        };
        if (selector) {
            const elements = doc.selectAll(selector);
            output.results = elements.map(el => ({
                text: el.text(),
                html: el.html(),
                tag: el.tagName(),
                attrs: el.attrs(),
            }));
            output.count = elements.length;
        }
        if (selectors && typeof selectors === 'object' && Object.keys(selectors).length > 0) {
            const extracted = {};
            for (const [rawField, rawSelector] of Object.entries(selectors)) {
                const entry = parseSelectorEntry(rawField, rawSelector);
                if (entry.selector === null) {
                    // Unusable selector: report the field as null rather than aborting the scrape.
                    extracted[entry.field] = null;
                    continue;
                }
                extracted[entry.field] = entry.multiple
                    ? doc.texts(entry.selector)
                    : doc.text(entry.selector);
            }
            output.data = extracted;
        }
        // Accept a bare string as shorthand for a one-element list.
        const extractSet = new Set(typeof extract === 'string' ? [extract] : (extract || []));
        const wants = (name) => extractSet.has(name) || extractSet.has('all');
        if (wants('links')) {
            const links = doc.links({ absolute: true });
            // Cap the payload; the full count is still reported.
            output.links = links.slice(0, 100).map(l => ({
                href: l.href,
                text: l.text?.slice(0, 100),
                rel: l.rel,
            }));
            output.linkCount = links.length;
        }
        if (wants('images')) {
            const images = doc.images({ absolute: true });
            output.images = images.slice(0, 50).map(img => ({
                src: img.src,
                alt: img.alt,
                width: img.width,
                height: img.height,
            }));
            output.imageCount = images.length;
        }
        if (wants('meta')) {
            output.meta = doc.meta();
        }
        if (wants('og')) {
            output.openGraph = doc.openGraph();
        }
        if (wants('twitter')) {
            output.twitterCard = doc.twitterCard();
        }
        if (wants('jsonld')) {
            output.jsonLd = doc.jsonLd();
        }
        if (wants('tables')) {
            const tables = doc.tables();
            output.tables = tables.slice(0, 10).map(t => ({
                headers: t.headers,
                rows: t.rows.slice(0, 50),
            }));
            output.tableCount = tables.length;
        }
        if (wants('forms')) {
            output.forms = doc.forms();
        }
        // "all" covers headings too, like every other built-in extractor.
        if (wants('headings')) {
            output.headings = {
                h1: doc.texts('h1'),
                h2: doc.texts('h2'),
                h3: doc.texts('h3'),
            };
        }
        return {
            content: [{
                    type: 'text',
                    text: JSON.stringify(output, null, 2),
                }],
        };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return {
            content: [{
                    type: 'text',
                    text: `Scrape failed: ${reason}`,
                }],
            isError: true,
        };
    }
}
114
/**
 * MCP tool definitions contributed by the scrape module.
 *
 * The `selectors` description shows the `[]` marker on the selector value,
 * which is the form the handler checks when deciding whether to collect all
 * matches.
 */
export const scrapeTools = [
    {
        name: 'rek_scrape',
        description: `Scrape a web page and extract data using CSS selectors.

Supports multiple extraction modes:
- Single selector: Extract elements matching one CSS selector
- Selector map: Extract multiple fields at once
- Built-in extractors: links, images, meta, og, twitter, jsonld, tables, forms, headings

Examples:
- Get all product titles: selector=".product-title"
- Extract multiple fields: selectors={"title":"h1","price":".price","desc":".description"}
- Get all links and images: extract=["links","images"]
- Full extraction: extract=["all"]`,
        inputSchema: {
            type: 'object',
            properties: {
                url: {
                    type: 'string',
                    description: 'URL to scrape',
                },
                selector: {
                    type: 'string',
                    description: 'Single CSS selector to extract elements (e.g., ".product-card", "article h2")',
                },
                selectors: {
                    type: 'object',
                    description: 'Map of field names to CSS selectors. Append [] to a selector to collect every match instead of only the first (e.g., {"title":"h1","links":"a[]"})',
                },
                extract: {
                    type: 'array',
                    items: { type: 'string' },
                    description: 'Built-in extractors to run: links, images, meta, og, twitter, jsonld, tables, forms, headings, all',
                },
            },
            required: ['url'],
        },
    },
];
154
/** Dispatch table mapping each scrape tool name to the function that implements it. */
export const scrapeToolHandlers = { rek_scrape: scrapeUrl };
@@ -0,0 +1,3 @@
1
+ import type { MCPTool, MCPToolResult } from '../types.js';
2
/** MCP tool definitions exported by the security tools module. */
export declare const securityTools: Array<MCPTool>;
/**
 * Handlers for the security tools. Each handler receives the raw argument map
 * and resolves to an MCP tool result.
 * NOTE(review): presumably keyed by tool name, like the other tool modules; confirm against security.js.
 */
export declare const securityToolHandlers: Record<string, (args: Record<string, unknown>) => Promise<MCPToolResult>>;