recker 1.0.29-next.7cc1d8b → 1.0.29-next.cf0cafb
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/memory.d.ts +35 -0
- package/dist/ai/memory.js +136 -0
- package/dist/browser/types/ai-client.d.ts +29 -0
- package/dist/browser/types/ai-client.js +1 -0
- package/dist/cli/tui/scroll-buffer.js +4 -4
- package/dist/cli/tui/shell.js +40 -17
- package/dist/mcp/server.js +15 -0
- package/dist/mcp/tools/scrape.d.ts +3 -0
- package/dist/mcp/tools/scrape.js +156 -0
- package/dist/mcp/tools/security.d.ts +3 -0
- package/dist/mcp/tools/security.js +471 -0
- package/dist/mcp/tools/seo.d.ts +3 -0
- package/dist/mcp/tools/seo.js +427 -0
- package/dist/scrape/spider.js +1 -1
- package/dist/types/ai-client.d.ts +29 -0
- package/dist/types/ai-client.js +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import type { ChatMessage } from '../types/ai.js';
|
|
2
|
+
import type { AIMemoryConfig } from '../types/ai-client.js';
|
|
3
|
+
/**
 * Rolling chat history with an optional system prompt.
 *
 * Non-system messages are kept oldest-first and trimmed to roughly
 * `maxPairs` user/assistant exchanges; the system prompt is stored separately
 * and is never trimmed.
 */
export declare class ConversationMemory {
    private config;
    private systemMessage;
    private messages;
    /** @param config Optional limits and initial system prompt. */
    constructor(config?: AIMemoryConfig);

    /** Replaces the system prompt; an empty string removes the system message. */
    setSystemPrompt(prompt: string): void;
    /** Returns the configured system prompt text. */
    getSystemPrompt(): string;

    /** Appends a `user` message, then trims old history. */
    addUserMessage(content: string): void;
    /** Appends an `assistant` message, then trims old history. */
    addAssistantMessage(content: string): void;
    /** Appends any message; a `system` message updates the system prompt instead. */
    addMessage(message: ChatMessage): void;

    /** Records `userPrompt` as a user message and returns the full message list to send. */
    buildMessages(userPrompt: string): ChatMessage[];
    /** Records the assistant's reply to the most recent prompt. */
    recordResponse(content: string): void;

    /** Returns a new array: the system message (if any) followed by the history. */
    getMessages(): ChatMessage[];
    /** Returns the stored history without the system message. */
    getConversation(): readonly ChatMessage[];
    /** Counts user/assistant exchanges currently held. */
    getPairCount(): number;

    /** Drops the history but keeps the system prompt. */
    clear(): void;
    /** Drops the history and the system prompt. */
    reset(): void;

    /** Applies whichever of `maxPairs` / `systemPrompt` are present in `config`. */
    setConfig(config: Partial<AIMemoryConfig>): void;
    /** Returns a copy of the current configuration. */
    getConfig(): AIMemoryConfig;
    private prune;

    /** True when no non-system messages are stored. */
    isEmpty(): boolean;
    /** Number of stored non-system messages. */
    getMessageCount(): number;

    /** Serialisable snapshot of configuration, system prompt and history. */
    toJSON(): {
        config: AIMemoryConfig;
        systemPrompt: string | null;
        messages: ChatMessage[];
    };
    /** Rebuilds a memory instance from a snapshot shaped like the `toJSON()` result. */
    static fromJSON(data: {
        config?: AIMemoryConfig;
        systemPrompt?: string | null;
        messages?: ChatMessage[];
    }): ConversationMemory;
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/** Number of user/assistant pairs retained when `maxPairs` is not configured. */
const DEFAULT_MAX_PAIRS = 12;
/**
 * Rolling chat history with an optional system prompt.
 *
 * Non-system messages are stored oldest-first and trimmed so that at most
 * `maxPairs * 2` of them remain. The system prompt lives outside that list,
 * so trimming never removes it.
 */
export class ConversationMemory {
    /** Effective settings; always holds both `maxPairs` and `systemPrompt`. */
    config;
    /** `{ role: 'system', content }` for the current prompt, or `null` when none is set. */
    systemMessage = null;
    /** Non-system messages, oldest first. */
    messages = [];
    /**
     * @param config Optional `{ maxPairs, systemPrompt }`. `null` or `undefined`
     *   selects the defaults (12 pairs, no system prompt).
     */
    constructor(config = null) {
        // Tolerate `null` so snapshots that serialised a missing config still load.
        const source = config ?? {};
        this.config = {
            maxPairs: source.maxPairs ?? DEFAULT_MAX_PAIRS,
            systemPrompt: source.systemPrompt ?? '',
        };
        if (this.config.systemPrompt) {
            this.systemMessage = {
                role: 'system',
                content: this.config.systemPrompt,
            };
        }
    }
    /**
     * Replaces the system prompt.
     * @param prompt New prompt text; a falsy value removes the system message.
     */
    setSystemPrompt(prompt) {
        this.config.systemPrompt = prompt;
        this.systemMessage = prompt ? { role: 'system', content: prompt } : null;
    }
    /** @returns The configured system prompt text (empty string when none). */
    getSystemPrompt() {
        return this.config.systemPrompt;
    }
    /** Appends a `user` message and trims old history. */
    addUserMessage(content) {
        this.messages.push({ role: 'user', content });
        this.prune();
    }
    /** Appends an `assistant` message and trims old history. */
    addAssistantMessage(content) {
        this.messages.push({ role: 'assistant', content });
        this.prune();
    }
    /**
     * Appends an arbitrary message and trims old history.
     * A `system` message is not stored in the history: it replaces the system
     * prompt (non-string content clears it).
     */
    addMessage(message) {
        if (message.role === 'system') {
            this.setSystemPrompt(typeof message.content === 'string' ? message.content : '');
            return;
        }
        this.messages.push(message);
        this.prune();
    }
    /**
     * Records `userPrompt` as a user message and returns the list to send.
     * @returns System message (if any) followed by the trimmed history.
     */
    buildMessages(userPrompt) {
        this.addUserMessage(userPrompt);
        return this.getMessages();
    }
    /** Records the assistant's reply; same effect as `addAssistantMessage`. */
    recordResponse(content) {
        this.addAssistantMessage(content);
    }
    /** @returns A new array: system message (if any) followed by the history. */
    getMessages() {
        return this.systemMessage ? [this.systemMessage, ...this.messages] : [...this.messages];
    }
    /** @returns The internal history array (no system message); treat it as read-only. */
    getConversation() {
        return this.messages;
    }
    /**
     * Counts `user` messages that are immediately followed by an `assistant`
     * message. The scan advances one message at a time after a mismatch, so a
     * history that is not perfectly alternating (for example two user messages
     * in a row) is still counted correctly.
     */
    getPairCount() {
        let pairs = 0;
        let index = 0;
        while (index < this.messages.length - 1) {
            const startsPair = this.messages[index].role === 'user' &&
                this.messages[index + 1].role === 'assistant';
            if (startsPair) {
                pairs++;
                index += 2;
            }
            else {
                index++;
            }
        }
        return pairs;
    }
    /** Drops the history; the system prompt is kept. */
    clear() {
        this.messages = [];
    }
    /** Drops the history and the system prompt. */
    reset() {
        this.messages = [];
        this.systemMessage = null;
        this.config.systemPrompt = '';
    }
    /**
     * Applies the fields present in `config`.
     * Changing `maxPairs` trims the history immediately.
     */
    setConfig(config) {
        if (config.maxPairs !== undefined) {
            this.config.maxPairs = config.maxPairs;
            this.prune();
        }
        if (config.systemPrompt !== undefined) {
            this.setSystemPrompt(config.systemPrompt);
        }
    }
    /** @returns A shallow copy of the configuration. */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Trims the oldest messages once more than `maxPairs * 2` are stored.
     * The number removed is rounded up to an even count so whole exchanges are
     * dropped rather than half of one.
     */
    prune() {
        const maxMessages = this.config.maxPairs * 2;
        if (this.messages.length > maxMessages) {
            const excess = this.messages.length - maxMessages;
            const toRemove = Math.ceil(excess / 2) * 2;
            this.messages = this.messages.slice(toRemove);
        }
    }
    /** @returns True when no non-system messages are stored. */
    isEmpty() {
        return this.messages.length === 0;
    }
    /** @returns Number of stored non-system messages. */
    getMessageCount() {
        return this.messages.length;
    }
    /**
     * Serialisable snapshot.
     * `config` and `messages` are copies, so editing the snapshot does not
     * change this instance. `systemPrompt` is `null` (not `undefined`) when no
     * prompt is set, so the key survives `JSON.stringify`.
     */
    toJSON() {
        return {
            config: { ...this.config },
            systemPrompt: this.systemMessage?.content ?? null,
            messages: [...this.messages],
        };
    }
    /**
     * Rebuilds an instance from a snapshot shaped like the `toJSON()` result.
     * Messages are replayed through `addMessage`, so the usual trimming applies.
     */
    static fromJSON(data) {
        const memory = new ConversationMemory(data.config);
        if (data.systemPrompt) {
            memory.setSystemPrompt(data.systemPrompt);
        }
        for (const msg of data.messages ?? []) {
            memory.addMessage(msg);
        }
        return memory;
    }
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { AIResponse, AIStream, ChatMessage, AIProvider } from './ai.js';
|
|
2
|
+
/** Settings for the conversation history kept between AI calls. */
export interface AIMemoryConfig {
    /**
     * How many user/assistant exchanges to retain.
     * `ConversationMemory` falls back to 12 when this is omitted.
     */
    maxPairs?: number;
    /** Text sent as the `system` message; omitted or empty means no system message. */
    systemPrompt?: string;
}
|
|
6
|
+
/**
 * AI settings attached to a client preset.
 * NOTE(review): how each field is consumed is not visible here; the notes
 * below follow the field names and should be confirmed against the client.
 */
export interface PresetAIConfig {
    /** Which AI provider to talk to. */
    provider: AIProvider;
    /** Credential for the provider (presumably optional so it can come from elsewhere — confirm). */
    apiKey?: string;
    /** Model identifier to request. */
    model: string;
    /** Presumably overrides the provider's default endpoint — confirm. */
    baseUrl?: string;
    /** Conversation-memory settings (history size, system prompt). */
    memory?: AIMemoryConfig;
    /** Presumably a provider-side organization identifier — confirm. */
    organization?: string;
    /** Presumably extra HTTP headers sent with each provider request — confirm. */
    headers?: Record<string, string>;
}
|
|
15
|
+
/**
 * AI helper surface exposed by a client.
 * NOTE(review): the difference between the `chat*` and `prompt*` methods is
 * not visible here; presumably `chat*` uses the conversation memory while
 * `prompt*` is a one-off call — confirm against the implementation.
 */
export interface ClientAI {
    /** Sends `prompt` and resolves with the complete response. */
    chat(prompt: string): Promise<AIResponse>;
    /** Sends `prompt` and resolves with a stream of the response. */
    chatStream(prompt: string): Promise<AIStream>;
    /** Sends `prompt` and resolves with the complete response. */
    prompt(prompt: string): Promise<AIResponse>;
    /** Sends `prompt` and resolves with a stream of the response. */
    promptStream(prompt: string): Promise<AIStream>;

    /** Discards the stored conversation. */
    clearMemory(): void;
    /** Returns the stored conversation messages as a read-only list. */
    getMemory(): readonly ChatMessage[];
    /** Updates whichever memory settings are present in `config`. */
    setMemoryConfig(config: Partial<AIMemoryConfig>): void;
    /** Returns the current memory settings. */
    getMemoryConfig(): AIMemoryConfig;

    /** Provider this helper is bound to. */
    readonly provider: AIProvider;
    /** Model this helper is bound to. */
    readonly model: string;
}
|
|
27
|
+
/** Client options extension that carries a preset's AI settings. */
export interface ClientOptionsWithAI {
    /** AI settings for the client; the underscore suggests internal use — confirm. */
    _aiConfig?: PresetAIConfig;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -119,17 +119,17 @@ export class ScrollBuffer extends EventEmitter {
|
|
|
119
119
|
}
|
|
120
120
|
export function parseScrollKey(data) {
|
|
121
121
|
const str = data.toString();
|
|
122
|
-
if (str === '\x1b[5~' || str === '\x1bOy')
|
|
122
|
+
if (str === '\x1b[5~' || str === '\x1bOy' || str === '\x1b[5;5~' || str === '\x1b[5;2~')
|
|
123
123
|
return 'pageUp';
|
|
124
|
-
if (str === '\x1b[6~' || str === '\x1bOs')
|
|
124
|
+
if (str === '\x1b[6~' || str === '\x1bOs' || str === '\x1b[6;5~' || str === '\x1b[6;2~')
|
|
125
125
|
return 'pageDown';
|
|
126
126
|
if (str === '\x1b[1;2A')
|
|
127
127
|
return 'scrollUp';
|
|
128
128
|
if (str === '\x1b[1;2B')
|
|
129
129
|
return 'scrollDown';
|
|
130
|
-
if (str === '\x1b[H' || str === '\x1b[1~' || str === '\x1bOH')
|
|
130
|
+
if (str === '\x1b[H' || str === '\x1b[1~' || str === '\x1bOH' || str === '\x1b[7~')
|
|
131
131
|
return 'home';
|
|
132
|
-
if (str === '\x1b[F' || str === '\x1b[4~' || str === '\x1bOF')
|
|
132
|
+
if (str === '\x1b[F' || str === '\x1b[4~' || str === '\x1bOF' || str === '\x1b[8~')
|
|
133
133
|
return 'end';
|
|
134
134
|
if (str === 'q' || str === 'Q')
|
|
135
135
|
return 'quit';
|
package/dist/cli/tui/shell.js
CHANGED
|
@@ -173,20 +173,36 @@ export class RekShell {
|
|
|
173
173
|
}
|
|
174
174
|
return true;
|
|
175
175
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
if (scrollKey
|
|
179
|
-
if (
|
|
176
|
+
try {
|
|
177
|
+
const scrollKey = parseScrollKey(data);
|
|
178
|
+
if (scrollKey) {
|
|
179
|
+
if (scrollKey === 'quit') {
|
|
180
|
+
if (self.inScrollMode) {
|
|
181
|
+
self.exitScrollMode();
|
|
182
|
+
return true;
|
|
183
|
+
}
|
|
184
|
+
return originalEmit(event, ...args);
|
|
185
|
+
}
|
|
186
|
+
self.handleScrollKey(scrollKey);
|
|
187
|
+
return true;
|
|
188
|
+
}
|
|
189
|
+
if (self.inScrollMode) {
|
|
190
|
+
if (str === '\x1b[A') {
|
|
191
|
+
self.handleScrollKey('scrollUp');
|
|
192
|
+
return true;
|
|
193
|
+
}
|
|
194
|
+
if (str === '\x1b[B') {
|
|
195
|
+
self.handleScrollKey('scrollDown');
|
|
196
|
+
return true;
|
|
197
|
+
}
|
|
198
|
+
if (str === '\x1b' || str === '\x1b\x1b') {
|
|
180
199
|
self.exitScrollMode();
|
|
181
200
|
return true;
|
|
182
201
|
}
|
|
183
|
-
return
|
|
202
|
+
return true;
|
|
184
203
|
}
|
|
185
|
-
self.handleScrollKey(scrollKey);
|
|
186
|
-
return true;
|
|
187
204
|
}
|
|
188
|
-
|
|
189
|
-
return true;
|
|
205
|
+
catch {
|
|
190
206
|
}
|
|
191
207
|
}
|
|
192
208
|
return originalEmit(event, ...args);
|
|
@@ -194,6 +210,9 @@ export class RekShell {
|
|
|
194
210
|
}
|
|
195
211
|
}
|
|
196
212
|
handleScrollKey(key) {
|
|
213
|
+
if (!this.originalStdoutWrite) {
|
|
214
|
+
return;
|
|
215
|
+
}
|
|
197
216
|
let needsRedraw = false;
|
|
198
217
|
switch (key) {
|
|
199
218
|
case 'pageUp':
|
|
@@ -250,11 +269,15 @@ export class RekShell {
|
|
|
250
269
|
enterScrollMode() {
|
|
251
270
|
if (this.inScrollMode)
|
|
252
271
|
return;
|
|
272
|
+
if (!this.originalStdoutWrite)
|
|
273
|
+
return;
|
|
253
274
|
this.inScrollMode = true;
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
275
|
+
try {
|
|
276
|
+
this.rl.pause();
|
|
277
|
+
}
|
|
278
|
+
catch {
|
|
257
279
|
}
|
|
280
|
+
this.originalStdoutWrite('\x1b[?25l');
|
|
258
281
|
this.renderScrollView();
|
|
259
282
|
}
|
|
260
283
|
exitScrollMode() {
|
|
@@ -288,7 +311,7 @@ export class RekShell {
|
|
|
288
311
|
const scrollInfo = this.scrollBuffer.isScrolledUp
|
|
289
312
|
? colors.yellow(`↑ ${this.scrollBuffer.position} lines | ${info.percent}% | `)
|
|
290
313
|
: '';
|
|
291
|
-
const helpText = colors.gray('
|
|
314
|
+
const helpText = colors.gray('↑↓/PgUp/PgDn • Home/End • Esc/Q to exit');
|
|
292
315
|
const statusBar = `\x1b[${rows};1H\x1b[7m ${scrollInfo}${helpText} \x1b[0m`;
|
|
293
316
|
this.originalStdoutWrite(statusBar);
|
|
294
317
|
}
|
|
@@ -1494,7 +1517,7 @@ ${colors.bold('Network:')}
|
|
|
1494
1517
|
}
|
|
1495
1518
|
async runSpider(args) {
|
|
1496
1519
|
let url = '';
|
|
1497
|
-
let maxDepth =
|
|
1520
|
+
let maxDepth = 5;
|
|
1498
1521
|
let maxPages = 100;
|
|
1499
1522
|
let concurrency = 5;
|
|
1500
1523
|
let seoEnabled = false;
|
|
@@ -1502,7 +1525,7 @@ ${colors.bold('Network:')}
|
|
|
1502
1525
|
for (let i = 0; i < args.length; i++) {
|
|
1503
1526
|
const arg = args[i];
|
|
1504
1527
|
if (arg.startsWith('depth=')) {
|
|
1505
|
-
maxDepth = parseInt(arg.split('=')[1]) ||
|
|
1528
|
+
maxDepth = parseInt(arg.split('=')[1]) || 5;
|
|
1506
1529
|
}
|
|
1507
1530
|
else if (arg.startsWith('limit=')) {
|
|
1508
1531
|
maxPages = parseInt(arg.split('=')[1]) || 100;
|
|
@@ -1524,7 +1547,7 @@ ${colors.bold('Network:')}
|
|
|
1524
1547
|
if (!this.baseUrl) {
|
|
1525
1548
|
console.log(colors.yellow('Usage: spider <url> [options]'));
|
|
1526
1549
|
console.log(colors.gray(' Options:'));
|
|
1527
|
-
console.log(colors.gray(' depth=
|
|
1550
|
+
console.log(colors.gray(' depth=5 Max crawl depth'));
|
|
1528
1551
|
console.log(colors.gray(' limit=100 Max pages to crawl'));
|
|
1529
1552
|
console.log(colors.gray(' concurrency=5 Concurrent requests'));
|
|
1530
1553
|
console.log(colors.gray(' seo Enable SEO analysis'));
|
|
@@ -2687,7 +2710,7 @@ ${colors.bold('Network:')}
|
|
|
2687
2710
|
${colors.bold('Web Crawler:')}
|
|
2688
2711
|
${colors.green('spider <url>')} Crawl website following internal links.
|
|
2689
2712
|
${colors.gray('Options:')}
|
|
2690
|
-
${colors.white('--depth=
|
|
2713
|
+
${colors.white('--depth=5')} ${colors.gray('Maximum depth to crawl')}
|
|
2691
2714
|
${colors.white('--limit=100')} ${colors.gray('Maximum pages to crawl')}
|
|
2692
2715
|
${colors.white('--concurrency=5')} ${colors.gray('Parallel requests')}
|
|
2693
2716
|
|
package/dist/mcp/server.js
CHANGED
|
@@ -8,6 +8,9 @@ import { createHybridSearch } from './search/index.js';
|
|
|
8
8
|
import { UnsupportedError } from '../core/errors.js';
|
|
9
9
|
import { getIpInfo, isValidIP, isGeoIPAvailable, isBogon, isIPv6 } from './ip-intel.js';
|
|
10
10
|
import { networkTools, networkToolHandlers } from './tools/network.js';
|
|
11
|
+
import { seoTools, seoToolHandlers } from './tools/seo.js';
|
|
12
|
+
import { scrapeTools, scrapeToolHandlers } from './tools/scrape.js';
|
|
13
|
+
import { securityTools, securityToolHandlers } from './tools/security.js';
|
|
11
14
|
import { ToolRegistry } from './tools/registry.js';
|
|
12
15
|
import { loadToolModules } from './tools/loader.js';
|
|
13
16
|
export class MCPServer {
|
|
@@ -45,6 +48,18 @@ export class MCPServer {
|
|
|
45
48
|
tools: networkTools,
|
|
46
49
|
handlers: networkToolHandlers
|
|
47
50
|
});
|
|
51
|
+
this.toolRegistry.registerModule({
|
|
52
|
+
tools: seoTools,
|
|
53
|
+
handlers: seoToolHandlers
|
|
54
|
+
});
|
|
55
|
+
this.toolRegistry.registerModule({
|
|
56
|
+
tools: scrapeTools,
|
|
57
|
+
handlers: scrapeToolHandlers
|
|
58
|
+
});
|
|
59
|
+
this.toolRegistry.registerModule({
|
|
60
|
+
tools: securityTools,
|
|
61
|
+
handlers: securityToolHandlers
|
|
62
|
+
});
|
|
48
63
|
}
|
|
49
64
|
indexReady = null;
|
|
50
65
|
async ensureIndexReady() {
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { createClient } from '../../core/client.js';
|
|
2
|
+
import { ScrapeDocument } from '../../scrape/document.js';
|
|
3
|
+
/** Caps on built-in extractor output so one large page cannot flood the response. */
const SCRAPE_LIMITS = { links: 100, linkText: 100, images: 50, tables: 10, tableRows: 50 };
/** Wraps `text` as an MCP error result. */
function scrapeError(text) {
    return {
        content: [{ type: 'text', text }],
        isError: true,
    };
}
/** Normalises the `extract` argument (array, single string, or missing) into a Set of names. */
function toExtractSet(extract) {
    if (typeof extract === 'string') {
        // A bare string would otherwise be split into characters by `new Set`.
        return new Set([extract]);
    }
    return new Set(Array.isArray(extract) ? extract : []);
}
/** Builds the `results`/`count` section for the single `selector` argument. */
function selectElements(doc, selector) {
    if (!selector) {
        return {};
    }
    const elements = doc.selectAll(selector);
    return {
        results: elements.map((el) => ({
            text: el.text(),
            html: el.html(),
            tag: el.tagName(),
            attrs: el.attrs(),
        })),
        count: elements.length,
    };
}
/** Builds the `data` section from `[field, selector]` entries of the `selectors` map. */
function extractSelectorMap(doc, entries) {
    if (entries.length === 0) {
        return {};
    }
    // Prototype-less so a caller-chosen field name such as "__proto__" is stored as plain data.
    const extracted = Object.create(null);
    for (const [key, sel] of entries) {
        const selectorHasSuffix = sel.endsWith('[]');
        // The `[]` marker is accepted on the selector ("a[]") or on the field name ("links[]").
        const isMultiple = selectorHasSuffix || key.endsWith('[]');
        const actualSel = selectorHasSuffix ? sel.slice(0, -2) : sel;
        extracted[key] = isMultiple ? doc.texts(actualSel) : doc.text(actualSel);
    }
    return { data: extracted };
}
/** Builds the `links`/`linkCount` section, capped by `SCRAPE_LIMITS`. */
function summarizeLinks(doc) {
    const links = doc.links({ absolute: true });
    return {
        links: links.slice(0, SCRAPE_LIMITS.links).map((l) => ({
            href: l.href,
            text: l.text?.slice(0, SCRAPE_LIMITS.linkText),
            rel: l.rel,
        })),
        linkCount: links.length,
    };
}
/** Builds the `images`/`imageCount` section, capped by `SCRAPE_LIMITS`. */
function summarizeImages(doc) {
    const images = doc.images({ absolute: true });
    return {
        images: images.slice(0, SCRAPE_LIMITS.images).map((img) => ({
            src: img.src,
            alt: img.alt,
            width: img.width,
            height: img.height,
        })),
        imageCount: images.length,
    };
}
/** Builds the `tables`/`tableCount` section, capped by `SCRAPE_LIMITS`. */
function summarizeTables(doc) {
    const tables = doc.tables();
    return {
        tables: tables.slice(0, SCRAPE_LIMITS.tables).map((t) => ({
            headers: t.headers,
            rows: t.rows.slice(0, SCRAPE_LIMITS.tableRows),
        })),
        tableCount: tables.length,
    };
}
/** Runs every built-in extractor named in `requested`; `all` selects every one, headings included. */
function runBuiltInExtractors(doc, requested) {
    const wants = (name) => requested.has('all') || requested.has(name);
    // Spreading `false` adds nothing, so each section appears only when requested.
    return {
        ...(wants('links') && summarizeLinks(doc)),
        ...(wants('images') && summarizeImages(doc)),
        ...(wants('meta') && { meta: doc.meta() }),
        ...(wants('og') && { openGraph: doc.openGraph() }),
        ...(wants('twitter') && { twitterCard: doc.twitterCard() }),
        ...(wants('jsonld') && { jsonLd: doc.jsonLd() }),
        ...(wants('tables') && summarizeTables(doc)),
        ...(wants('forms') && { forms: doc.forms() }),
        ...(wants('headings') && {
            headings: {
                h1: doc.texts('h1'),
                h2: doc.texts('h2'),
                h3: doc.texts('h3'),
            },
        }),
    };
}
/**
 * MCP handler for `rek_scrape`: fetches a page and extracts data from it.
 *
 * @param args Tool arguments:
 *   - `url` (required) page to fetch;
 *   - `selector` one CSS selector whose matches are returned as `results`;
 *   - `selectors` map of field name to CSS selector, returned as `data`; a
 *     `[]` suffix on the selector or on the field name collects every match;
 *   - `extract` built-in extractor names (array or single string); `all` runs
 *     every extractor.
 * @returns An MCP result whose text is the pretty-printed JSON output, or an
 *   `isError` result when the arguments are invalid or the scrape fails.
 *   This function does not throw; failures are reported in the result.
 */
async function scrapeUrl(args) {
    const { selector, selectors, extract } = args ?? {};
    const url = String(args?.url || '');
    if (!url) {
        return scrapeError('Error: url is required');
    }
    // Validate the selector map before any network work so bad input fails fast and clearly.
    const selectorEntries = selectors && typeof selectors === 'object' ? Object.entries(selectors) : [];
    const invalidEntry = selectorEntries.find(([, sel]) => typeof sel !== 'string');
    if (invalidEntry) {
        return scrapeError(`Error: selector for "${invalidEntry[0]}" must be a string`);
    }
    const requested = toExtractSet(extract);
    try {
        const client = createClient({ timeout: 30000 });
        const response = await client.get(url);
        const html = await response.text();
        const doc = await ScrapeDocument.create(html, { baseUrl: url });
        const output = {
            url,
            title: doc.title(),
            ...selectElements(doc, selector),
            ...extractSelectorMap(doc, selectorEntries),
            ...runBuiltInExtractors(doc, requested),
        };
        return {
            content: [{
                    type: 'text',
                    text: JSON.stringify(output, null, 2),
                }],
        };
    }
    catch (error) {
        // Thrown values are not guaranteed to be Error instances.
        const reason = error instanceof Error ? error.message : String(error);
        return scrapeError(`Scrape failed: ${reason}`);
    }
}
|
|
114
|
+
/**
 * MCP tool definitions for the scrape module.
 * Each entry carries the tool name, the description shown to the caller and a
 * JSON Schema for its arguments.
 */
export const scrapeTools = [
    {
        name: 'rek_scrape',
        description: `Scrape a web page and extract data using CSS selectors.

Supports multiple extraction modes:
- Single selector: Extract elements matching one CSS selector
- Selector map: Extract multiple fields at once
- Built-in extractors: links, images, meta, og, twitter, jsonld, tables, forms, headings

Examples:
- Get all product titles: selector=".product-title"
- Extract multiple fields: selectors={"title":"h1","price":".price","desc":".description"}
- Get all links and images: extract=["links","images"]
- Full extraction: extract=["all"]`,
        inputSchema: {
            type: 'object',
            properties: {
                url: {
                    type: 'string',
                    description: 'URL to scrape',
                },
                selector: {
                    type: 'string',
                    description: 'Single CSS selector to extract elements (e.g., ".product-card", "article h2")',
                },
                selectors: {
                    type: 'object',
                    // The handler reads the [] marker from the selector value, so the example shows it there.
                    description: 'Map of field names to CSS selectors. Add [] suffix to a selector for multiple values (e.g., {"title":"h1","links":"a[]"})',
                },
                extract: {
                    type: 'array',
                    items: { type: 'string' },
                    description: 'Built-in extractors to run: links, images, meta, og, twitter, jsonld, tables, forms, headings, all',
                },
            },
            required: ['url'],
        },
    },
];
|
|
154
|
+
/** Maps each scrape tool name to the function that handles calls to it. */
export const scrapeToolHandlers = {
    rek_scrape: scrapeUrl,
};
|