recker 1.0.29 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -1
- package/dist/ai/client-ai.d.ts +41 -0
- package/dist/ai/client-ai.js +391 -0
- package/dist/ai/index.d.ts +2 -0
- package/dist/ai/index.js +2 -0
- package/dist/ai/memory.d.ts +35 -0
- package/dist/ai/memory.js +136 -0
- package/dist/browser/ai/client-ai.d.ts +41 -0
- package/dist/browser/ai/client-ai.js +391 -0
- package/dist/browser/ai/memory.d.ts +35 -0
- package/dist/browser/ai/memory.js +136 -0
- package/dist/browser/core/client.d.ts +6 -1
- package/dist/browser/core/client.js +18 -0
- package/dist/browser/transport/undici.js +11 -2
- package/dist/browser/types/ai-client.d.ts +32 -0
- package/dist/browser/types/ai-client.js +1 -0
- package/dist/browser/types/ai.d.ts +1 -1
- package/dist/cli/index.js +261 -1
- package/dist/cli/tui/scroll-buffer.js +4 -4
- package/dist/cli/tui/shell.d.ts +3 -0
- package/dist/cli/tui/shell.js +166 -19
- package/dist/core/client.d.ts +6 -1
- package/dist/core/client.js +18 -0
- package/dist/mcp/server.js +15 -0
- package/dist/mcp/tools/scrape.d.ts +3 -0
- package/dist/mcp/tools/scrape.js +156 -0
- package/dist/mcp/tools/security.d.ts +3 -0
- package/dist/mcp/tools/security.js +471 -0
- package/dist/mcp/tools/seo.d.ts +3 -0
- package/dist/mcp/tools/seo.js +427 -0
- package/dist/presets/anthropic.d.ts +3 -1
- package/dist/presets/anthropic.js +11 -1
- package/dist/presets/azure-openai.d.ts +3 -1
- package/dist/presets/azure-openai.js +11 -1
- package/dist/presets/cohere.d.ts +3 -1
- package/dist/presets/cohere.js +8 -2
- package/dist/presets/deepseek.d.ts +3 -1
- package/dist/presets/deepseek.js +8 -2
- package/dist/presets/fireworks.d.ts +3 -1
- package/dist/presets/fireworks.js +8 -2
- package/dist/presets/gemini.d.ts +3 -1
- package/dist/presets/gemini.js +8 -1
- package/dist/presets/groq.d.ts +3 -1
- package/dist/presets/groq.js +8 -2
- package/dist/presets/huggingface.d.ts +3 -1
- package/dist/presets/huggingface.js +8 -1
- package/dist/presets/mistral.d.ts +3 -1
- package/dist/presets/mistral.js +8 -2
- package/dist/presets/openai.d.ts +3 -1
- package/dist/presets/openai.js +9 -2
- package/dist/presets/perplexity.d.ts +3 -1
- package/dist/presets/perplexity.js +8 -2
- package/dist/presets/registry.d.ts +4 -0
- package/dist/presets/registry.js +48 -0
- package/dist/presets/replicate.d.ts +3 -1
- package/dist/presets/replicate.js +8 -1
- package/dist/presets/together.d.ts +3 -1
- package/dist/presets/together.js +8 -2
- package/dist/presets/xai.d.ts +3 -1
- package/dist/presets/xai.js +8 -2
- package/dist/scrape/spider.js +1 -1
- package/dist/transport/undici.js +11 -2
- package/dist/types/ai-client.d.ts +32 -0
- package/dist/types/ai-client.js +1 -0
- package/dist/types/ai.d.ts +1 -1
- package/dist/utils/colors.d.ts +2 -0
- package/dist/utils/colors.js +4 -0
- package/package.json +1 -1
package/dist/core/client.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { ClientOptions, Middleware, ReckerRequest, ReckerResponse, RequestOptions, CacheStorage, PageResult } from '../types/index.js';
|
|
2
|
+
import type { ClientAI, ClientOptionsWithAI } from '../types/ai-client.js';
|
|
2
3
|
import { RequestPromise } from './request-promise.js';
|
|
3
4
|
import { PaginationOptions } from '../plugins/pagination.js';
|
|
4
5
|
import { RetryOptions } from '../plugins/retry.js';
|
|
@@ -36,7 +37,9 @@ export declare class Client {
|
|
|
36
37
|
private cookieJar?;
|
|
37
38
|
private cookieIgnoreInvalid;
|
|
38
39
|
private defaultTimeout?;
|
|
39
|
-
|
|
40
|
+
private _aiConfig?;
|
|
41
|
+
private _ai?;
|
|
42
|
+
constructor(options?: ExtendedClientOptions & Partial<ClientOptionsWithAI>);
|
|
40
43
|
private createLoggingMiddleware;
|
|
41
44
|
private createMaxSizeMiddleware;
|
|
42
45
|
private setupCookieJar;
|
|
@@ -113,6 +116,8 @@ export declare class Client {
|
|
|
113
116
|
whois(query: string, options?: WhoisOptions): Promise<WhoisResult>;
|
|
114
117
|
isDomainAvailable(domain: string, options?: WhoisOptions): Promise<boolean>;
|
|
115
118
|
hls(manifestUrl: string, options?: HlsOptions): HlsPromise;
|
|
119
|
+
get ai(): ClientAI;
|
|
120
|
+
get hasAI(): boolean;
|
|
116
121
|
}
|
|
117
122
|
export declare function createClient(options?: ExtendedClientOptions): Client;
|
|
118
123
|
export {};
|
package/dist/core/client.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { consoleLogger } from '../types/index.js';
|
|
2
|
+
import { ClientAIImpl } from '../ai/client-ai.js';
|
|
2
3
|
import { HttpRequest } from './request.js';
|
|
3
4
|
import { UndiciTransport } from '../transport/undici.js';
|
|
4
5
|
import { RequestPromise } from './request-promise.js';
|
|
@@ -41,6 +42,8 @@ export class Client {
|
|
|
41
42
|
cookieJar;
|
|
42
43
|
cookieIgnoreInvalid = false;
|
|
43
44
|
defaultTimeout;
|
|
45
|
+
_aiConfig;
|
|
46
|
+
_ai;
|
|
44
47
|
constructor(options = {}) {
|
|
45
48
|
this.baseUrl = options.baseUrl || '';
|
|
46
49
|
this.middlewares = options.middlewares || [];
|
|
@@ -150,6 +153,9 @@ export class Client {
|
|
|
150
153
|
if (options.cookies) {
|
|
151
154
|
this.setupCookieJar(options.cookies);
|
|
152
155
|
}
|
|
156
|
+
if (options._aiConfig) {
|
|
157
|
+
this._aiConfig = options._aiConfig;
|
|
158
|
+
}
|
|
153
159
|
if (this.maxResponseSize !== undefined) {
|
|
154
160
|
this.middlewares.push(this.createMaxSizeMiddleware(this.maxResponseSize));
|
|
155
161
|
}
|
|
@@ -661,6 +667,18 @@ export class Client {
|
|
|
661
667
|
hls(manifestUrl, options = {}) {
|
|
662
668
|
return new HlsPromise(this, manifestUrl, options);
|
|
663
669
|
}
|
|
670
|
+
get ai() {
|
|
671
|
+
if (!this._ai) {
|
|
672
|
+
if (!this._aiConfig) {
|
|
673
|
+
throw new ConfigurationError('AI features require an AI-enabled preset. Use createClient(openai({...})), createClient(anthropic({...})), etc.', { configKey: '_aiConfig' });
|
|
674
|
+
}
|
|
675
|
+
this._ai = new ClientAIImpl(this, this._aiConfig);
|
|
676
|
+
}
|
|
677
|
+
return this._ai;
|
|
678
|
+
}
|
|
679
|
+
get hasAI() {
|
|
680
|
+
return this._aiConfig !== undefined;
|
|
681
|
+
}
|
|
664
682
|
}
|
|
665
683
|
export function createClient(options = {}) {
|
|
666
684
|
return new Client(options);
|
package/dist/mcp/server.js
CHANGED
|
@@ -8,6 +8,9 @@ import { createHybridSearch } from './search/index.js';
|
|
|
8
8
|
import { UnsupportedError } from '../core/errors.js';
|
|
9
9
|
import { getIpInfo, isValidIP, isGeoIPAvailable, isBogon, isIPv6 } from './ip-intel.js';
|
|
10
10
|
import { networkTools, networkToolHandlers } from './tools/network.js';
|
|
11
|
+
import { seoTools, seoToolHandlers } from './tools/seo.js';
|
|
12
|
+
import { scrapeTools, scrapeToolHandlers } from './tools/scrape.js';
|
|
13
|
+
import { securityTools, securityToolHandlers } from './tools/security.js';
|
|
11
14
|
import { ToolRegistry } from './tools/registry.js';
|
|
12
15
|
import { loadToolModules } from './tools/loader.js';
|
|
13
16
|
export class MCPServer {
|
|
@@ -45,6 +48,18 @@ export class MCPServer {
|
|
|
45
48
|
tools: networkTools,
|
|
46
49
|
handlers: networkToolHandlers
|
|
47
50
|
});
|
|
51
|
+
this.toolRegistry.registerModule({
|
|
52
|
+
tools: seoTools,
|
|
53
|
+
handlers: seoToolHandlers
|
|
54
|
+
});
|
|
55
|
+
this.toolRegistry.registerModule({
|
|
56
|
+
tools: scrapeTools,
|
|
57
|
+
handlers: scrapeToolHandlers
|
|
58
|
+
});
|
|
59
|
+
this.toolRegistry.registerModule({
|
|
60
|
+
tools: securityTools,
|
|
61
|
+
handlers: securityToolHandlers
|
|
62
|
+
});
|
|
48
63
|
}
|
|
49
64
|
indexReady = null;
|
|
50
65
|
async ensureIndexReady() {
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { createClient } from '../../core/client.js';
|
|
2
|
+
import { ScrapeDocument } from '../../scrape/document.js';
|
|
3
|
+
async function scrapeUrl(args) {
|
|
4
|
+
const url = String(args.url || '');
|
|
5
|
+
const selectors = args.selectors;
|
|
6
|
+
const extract = args.extract;
|
|
7
|
+
const selector = args.selector;
|
|
8
|
+
if (!url) {
|
|
9
|
+
return {
|
|
10
|
+
content: [{ type: 'text', text: 'Error: url is required' }],
|
|
11
|
+
isError: true,
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
try {
|
|
15
|
+
const client = createClient({ timeout: 30000 });
|
|
16
|
+
const response = await client.get(url);
|
|
17
|
+
const html = await response.text();
|
|
18
|
+
const doc = await ScrapeDocument.create(html, { baseUrl: url });
|
|
19
|
+
const output = {
|
|
20
|
+
url,
|
|
21
|
+
title: doc.title(),
|
|
22
|
+
};
|
|
23
|
+
if (selector) {
|
|
24
|
+
const elements = doc.selectAll(selector);
|
|
25
|
+
output.results = elements.map(el => ({
|
|
26
|
+
text: el.text(),
|
|
27
|
+
html: el.html(),
|
|
28
|
+
tag: el.tagName(),
|
|
29
|
+
attrs: el.attrs(),
|
|
30
|
+
}));
|
|
31
|
+
output.count = elements.length;
|
|
32
|
+
}
|
|
33
|
+
if (selectors && Object.keys(selectors).length > 0) {
|
|
34
|
+
const extracted = {};
|
|
35
|
+
for (const [key, sel] of Object.entries(selectors)) {
|
|
36
|
+
const isMultiple = sel.endsWith('[]');
|
|
37
|
+
const actualSel = isMultiple ? sel.slice(0, -2) : sel;
|
|
38
|
+
if (isMultiple) {
|
|
39
|
+
extracted[key] = doc.texts(actualSel);
|
|
40
|
+
}
|
|
41
|
+
else {
|
|
42
|
+
extracted[key] = doc.text(actualSel);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
output.data = extracted;
|
|
46
|
+
}
|
|
47
|
+
const extractSet = new Set(extract || []);
|
|
48
|
+
if (extractSet.has('links') || extractSet.has('all')) {
|
|
49
|
+
const links = doc.links({ absolute: true });
|
|
50
|
+
output.links = links.slice(0, 100).map(l => ({
|
|
51
|
+
href: l.href,
|
|
52
|
+
text: l.text?.slice(0, 100),
|
|
53
|
+
rel: l.rel,
|
|
54
|
+
}));
|
|
55
|
+
output.linkCount = links.length;
|
|
56
|
+
}
|
|
57
|
+
if (extractSet.has('images') || extractSet.has('all')) {
|
|
58
|
+
const images = doc.images({ absolute: true });
|
|
59
|
+
output.images = images.slice(0, 50).map(img => ({
|
|
60
|
+
src: img.src,
|
|
61
|
+
alt: img.alt,
|
|
62
|
+
width: img.width,
|
|
63
|
+
height: img.height,
|
|
64
|
+
}));
|
|
65
|
+
output.imageCount = images.length;
|
|
66
|
+
}
|
|
67
|
+
if (extractSet.has('meta') || extractSet.has('all')) {
|
|
68
|
+
output.meta = doc.meta();
|
|
69
|
+
}
|
|
70
|
+
if (extractSet.has('og') || extractSet.has('all')) {
|
|
71
|
+
output.openGraph = doc.openGraph();
|
|
72
|
+
}
|
|
73
|
+
if (extractSet.has('twitter') || extractSet.has('all')) {
|
|
74
|
+
output.twitterCard = doc.twitterCard();
|
|
75
|
+
}
|
|
76
|
+
if (extractSet.has('jsonld') || extractSet.has('all')) {
|
|
77
|
+
output.jsonLd = doc.jsonLd();
|
|
78
|
+
}
|
|
79
|
+
if (extractSet.has('tables') || extractSet.has('all')) {
|
|
80
|
+
const tables = doc.tables();
|
|
81
|
+
output.tables = tables.slice(0, 10).map(t => ({
|
|
82
|
+
headers: t.headers,
|
|
83
|
+
rows: t.rows.slice(0, 50),
|
|
84
|
+
}));
|
|
85
|
+
output.tableCount = tables.length;
|
|
86
|
+
}
|
|
87
|
+
if (extractSet.has('forms') || extractSet.has('all')) {
|
|
88
|
+
output.forms = doc.forms();
|
|
89
|
+
}
|
|
90
|
+
if (extractSet.has('headings')) {
|
|
91
|
+
output.headings = {
|
|
92
|
+
h1: doc.texts('h1'),
|
|
93
|
+
h2: doc.texts('h2'),
|
|
94
|
+
h3: doc.texts('h3'),
|
|
95
|
+
};
|
|
96
|
+
}
|
|
97
|
+
return {
|
|
98
|
+
content: [{
|
|
99
|
+
type: 'text',
|
|
100
|
+
text: JSON.stringify(output, null, 2),
|
|
101
|
+
}],
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
catch (error) {
|
|
105
|
+
return {
|
|
106
|
+
content: [{
|
|
107
|
+
type: 'text',
|
|
108
|
+
text: `Scrape failed: ${error.message}`,
|
|
109
|
+
}],
|
|
110
|
+
isError: true,
|
|
111
|
+
};
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
export const scrapeTools = [
|
|
115
|
+
{
|
|
116
|
+
name: 'rek_scrape',
|
|
117
|
+
description: `Scrape a web page and extract data using CSS selectors.
|
|
118
|
+
|
|
119
|
+
Supports multiple extraction modes:
|
|
120
|
+
- Single selector: Extract elements matching one CSS selector
|
|
121
|
+
- Selector map: Extract multiple fields at once
|
|
122
|
+
- Built-in extractors: links, images, meta, og, twitter, jsonld, tables, forms, headings
|
|
123
|
+
|
|
124
|
+
Examples:
|
|
125
|
+
- Get all product titles: selector=".product-title"
|
|
126
|
+
- Extract multiple fields: selectors={"title":"h1","price":".price","desc":".description"}
|
|
127
|
+
- Get all links and images: extract=["links","images"]
|
|
128
|
+
- Full extraction: extract=["all"]`,
|
|
129
|
+
inputSchema: {
|
|
130
|
+
type: 'object',
|
|
131
|
+
properties: {
|
|
132
|
+
url: {
|
|
133
|
+
type: 'string',
|
|
134
|
+
description: 'URL to scrape',
|
|
135
|
+
},
|
|
136
|
+
selector: {
|
|
137
|
+
type: 'string',
|
|
138
|
+
description: 'Single CSS selector to extract elements (e.g., ".product-card", "article h2")',
|
|
139
|
+
},
|
|
140
|
+
selectors: {
|
|
141
|
+
type: 'object',
|
|
142
|
+
description: 'Map of field names to CSS selectors. Add [] suffix for multiple values (e.g., {"title":"h1","links[]":"a"})',
|
|
143
|
+
},
|
|
144
|
+
extract: {
|
|
145
|
+
type: 'array',
|
|
146
|
+
items: { type: 'string' },
|
|
147
|
+
description: 'Built-in extractors to run: links, images, meta, og, twitter, jsonld, tables, forms, headings, all',
|
|
148
|
+
},
|
|
149
|
+
},
|
|
150
|
+
required: ['url'],
|
|
151
|
+
},
|
|
152
|
+
},
|
|
153
|
+
];
|
|
154
|
+
/**
 * Handler lookup for the scrape module: each key is a tool name and each
 * value is the async function that executes that tool.
 */
export const scrapeToolHandlers = {
    rek_scrape: scrapeUrl,
};
|