recker 1.0.28 → 1.0.29-next.3524ab6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -1
- package/dist/cli/tui/scroll-buffer.js +4 -4
- package/dist/cli/tui/shell.d.ts +1 -0
- package/dist/cli/tui/shell.js +375 -18
- package/dist/mcp/server.js +5 -0
- package/dist/mcp/tools/seo.d.ts +3 -0
- package/dist/mcp/tools/seo.js +427 -0
- package/dist/scrape/index.d.ts +2 -0
- package/dist/scrape/index.js +1 -0
- package/dist/scrape/spider.d.ts +61 -0
- package/dist/scrape/spider.js +250 -0
- package/dist/seo/analyzer.js +27 -0
- package/dist/seo/index.d.ts +3 -1
- package/dist/seo/index.js +1 -0
- package/dist/seo/rules/accessibility.js +620 -54
- package/dist/seo/rules/best-practices.d.ts +2 -0
- package/dist/seo/rules/best-practices.js +188 -0
- package/dist/seo/rules/crawl.d.ts +2 -0
- package/dist/seo/rules/crawl.js +307 -0
- package/dist/seo/rules/cwv.d.ts +2 -0
- package/dist/seo/rules/cwv.js +337 -0
- package/dist/seo/rules/ecommerce.d.ts +2 -0
- package/dist/seo/rules/ecommerce.js +252 -0
- package/dist/seo/rules/i18n.d.ts +2 -0
- package/dist/seo/rules/i18n.js +222 -0
- package/dist/seo/rules/index.d.ts +32 -0
- package/dist/seo/rules/index.js +71 -0
- package/dist/seo/rules/internal-linking.d.ts +2 -0
- package/dist/seo/rules/internal-linking.js +375 -0
- package/dist/seo/rules/local.d.ts +2 -0
- package/dist/seo/rules/local.js +265 -0
- package/dist/seo/rules/pwa.d.ts +2 -0
- package/dist/seo/rules/pwa.js +302 -0
- package/dist/seo/rules/readability.d.ts +2 -0
- package/dist/seo/rules/readability.js +255 -0
- package/dist/seo/rules/security.js +406 -28
- package/dist/seo/rules/social.d.ts +2 -0
- package/dist/seo/rules/social.js +373 -0
- package/dist/seo/rules/types.d.ts +155 -0
- package/dist/seo/seo-spider.d.ts +47 -0
- package/dist/seo/seo-spider.js +362 -0
- package/dist/seo/types.d.ts +24 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -109,6 +109,8 @@ See [Mini Client documentation](./docs/http/18-mini-client.md) for more.
|
|
|
109
109
|
| **Type-Safe** | Full TypeScript with Zod schema validation. |
|
|
110
110
|
| **Observable** | DNS/TCP/TLS/TTFB timing breakdown per request. |
|
|
111
111
|
| **Resilient** | Retry, circuit breaker, rate limiting, deduplication. |
|
|
112
|
+
| **SEO Analysis** | 250+ rules across 21 categories. Site-wide crawling with duplicate detection. |
|
|
113
|
+
| **Spider Crawler** | Web crawler with URL deduplication, depth control, and concurrency. |
|
|
112
114
|
| **GeoIP (Offline)** | MaxMind GeoLite2 database with bogon detection. |
|
|
113
115
|
| **RDAP Support** | Modern WHOIS with IANA Bootstrap and TLD detection. |
|
|
114
116
|
|
|
@@ -133,11 +135,31 @@ console.log(response.timings);
|
|
|
133
135
|
// { dns: 12, tcp: 8, tls: 45, firstByte: 23, total: 156 }
|
|
134
136
|
```
|
|
135
137
|
|
|
136
|
-
### Scraping
|
|
138
|
+
### Scraping & Spider
|
|
137
139
|
|
|
138
140
|
```typescript
|
|
141
|
+
// Scrape single page
|
|
139
142
|
const doc = await client.scrape('https://example.com');
|
|
140
143
|
const titles = doc.selectAll('h1').map(el => el.text());
|
|
144
|
+
|
|
145
|
+
// Crawl entire site
|
|
146
|
+
import { spider } from 'recker/scrape';
|
|
147
|
+
const result = await spider('https://example.com', { maxPages: 50 });
|
|
148
|
+
console.log(`Crawled ${result.pages.length} pages`);
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### SEO Analysis
|
|
152
|
+
|
|
153
|
+
```typescript
|
|
154
|
+
import { analyzeSeo, seoSpider } from 'recker/seo';
|
|
155
|
+
|
|
156
|
+
// Single page analysis - 250+ checks across 21 categories
|
|
157
|
+
const report = await analyzeSeo(html, { baseUrl: 'https://example.com' });
|
|
158
|
+
console.log(`Score: ${report.score}/100 (${report.grade})`);
|
|
159
|
+
|
|
160
|
+
// Site-wide analysis - detect duplicates and orphan pages
|
|
161
|
+
const siteReport = await seoSpider('https://example.com', { seo: true });
|
|
162
|
+
console.log(`Duplicate titles: ${siteReport.summary.duplicateTitles}`);
|
|
141
163
|
```
|
|
142
164
|
|
|
143
165
|
### Circuit Breaker
|
|
@@ -174,6 +196,9 @@ rek -o data.json api.com/export
|
|
|
174
196
|
# Interactive shell
|
|
175
197
|
rek shell
|
|
176
198
|
|
|
199
|
+
# SEO analysis
|
|
200
|
+
rek seo https://example.com
|
|
201
|
+
|
|
177
202
|
# Mock servers for testing
|
|
178
203
|
rek serve http # HTTP on :3000
|
|
179
204
|
rek serve ws # WebSocket on :8080
|
|
@@ -187,6 +212,8 @@ See [CLI Documentation](./docs/cli/01-overview.md) for more.
|
|
|
187
212
|
- **[Quick Start](./docs/http/01-quickstart.md)** - Get running in 2 minutes
|
|
188
213
|
- **[Mini Client](./docs/http/18-mini-client.md)** - Maximum performance mode
|
|
189
214
|
- **[CLI Guide](./docs/cli/01-overview.md)** - Terminal client documentation
|
|
215
|
+
- **[SEO Analysis](./docs/http/19-seo.md)** - 250+ rules, site-wide crawling
|
|
216
|
+
- **[Web Scraping](./docs/http/14-scraping.md)** - HTML parsing and Spider crawler
|
|
190
217
|
- **[API Reference](./docs/reference/01-api.md)** - Complete API documentation
|
|
191
218
|
- **[Configuration](./docs/http/05-configuration.md)** - Client options
|
|
192
219
|
- **[Plugins](./docs/http/10-plugins.md)** - Extend functionality
|
|
@@ -119,17 +119,17 @@ export class ScrollBuffer extends EventEmitter {
|
|
|
119
119
|
}
|
|
120
120
|
export function parseScrollKey(data) {
|
|
121
121
|
const str = data.toString();
|
|
122
|
-
if (str === '\x1b[5~' || str === '\x1bOy')
|
|
122
|
+
if (str === '\x1b[5~' || str === '\x1bOy' || str === '\x1b[5;5~' || str === '\x1b[5;2~')
|
|
123
123
|
return 'pageUp';
|
|
124
|
-
if (str === '\x1b[6~' || str === '\x1bOs')
|
|
124
|
+
if (str === '\x1b[6~' || str === '\x1bOs' || str === '\x1b[6;5~' || str === '\x1b[6;2~')
|
|
125
125
|
return 'pageDown';
|
|
126
126
|
if (str === '\x1b[1;2A')
|
|
127
127
|
return 'scrollUp';
|
|
128
128
|
if (str === '\x1b[1;2B')
|
|
129
129
|
return 'scrollDown';
|
|
130
|
-
if (str === '\x1b[H' || str === '\x1b[1~' || str === '\x1bOH')
|
|
130
|
+
if (str === '\x1b[H' || str === '\x1b[1~' || str === '\x1bOH' || str === '\x1b[7~')
|
|
131
131
|
return 'home';
|
|
132
|
-
if (str === '\x1b[F' || str === '\x1b[4~' || str === '\x1bOF')
|
|
132
|
+
if (str === '\x1b[F' || str === '\x1b[4~' || str === '\x1bOF' || str === '\x1b[8~')
|
|
133
133
|
return 'end';
|
|
134
134
|
if (str === 'q' || str === 'Q')
|
|
135
135
|
return 'quit';
|
package/dist/cli/tui/shell.d.ts
CHANGED
package/dist/cli/tui/shell.js
CHANGED
|
@@ -10,11 +10,12 @@ import { inspectTLS } from '../../utils/tls-inspector.js';
|
|
|
10
10
|
import { getSecurityRecords } from '../../utils/dns-toolkit.js';
|
|
11
11
|
import { rdap } from '../../utils/rdap.js';
|
|
12
12
|
import { ScrapeDocument } from '../../scrape/document.js';
|
|
13
|
+
import { Spider } from '../../scrape/spider.js';
|
|
13
14
|
import colors from '../../utils/colors.js';
|
|
14
15
|
import { getShellSearch } from './shell-search.js';
|
|
15
16
|
import { openSearchPanel } from './search-panel.js';
|
|
16
17
|
import { ScrollBuffer, parseScrollKey, parseMouseScroll, disableMouseReporting } from './scroll-buffer.js';
|
|
17
|
-
import { analyzeSeo } from '../../seo/index.js';
|
|
18
|
+
import { analyzeSeo, SeoSpider } from '../../seo/index.js';
|
|
18
19
|
let highlight;
|
|
19
20
|
async function initDependencies() {
|
|
20
21
|
if (!highlight) {
|
|
@@ -94,7 +95,7 @@ export class RekShell {
|
|
|
94
95
|
'get', 'post', 'put', 'delete', 'patch', 'head', 'options',
|
|
95
96
|
'ws', 'udp', 'load', 'chat', 'ai',
|
|
96
97
|
'whois', 'tls', 'ssl', 'security', 'ip', 'dns', 'dns:propagate', 'dns:email', 'rdap', 'ping',
|
|
97
|
-
'scrap', '$', '$text', '$attr', '$html', '$links', '$images', '$scripts', '$css', '$sourcemaps', '$unmap', '$unmap:view', '$unmap:save', '$beautify', '$beautify:save', '$table',
|
|
98
|
+
'scrap', 'spider', '$', '$text', '$attr', '$html', '$links', '$images', '$scripts', '$css', '$sourcemaps', '$unmap', '$unmap:view', '$unmap:save', '$beautify', '$beautify:save', '$table',
|
|
98
99
|
'?', 'search', 'suggest', 'example',
|
|
99
100
|
'help', 'clear', 'exit', 'set', 'url', 'vars', 'env'
|
|
100
101
|
];
|
|
@@ -172,20 +173,36 @@ export class RekShell {
|
|
|
172
173
|
}
|
|
173
174
|
return true;
|
|
174
175
|
}
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
if (scrollKey
|
|
178
|
-
if (
|
|
176
|
+
try {
|
|
177
|
+
const scrollKey = parseScrollKey(data);
|
|
178
|
+
if (scrollKey) {
|
|
179
|
+
if (scrollKey === 'quit') {
|
|
180
|
+
if (self.inScrollMode) {
|
|
181
|
+
self.exitScrollMode();
|
|
182
|
+
return true;
|
|
183
|
+
}
|
|
184
|
+
return originalEmit(event, ...args);
|
|
185
|
+
}
|
|
186
|
+
self.handleScrollKey(scrollKey);
|
|
187
|
+
return true;
|
|
188
|
+
}
|
|
189
|
+
if (self.inScrollMode) {
|
|
190
|
+
if (str === '\x1b[A') {
|
|
191
|
+
self.handleScrollKey('scrollUp');
|
|
192
|
+
return true;
|
|
193
|
+
}
|
|
194
|
+
if (str === '\x1b[B') {
|
|
195
|
+
self.handleScrollKey('scrollDown');
|
|
196
|
+
return true;
|
|
197
|
+
}
|
|
198
|
+
if (str === '\x1b' || str === '\x1b\x1b') {
|
|
179
199
|
self.exitScrollMode();
|
|
180
200
|
return true;
|
|
181
201
|
}
|
|
182
|
-
return
|
|
202
|
+
return true;
|
|
183
203
|
}
|
|
184
|
-
self.handleScrollKey(scrollKey);
|
|
185
|
-
return true;
|
|
186
204
|
}
|
|
187
|
-
|
|
188
|
-
return true;
|
|
205
|
+
catch {
|
|
189
206
|
}
|
|
190
207
|
}
|
|
191
208
|
return originalEmit(event, ...args);
|
|
@@ -193,6 +210,9 @@ export class RekShell {
|
|
|
193
210
|
}
|
|
194
211
|
}
|
|
195
212
|
handleScrollKey(key) {
|
|
213
|
+
if (!this.originalStdoutWrite) {
|
|
214
|
+
return;
|
|
215
|
+
}
|
|
196
216
|
let needsRedraw = false;
|
|
197
217
|
switch (key) {
|
|
198
218
|
case 'pageUp':
|
|
@@ -249,11 +269,15 @@ export class RekShell {
|
|
|
249
269
|
enterScrollMode() {
|
|
250
270
|
if (this.inScrollMode)
|
|
251
271
|
return;
|
|
272
|
+
if (!this.originalStdoutWrite)
|
|
273
|
+
return;
|
|
252
274
|
this.inScrollMode = true;
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
275
|
+
try {
|
|
276
|
+
this.rl.pause();
|
|
277
|
+
}
|
|
278
|
+
catch {
|
|
256
279
|
}
|
|
280
|
+
this.originalStdoutWrite('\x1b[?25l');
|
|
257
281
|
this.renderScrollView();
|
|
258
282
|
}
|
|
259
283
|
exitScrollMode() {
|
|
@@ -287,7 +311,7 @@ export class RekShell {
|
|
|
287
311
|
const scrollInfo = this.scrollBuffer.isScrolledUp
|
|
288
312
|
? colors.yellow(`↑ ${this.scrollBuffer.position} lines | ${info.percent}% | `)
|
|
289
313
|
: '';
|
|
290
|
-
const helpText = colors.gray('
|
|
314
|
+
const helpText = colors.gray('↑↓/PgUp/PgDn • Home/End • Esc/Q to exit');
|
|
291
315
|
const statusBar = `\x1b[${rows};1H\x1b[7m ${scrollInfo}${helpText} \x1b[0m`;
|
|
292
316
|
this.originalStdoutWrite(statusBar);
|
|
293
317
|
}
|
|
@@ -368,6 +392,9 @@ export class RekShell {
|
|
|
368
392
|
case 'scrap':
|
|
369
393
|
await this.runScrap(parts[1]);
|
|
370
394
|
return;
|
|
395
|
+
case 'spider':
|
|
396
|
+
await this.runSpider(parts.slice(1));
|
|
397
|
+
return;
|
|
371
398
|
case '$':
|
|
372
399
|
await this.runSelect(parts.slice(1).join(' '));
|
|
373
400
|
return;
|
|
@@ -972,11 +999,20 @@ ${colors.bold('Details:')}`);
|
|
|
972
999
|
const html = await res.text();
|
|
973
1000
|
const duration = Math.round(performance.now() - startTime);
|
|
974
1001
|
const report = await analyzeSeo(html, { baseUrl: url });
|
|
1002
|
+
const t = res.timings;
|
|
1003
|
+
report.timing = {
|
|
1004
|
+
ttfb: t?.firstByte ? Math.round(t.firstByte) : undefined,
|
|
1005
|
+
total: t?.total ? Math.round(t.total) : duration,
|
|
1006
|
+
dns: t?.dns ? Math.round(t.dns) : undefined,
|
|
1007
|
+
tcp: t?.tcp ? Math.round(t.tcp) : undefined,
|
|
1008
|
+
tls: t?.tls ? Math.round(t.tls) : undefined,
|
|
1009
|
+
download: t?.content ? Math.round(t.content) : undefined,
|
|
1010
|
+
};
|
|
975
1011
|
if (jsonOutput) {
|
|
976
1012
|
const jsonResult = {
|
|
977
1013
|
url,
|
|
978
1014
|
analyzedAt: new Date().toISOString(),
|
|
979
|
-
|
|
1015
|
+
timing: report.timing,
|
|
980
1016
|
score: report.score,
|
|
981
1017
|
grade: report.grade,
|
|
982
1018
|
title: report.title,
|
|
@@ -985,8 +1021,9 @@ ${colors.bold('Details:')}`);
|
|
|
985
1021
|
headings: report.headings,
|
|
986
1022
|
links: report.links,
|
|
987
1023
|
images: report.images,
|
|
988
|
-
openGraph: report.
|
|
989
|
-
twitterCard: report.
|
|
1024
|
+
openGraph: report.openGraph,
|
|
1025
|
+
twitterCard: report.twitterCard,
|
|
1026
|
+
social: report.social,
|
|
990
1027
|
jsonLd: report.jsonLd,
|
|
991
1028
|
technical: report.technical,
|
|
992
1029
|
checks: report.checks,
|
|
@@ -1024,6 +1061,50 @@ Grade: ${gradeColor(colors.bold(report.grade))} (${report.score}/100)
|
|
|
1024
1061
|
: report.metaDescription.text;
|
|
1025
1062
|
console.log(colors.bold('Description:') + ` ${desc} ` + colors.gray(`(${report.metaDescription.length} chars)`));
|
|
1026
1063
|
}
|
|
1064
|
+
if (report.openGraph && Object.values(report.openGraph).some(v => v)) {
|
|
1065
|
+
console.log('');
|
|
1066
|
+
console.log(colors.bold(colors.cyan('OpenGraph:')));
|
|
1067
|
+
if (report.openGraph.title) {
|
|
1068
|
+
const ogTitle = report.openGraph.title.length > 60
|
|
1069
|
+
? report.openGraph.title.slice(0, 57) + '...'
|
|
1070
|
+
: report.openGraph.title;
|
|
1071
|
+
console.log(` ${colors.gray('og:title:')} ${ogTitle}`);
|
|
1072
|
+
}
|
|
1073
|
+
if (report.openGraph.description) {
|
|
1074
|
+
const ogDesc = report.openGraph.description.length > 60
|
|
1075
|
+
? report.openGraph.description.slice(0, 57) + '...'
|
|
1076
|
+
: report.openGraph.description;
|
|
1077
|
+
console.log(` ${colors.gray('og:description:')} ${ogDesc}`);
|
|
1078
|
+
}
|
|
1079
|
+
if (report.openGraph.image) {
|
|
1080
|
+
const ogImg = report.openGraph.image.length > 50
|
|
1081
|
+
? '...' + report.openGraph.image.slice(-47)
|
|
1082
|
+
: report.openGraph.image;
|
|
1083
|
+
console.log(` ${colors.gray('og:image:')} ${colors.blue(ogImg)}`);
|
|
1084
|
+
}
|
|
1085
|
+
if (report.openGraph.type) {
|
|
1086
|
+
console.log(` ${colors.gray('og:type:')} ${report.openGraph.type}`);
|
|
1087
|
+
}
|
|
1088
|
+
}
|
|
1089
|
+
if (report.timing) {
|
|
1090
|
+
const t = report.timing;
|
|
1091
|
+
console.log('');
|
|
1092
|
+
console.log(colors.bold('Timing:'));
|
|
1093
|
+
const timings = [];
|
|
1094
|
+
if (t.dns !== undefined)
|
|
1095
|
+
timings.push(`DNS ${t.dns}ms`);
|
|
1096
|
+
if (t.tcp !== undefined)
|
|
1097
|
+
timings.push(`TCP ${t.tcp}ms`);
|
|
1098
|
+
if (t.tls !== undefined)
|
|
1099
|
+
timings.push(`TLS ${t.tls}ms`);
|
|
1100
|
+
if (t.ttfb !== undefined)
|
|
1101
|
+
timings.push(`TTFB ${t.ttfb}ms`);
|
|
1102
|
+
if (t.download !== undefined)
|
|
1103
|
+
timings.push(`Download ${t.download}ms`);
|
|
1104
|
+
if (t.total !== undefined)
|
|
1105
|
+
timings.push(`Total ${t.total}ms`);
|
|
1106
|
+
console.log(` ${timings.join(' → ')}`);
|
|
1107
|
+
}
|
|
1027
1108
|
if (report.content) {
|
|
1028
1109
|
console.log(colors.bold('Content:') + ` ${report.content.wordCount} words, ${report.content.paragraphCount} paragraphs, ~${report.content.readingTimeMinutes} min read`);
|
|
1029
1110
|
}
|
|
@@ -1434,6 +1515,274 @@ ${colors.bold('Network:')}
|
|
|
1434
1515
|
}
|
|
1435
1516
|
console.log('');
|
|
1436
1517
|
}
|
|
1518
|
+
async runSpider(args) {
|
|
1519
|
+
let url = '';
|
|
1520
|
+
let maxDepth = 3;
|
|
1521
|
+
let maxPages = 100;
|
|
1522
|
+
let concurrency = 5;
|
|
1523
|
+
let seoEnabled = false;
|
|
1524
|
+
let outputFile = '';
|
|
1525
|
+
for (let i = 0; i < args.length; i++) {
|
|
1526
|
+
const arg = args[i];
|
|
1527
|
+
if (arg.startsWith('depth=')) {
|
|
1528
|
+
maxDepth = parseInt(arg.split('=')[1]) || 4;
|
|
1529
|
+
}
|
|
1530
|
+
else if (arg.startsWith('limit=')) {
|
|
1531
|
+
maxPages = parseInt(arg.split('=')[1]) || 100;
|
|
1532
|
+
}
|
|
1533
|
+
else if (arg.startsWith('concurrency=')) {
|
|
1534
|
+
concurrency = parseInt(arg.split('=')[1]) || 5;
|
|
1535
|
+
}
|
|
1536
|
+
else if (arg === 'seo') {
|
|
1537
|
+
seoEnabled = true;
|
|
1538
|
+
}
|
|
1539
|
+
else if (arg.startsWith('output=')) {
|
|
1540
|
+
outputFile = arg.split('=')[1] || '';
|
|
1541
|
+
}
|
|
1542
|
+
else if (!arg.includes('=')) {
|
|
1543
|
+
url = arg;
|
|
1544
|
+
}
|
|
1545
|
+
}
|
|
1546
|
+
if (!url) {
|
|
1547
|
+
if (!this.baseUrl) {
|
|
1548
|
+
console.log(colors.yellow('Usage: spider <url> [options]'));
|
|
1549
|
+
console.log(colors.gray(' Options:'));
|
|
1550
|
+
console.log(colors.gray(' depth=4 Max crawl depth'));
|
|
1551
|
+
console.log(colors.gray(' limit=100 Max pages to crawl'));
|
|
1552
|
+
console.log(colors.gray(' concurrency=5 Concurrent requests'));
|
|
1553
|
+
console.log(colors.gray(' seo Enable SEO analysis'));
|
|
1554
|
+
console.log(colors.gray(' output=file.json Save JSON report'));
|
|
1555
|
+
console.log(colors.gray(' Examples:'));
|
|
1556
|
+
console.log(colors.gray(' spider example.com'));
|
|
1557
|
+
console.log(colors.gray(' spider example.com depth=2 limit=50'));
|
|
1558
|
+
console.log(colors.gray(' spider example.com seo output=seo-report.json'));
|
|
1559
|
+
return;
|
|
1560
|
+
}
|
|
1561
|
+
url = this.baseUrl;
|
|
1562
|
+
}
|
|
1563
|
+
else if (!url.startsWith('http')) {
|
|
1564
|
+
url = `https://${url}`;
|
|
1565
|
+
}
|
|
1566
|
+
console.log(colors.cyan(`\nSpider starting: ${url}`));
|
|
1567
|
+
const modeLabel = seoEnabled ? colors.magenta(' + SEO') : '';
|
|
1568
|
+
console.log(colors.gray(` Depth: ${maxDepth} | Limit: ${maxPages} | Concurrency: ${concurrency}${modeLabel}`));
|
|
1569
|
+
if (outputFile) {
|
|
1570
|
+
console.log(colors.gray(` Output: ${outputFile}`));
|
|
1571
|
+
}
|
|
1572
|
+
console.log('');
|
|
1573
|
+
if (seoEnabled) {
|
|
1574
|
+
const seoSpider = new SeoSpider({
|
|
1575
|
+
maxDepth,
|
|
1576
|
+
maxPages,
|
|
1577
|
+
concurrency,
|
|
1578
|
+
sameDomain: true,
|
|
1579
|
+
delay: 100,
|
|
1580
|
+
seo: true,
|
|
1581
|
+
output: outputFile || undefined,
|
|
1582
|
+
onProgress: (progress) => {
|
|
1583
|
+
process.stdout.write(`\r${colors.gray(' Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth} `);
|
|
1584
|
+
},
|
|
1585
|
+
});
|
|
1586
|
+
try {
|
|
1587
|
+
const result = await seoSpider.crawl(url);
|
|
1588
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
1589
|
+
console.log(colors.green(`\n✔ SEO Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
|
|
1590
|
+
console.log(` ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
|
|
1591
|
+
console.log(` ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
|
|
1592
|
+
console.log(` ${colors.cyan('Avg SEO Score')}: ${result.summary.avgScore}/100`);
|
|
1593
|
+
const responseTimes = result.pages.filter(p => p.duration > 0).map(p => p.duration);
|
|
1594
|
+
const avgResponseTime = responseTimes.length > 0
|
|
1595
|
+
? Math.round(responseTimes.reduce((a, b) => a + b, 0) / responseTimes.length)
|
|
1596
|
+
: 0;
|
|
1597
|
+
const minResponseTime = responseTimes.length > 0 ? Math.min(...responseTimes) : 0;
|
|
1598
|
+
const maxResponseTime = responseTimes.length > 0 ? Math.max(...responseTimes) : 0;
|
|
1599
|
+
const reqPerSec = result.duration > 0 ? (result.pages.length / (result.duration / 1000)).toFixed(1) : '0';
|
|
1600
|
+
const statusCounts = new Map();
|
|
1601
|
+
for (const page of result.pages) {
|
|
1602
|
+
const status = page.status || 0;
|
|
1603
|
+
statusCounts.set(status, (statusCounts.get(status) || 0) + 1);
|
|
1604
|
+
}
|
|
1605
|
+
let totalInternalLinks = 0;
|
|
1606
|
+
let totalExternalLinks = 0;
|
|
1607
|
+
let totalImages = 0;
|
|
1608
|
+
let imagesWithoutAlt = 0;
|
|
1609
|
+
let pagesWithoutTitle = 0;
|
|
1610
|
+
let pagesWithoutDescription = 0;
|
|
1611
|
+
for (const page of result.pages) {
|
|
1612
|
+
if (page.seoReport) {
|
|
1613
|
+
totalInternalLinks += page.seoReport.links?.internal || 0;
|
|
1614
|
+
totalExternalLinks += page.seoReport.links?.external || 0;
|
|
1615
|
+
totalImages += page.seoReport.images?.total || 0;
|
|
1616
|
+
imagesWithoutAlt += page.seoReport.images?.withoutAlt || 0;
|
|
1617
|
+
if (!page.seoReport.title?.text)
|
|
1618
|
+
pagesWithoutTitle++;
|
|
1619
|
+
if (!page.seoReport.metaDescription?.text)
|
|
1620
|
+
pagesWithoutDescription++;
|
|
1621
|
+
}
|
|
1622
|
+
}
|
|
1623
|
+
console.log(colors.bold('\n Performance:'));
|
|
1624
|
+
console.log(` ${colors.gray('Avg Response:')} ${avgResponseTime}ms`);
|
|
1625
|
+
console.log(` ${colors.gray('Min/Max:')} ${minResponseTime}ms / ${maxResponseTime}ms`);
|
|
1626
|
+
console.log(` ${colors.gray('Throughput:')} ${reqPerSec} req/s`);
|
|
1627
|
+
console.log(colors.bold('\n HTTP Status:'));
|
|
1628
|
+
const sortedStatuses = Array.from(statusCounts.entries()).sort((a, b) => b[1] - a[1]);
|
|
1629
|
+
for (const [status, count] of sortedStatuses.slice(0, 5)) {
|
|
1630
|
+
const statusLabel = status === 0 ? 'Error' : status.toString();
|
|
1631
|
+
const statusColor = status >= 400 || status === 0 ? colors.red :
|
|
1632
|
+
status >= 300 ? colors.yellow : colors.green;
|
|
1633
|
+
const pct = ((count / result.pages.length) * 100).toFixed(0);
|
|
1634
|
+
console.log(` ${statusColor(statusLabel.padEnd(5))} ${count.toString().padStart(3)} (${pct}%)`);
|
|
1635
|
+
}
|
|
1636
|
+
console.log(colors.bold('\n Content:'));
|
|
1637
|
+
console.log(` ${colors.gray('Internal links:')} ${totalInternalLinks.toLocaleString()}`);
|
|
1638
|
+
console.log(` ${colors.gray('External links:')} ${totalExternalLinks.toLocaleString()}`);
|
|
1639
|
+
console.log(` ${colors.gray('Images:')} ${totalImages.toLocaleString()} (${imagesWithoutAlt} missing alt)`);
|
|
1640
|
+
console.log(` ${colors.gray('Missing title:')} ${pagesWithoutTitle}`);
|
|
1641
|
+
console.log(` ${colors.gray('Missing desc:')} ${pagesWithoutDescription}`);
|
|
1642
|
+
console.log(colors.bold('\n SEO Summary:'));
|
|
1643
|
+
const { summary } = result;
|
|
1644
|
+
console.log(` ${colors.red('✗')} Pages with errors: ${summary.pagesWithErrors}`);
|
|
1645
|
+
console.log(` ${colors.yellow('⚠')} Pages with warnings: ${summary.pagesWithWarnings}`);
|
|
1646
|
+
console.log(` ${colors.magenta('⚐')} Duplicate titles: ${summary.duplicateTitles}`);
|
|
1647
|
+
console.log(` ${colors.magenta('⚐')} Duplicate descriptions:${summary.duplicateDescriptions}`);
|
|
1648
|
+
console.log(` ${colors.magenta('⚐')} Duplicate H1s: ${summary.duplicateH1s}`);
|
|
1649
|
+
console.log(` ${colors.gray('○')} Orphan pages: ${summary.orphanPages}`);
|
|
1650
|
+
if (result.siteWideIssues.length > 0) {
|
|
1651
|
+
console.log(colors.bold('\n Site-Wide Issues:'));
|
|
1652
|
+
for (const issue of result.siteWideIssues.slice(0, 10)) {
|
|
1653
|
+
const icon = issue.severity === 'error' ? colors.red('✗') :
|
|
1654
|
+
issue.severity === 'warning' ? colors.yellow('⚠') : colors.gray('○');
|
|
1655
|
+
console.log(` ${icon} ${issue.message}`);
|
|
1656
|
+
if (issue.value) {
|
|
1657
|
+
const truncatedValue = issue.value.length > 50 ? issue.value.slice(0, 47) + '...' : issue.value;
|
|
1658
|
+
console.log(` ${colors.gray(`"${truncatedValue}"`)}`);
|
|
1659
|
+
}
|
|
1660
|
+
const uniquePaths = [...new Set(issue.affectedUrls.map(u => new URL(u).pathname))];
|
|
1661
|
+
if (uniquePaths.length <= 3) {
|
|
1662
|
+
for (const path of uniquePaths) {
|
|
1663
|
+
console.log(` ${colors.gray('→')} ${path}`);
|
|
1664
|
+
}
|
|
1665
|
+
}
|
|
1666
|
+
else {
|
|
1667
|
+
console.log(` ${colors.gray(`→ ${uniquePaths.length} pages affected`)}`);
|
|
1668
|
+
}
|
|
1669
|
+
}
|
|
1670
|
+
if (result.siteWideIssues.length > 10) {
|
|
1671
|
+
console.log(colors.gray(` ... and ${result.siteWideIssues.length - 10} more issues`));
|
|
1672
|
+
}
|
|
1673
|
+
}
|
|
1674
|
+
const pagesWithScores = result.pages
|
|
1675
|
+
.filter(p => p.seoReport)
|
|
1676
|
+
.sort((a, b) => (a.seoReport?.score || 0) - (b.seoReport?.score || 0));
|
|
1677
|
+
const seenPaths = new Set();
|
|
1678
|
+
const uniquePages = pagesWithScores.filter(page => {
|
|
1679
|
+
const path = new URL(page.url).pathname;
|
|
1680
|
+
if (seenPaths.has(path))
|
|
1681
|
+
return false;
|
|
1682
|
+
seenPaths.add(path);
|
|
1683
|
+
return true;
|
|
1684
|
+
});
|
|
1685
|
+
if (uniquePages.length > 0) {
|
|
1686
|
+
console.log(colors.bold('\n Pages by SEO Score:'));
|
|
1687
|
+
const worstPages = uniquePages.slice(0, 5);
|
|
1688
|
+
for (const page of worstPages) {
|
|
1689
|
+
const score = page.seoReport?.score || 0;
|
|
1690
|
+
const grade = page.seoReport?.grade || '?';
|
|
1691
|
+
const path = new URL(page.url).pathname;
|
|
1692
|
+
const scoreColor = score >= 80 ? colors.green : score >= 60 ? colors.yellow : colors.red;
|
|
1693
|
+
console.log(` ${scoreColor(`${score.toString().padStart(3)}`)} ${colors.gray(`[${grade}]`)} ${path.slice(0, 50)}`);
|
|
1694
|
+
}
|
|
1695
|
+
if (uniquePages.length > 5) {
|
|
1696
|
+
console.log(colors.gray(` ... and ${uniquePages.length - 5} more pages`));
|
|
1697
|
+
}
|
|
1698
|
+
}
|
|
1699
|
+
if (outputFile) {
|
|
1700
|
+
console.log(colors.green(`\n Report saved to: ${outputFile}`));
|
|
1701
|
+
}
|
|
1702
|
+
this.lastResponse = result;
|
|
1703
|
+
console.log(colors.gray('\n Result stored in lastResponse.'));
|
|
1704
|
+
}
|
|
1705
|
+
catch (error) {
|
|
1706
|
+
console.error(colors.red(`SEO Spider failed: ${error.message}`));
|
|
1707
|
+
}
|
|
1708
|
+
}
|
|
1709
|
+
else {
|
|
1710
|
+
const spider = new Spider({
|
|
1711
|
+
maxDepth,
|
|
1712
|
+
maxPages,
|
|
1713
|
+
concurrency,
|
|
1714
|
+
sameDomain: true,
|
|
1715
|
+
delay: 100,
|
|
1716
|
+
onProgress: (progress) => {
|
|
1717
|
+
process.stdout.write(`\r${colors.gray(' Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth} `);
|
|
1718
|
+
},
|
|
1719
|
+
});
|
|
1720
|
+
try {
|
|
1721
|
+
const result = await spider.crawl(url);
|
|
1722
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
1723
|
+
console.log(colors.green(`\n✔ Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
|
|
1724
|
+
console.log(` ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
|
|
1725
|
+
console.log(` ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
|
|
1726
|
+
console.log(` ${colors.cyan('Errors')}: ${result.errors.length}`);
|
|
1727
|
+
const byDepth = new Map();
|
|
1728
|
+
for (const page of result.pages) {
|
|
1729
|
+
byDepth.set(page.depth, (byDepth.get(page.depth) || 0) + 1);
|
|
1730
|
+
}
|
|
1731
|
+
console.log(colors.bold('\n Pages by depth:'));
|
|
1732
|
+
for (const [depth, count] of Array.from(byDepth.entries()).sort((a, b) => a[0] - b[0])) {
|
|
1733
|
+
const bar = '█'.repeat(Math.min(count, 40));
|
|
1734
|
+
console.log(` ${colors.gray(`d${depth}:`)} ${bar} ${count}`);
|
|
1735
|
+
}
|
|
1736
|
+
const topPages = [...result.pages]
|
|
1737
|
+
.filter(p => !p.error)
|
|
1738
|
+
.sort((a, b) => b.links.length - a.links.length)
|
|
1739
|
+
.slice(0, 10);
|
|
1740
|
+
if (topPages.length > 0) {
|
|
1741
|
+
console.log(colors.bold('\n Top pages by outgoing links:'));
|
|
1742
|
+
for (const page of topPages) {
|
|
1743
|
+
const title = page.title.slice(0, 40) || new URL(page.url).pathname;
|
|
1744
|
+
console.log(` ${colors.cyan(page.links.length.toString().padStart(3))} ${title}`);
|
|
1745
|
+
}
|
|
1746
|
+
}
|
|
1747
|
+
const formatError = (error) => {
|
|
1748
|
+
const statusMatch = error.match(/status code (\d{3})/i);
|
|
1749
|
+
if (statusMatch) {
|
|
1750
|
+
return `HTTP ${statusMatch[1]}`;
|
|
1751
|
+
}
|
|
1752
|
+
return error.length > 50 ? error.slice(0, 47) + '...' : error;
|
|
1753
|
+
};
|
|
1754
|
+
if (result.errors.length > 0 && result.errors.length <= 10) {
|
|
1755
|
+
console.log(colors.bold('\n Errors:'));
|
|
1756
|
+
for (const err of result.errors) {
|
|
1757
|
+
const path = new URL(err.url).pathname;
|
|
1758
|
+
console.log(` ${colors.red('✗')} ${path.padEnd(25)} ${colors.gray('→')} ${formatError(err.error)}`);
|
|
1759
|
+
}
|
|
1760
|
+
}
|
|
1761
|
+
else if (result.errors.length > 10) {
|
|
1762
|
+
console.log(colors.yellow(`\n ${result.errors.length} errors (showing first 10):`));
|
|
1763
|
+
for (const err of result.errors.slice(0, 10)) {
|
|
1764
|
+
const path = new URL(err.url).pathname;
|
|
1765
|
+
console.log(` ${colors.red('✗')} ${path.padEnd(25)} ${colors.gray('→')} ${formatError(err.error)}`);
|
|
1766
|
+
}
|
|
1767
|
+
}
|
|
1768
|
+
if (outputFile) {
|
|
1769
|
+
const reportData = {
|
|
1770
|
+
...result,
|
|
1771
|
+
visited: Array.from(result.visited),
|
|
1772
|
+
generatedAt: new Date().toISOString(),
|
|
1773
|
+
};
|
|
1774
|
+
await fs.writeFile(outputFile, JSON.stringify(reportData, null, 2), 'utf-8');
|
|
1775
|
+
console.log(colors.green(`\n Report saved to: ${outputFile}`));
|
|
1776
|
+
}
|
|
1777
|
+
this.lastResponse = result;
|
|
1778
|
+
console.log(colors.gray('\n Result stored in lastResponse. Use $links to explore.'));
|
|
1779
|
+
}
|
|
1780
|
+
catch (error) {
|
|
1781
|
+
console.error(colors.red(`Spider failed: ${error.message}`));
|
|
1782
|
+
}
|
|
1783
|
+
}
|
|
1784
|
+
console.log('');
|
|
1785
|
+
}
|
|
1437
1786
|
async runSelect(selector) {
|
|
1438
1787
|
if (!this.currentDoc) {
|
|
1439
1788
|
console.log(colors.yellow('No document loaded. Use "scrap <url>" first.'));
|
|
@@ -2358,6 +2707,13 @@ ${colors.bold('Network:')}
|
|
|
2358
2707
|
${colors.green('$beautify:save [f]')} Save beautified code to file.
|
|
2359
2708
|
${colors.green('$table <selector>')} Extract table as data.
|
|
2360
2709
|
|
|
2710
|
+
${colors.bold('Web Crawler:')}
|
|
2711
|
+
${colors.green('spider <url>')} Crawl website following internal links.
|
|
2712
|
+
${colors.gray('Options:')}
|
|
2713
|
+
${colors.white('--depth=4')} ${colors.gray('Maximum depth to crawl')}
|
|
2714
|
+
${colors.white('--limit=100')} ${colors.gray('Maximum pages to crawl')}
|
|
2715
|
+
${colors.white('--concurrency=5')} ${colors.gray('Parallel requests')}
|
|
2716
|
+
|
|
2361
2717
|
${colors.bold('Documentation:')}
|
|
2362
2718
|
${colors.green('? <query>')} Search Recker documentation.
|
|
2363
2719
|
${colors.green('search <query>')} Alias for ? (hybrid fuzzy+semantic search).
|
|
@@ -2375,6 +2731,7 @@ ${colors.bold('Network:')}
|
|
|
2375
2731
|
› post /post name="Neo" active:=true role:Admin
|
|
2376
2732
|
› load /heavy-endpoint users=100 mode=stress
|
|
2377
2733
|
› chat openai gpt-5.1
|
|
2734
|
+
› spider example.com depth=2 limit=50
|
|
2378
2735
|
`);
|
|
2379
2736
|
}
|
|
2380
2737
|
}
|
package/dist/mcp/server.js
CHANGED
|
@@ -8,6 +8,7 @@ import { createHybridSearch } from './search/index.js';
|
|
|
8
8
|
import { UnsupportedError } from '../core/errors.js';
|
|
9
9
|
import { getIpInfo, isValidIP, isGeoIPAvailable, isBogon, isIPv6 } from './ip-intel.js';
|
|
10
10
|
import { networkTools, networkToolHandlers } from './tools/network.js';
|
|
11
|
+
import { seoTools, seoToolHandlers } from './tools/seo.js';
|
|
11
12
|
import { ToolRegistry } from './tools/registry.js';
|
|
12
13
|
import { loadToolModules } from './tools/loader.js';
|
|
13
14
|
export class MCPServer {
|
|
@@ -45,6 +46,10 @@ export class MCPServer {
|
|
|
45
46
|
tools: networkTools,
|
|
46
47
|
handlers: networkToolHandlers
|
|
47
48
|
});
|
|
49
|
+
this.toolRegistry.registerModule({
|
|
50
|
+
tools: seoTools,
|
|
51
|
+
handlers: seoToolHandlers
|
|
52
|
+
});
|
|
48
53
|
}
|
|
49
54
|
indexReady = null;
|
|
50
55
|
async ensureIndexReady() {
|