recker 1.0.93 → 1.0.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/scrape/index.d.ts +3 -1
- package/dist/browser/scrape/index.js +2 -0
- package/dist/browser/scrape/spider.d.ts +35 -2
- package/dist/browser/scrape/spider.js +146 -44
- package/dist/browser/scrape/sqlite-crawl-queue.d.ts +24 -0
- package/dist/browser/scrape/sqlite-crawl-queue.js +118 -0
- package/dist/browser/scrape/sqlite-crawl-storage.d.ts +26 -0
- package/dist/browser/scrape/sqlite-crawl-storage.js +76 -0
- package/dist/browser/seo/seo-spider.d.ts +2 -0
- package/dist/browser/seo/seo-spider.js +17 -3
- package/dist/cli/commands/spider-runner.js +2 -1
- package/dist/cli/index.js +0 -3
- package/dist/cli/tui/jobs/spider-job.js +2 -1
- package/dist/mcp/prompts/index.js +15 -6
- package/dist/scrape/index.d.ts +3 -1
- package/dist/scrape/index.js +2 -0
- package/dist/scrape/spider.d.ts +35 -2
- package/dist/scrape/spider.js +146 -44
- package/dist/scrape/sqlite-crawl-queue.d.ts +24 -0
- package/dist/scrape/sqlite-crawl-queue.js +118 -0
- package/dist/scrape/sqlite-crawl-storage.d.ts +26 -0
- package/dist/scrape/sqlite-crawl-storage.js +76 -0
- package/dist/seo/seo-spider.d.ts +2 -0
- package/dist/seo/seo-spider.js +17 -3
- package/dist/version.js +1 -1
- package/package.json +7 -1
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import * as path from 'node:path';
|
|
2
|
+
import * as os from 'node:os';
|
|
3
|
+
import * as crypto from 'node:crypto';
|
|
4
|
+
/**
 * Stores crawl results and crawl errors in a SQLite database.
 *
 * Results go into a `results` table (url, status, and the whole result
 * serialized as JSON); errors go into an `errors` table (url, error).
 * The data methods are declared `async`, but the statement calls inside
 * them are not awaited.
 */
export class SqliteCrawlStorage {
    /** Database handle every statement is prepared against. */
    db;
    /** True when this instance opened `db` itself and is responsible for closing it. */
    ownsDb;
    /** Prepared statements; populated by `init()`. */
    stmts;

    /**
     * @param db Database handle to use.
     * @param ownsDb Whether `close()` (and a failed `init()`) should close `db`.
     */
    constructor(db, ownsDb) {
        this.db = db;
        this.ownsDb = ownsDb;
    }

    /**
     * Builds a ready-to-use storage instance.
     *
     * When `opts.db` is given it is used as-is and is never closed by this
     * class. Otherwise `better-sqlite3` is imported on demand and a database
     * is opened at `opts.dbPath`, or at a `recker-storage-<8 chars>.db` file
     * in the OS temp directory when no path is given, with WAL journaling
     * switched on.
     *
     * @param opts Optional `{ db, dbPath }`.
     * @returns The initialized storage instance.
     * @throws Whatever the import, open, pragma, or schema setup throws. A
     *   handle opened here is closed before the error propagates.
     */
    static async create(opts) {
        let db = opts?.db;
        let ownsDb = false;
        if (!db) {
            const dbPath = opts?.dbPath ?? path.join(os.tmpdir(), `recker-storage-${crypto.randomUUID().slice(0, 8)}.db`);
            // Imported lazily so the module is only needed when no handle is supplied.
            const BetterSqlite3 = (await import('better-sqlite3')).default;
            db = new BetterSqlite3(dbPath);
            ownsDb = true;
            try {
                db.pragma('journal_mode = WAL');
            }
            catch (err) {
                // Nobody else holds this handle yet, so release it here.
                try {
                    db.close();
                }
                catch {
                    // Surface the original failure, not the close error.
                }
                throw err;
            }
        }
        const instance = new SqliteCrawlStorage(db, ownsDb);
        instance.init();
        return instance;
    }

    /**
     * Creates the `results` and `errors` tables if they do not exist and
     * prepares the statements used by the other methods.
     *
     * @throws Whatever `exec`/`prepare` throws. If this instance owns the
     *   handle, the handle is closed first: `create()` would otherwise
     *   discard the instance and leave the connection open with no way for
     *   the caller to close it.
     */
    init() {
        try {
            this.db.exec(`
                CREATE TABLE IF NOT EXISTS results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    url TEXT NOT NULL,
                    status INTEGER NOT NULL,
                    data TEXT NOT NULL
                );
                CREATE TABLE IF NOT EXISTS errors (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    url TEXT NOT NULL,
                    error TEXT NOT NULL
                );
            `);
            this.stmts = {
                saveResult: this.db.prepare('INSERT INTO results (url, status, data) VALUES (?, ?, ?)'),
                saveError: this.db.prepare('INSERT INTO errors (url, error) VALUES (?, ?)'),
                resultCount: this.db.prepare('SELECT COUNT(*) AS cnt FROM results'),
                allResults: this.db.prepare('SELECT data FROM results'),
                allErrors: this.db.prepare('SELECT url, error FROM errors'),
                clearResults: this.db.prepare('DELETE FROM results'),
                clearErrors: this.db.prepare('DELETE FROM errors'),
            };
        }
        catch (err) {
            if (this.ownsDb) {
                try {
                    this.db.close();
                }
                catch {
                    // Surface the original failure, not the close error.
                }
            }
            throw err;
        }
    }

    /**
     * Inserts one result row: `result.url`, `result.status`, and the whole
     * `result` object serialized with `JSON.stringify`.
     */
    async saveResult(result) {
        this.stmts.saveResult.run(result.url, result.status, JSON.stringify(result));
    }

    /** Inserts one error row from `error.url` and `error.error`. */
    async saveError(error) {
        this.stmts.saveError.run(error.url, error.error);
    }

    /** @returns The number of rows in the `results` table. */
    async getResultCount() {
        const row = this.stmts.resultCount.get();
        return row.cnt;
    }

    /** @returns Every stored result, parsed back from its JSON column. */
    async getResults() {
        const rows = this.stmts.allResults.all();
        return rows.map((r) => JSON.parse(r.data));
    }

    /** @returns Every stored error row as `{ url, error }`. */
    async getErrors() {
        return this.stmts.allErrors.all();
    }

    /** Deletes all rows from `results`, then all rows from `errors`. */
    async clear() {
        this.stmts.clearResults.run();
        this.stmts.clearErrors.run();
    }

    /** Closes the database handle, but only when this instance opened it. */
    async close() {
        if (this.ownsDb) {
            this.db.close();
        }
    }
}
|
package/dist/seo/seo-spider.d.ts
CHANGED
|
@@ -5,6 +5,8 @@ export interface SeoSpiderOptions extends SpiderOptions {
|
|
|
5
5
|
seo?: boolean;
|
|
6
6
|
output?: string;
|
|
7
7
|
onSeoAnalysis?: (result: SeoPageResult) => void;
|
|
8
|
+
onBlocked?: (result: SeoPageResult) => void | Promise<void>;
|
|
9
|
+
onError?: (result: SeoPageResult) => void | Promise<void>;
|
|
8
10
|
focusCategories?: string[];
|
|
9
11
|
focusMode?: 'all' | 'links' | 'duplicates' | 'security' | 'ai' | 'resources';
|
|
10
12
|
}
|
package/dist/seo/seo-spider.js
CHANGED
|
@@ -32,11 +32,25 @@ export class SeoSpider {
|
|
|
32
32
|
}
|
|
33
33
|
constructor(options = {}) {
|
|
34
34
|
this.options = options;
|
|
35
|
+
const userOnPage = options.onPage;
|
|
35
36
|
this.spider = new Spider({
|
|
36
37
|
...options,
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
await this.analyzePageDuringCrawl(
|
|
38
|
+
onPage: async (event) => {
|
|
39
|
+
if (this.options.seo && event.html) {
|
|
40
|
+
await this.analyzePageDuringCrawl(event.result, event.html);
|
|
41
|
+
}
|
|
42
|
+
if (userOnPage) {
|
|
43
|
+
await userOnPage(event);
|
|
44
|
+
}
|
|
45
|
+
},
|
|
46
|
+
onBlocked: this.options.onBlocked
|
|
47
|
+
? async (pageResult) => {
|
|
48
|
+
await this.options.onBlocked({ ...pageResult });
|
|
49
|
+
}
|
|
50
|
+
: undefined,
|
|
51
|
+
onError: this.options.onError
|
|
52
|
+
? async (pageResult) => {
|
|
53
|
+
await this.options.onError({ ...pageResult });
|
|
40
54
|
}
|
|
41
55
|
: undefined,
|
|
42
56
|
});
|
package/dist/version.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "recker",
|
|
3
|
-
"version": "1.0.93",
|
|
3
|
+
"version": "1.0.94",
|
|
4
4
|
"description": "Multi-Protocol SDK for the AI Era - HTTP, WebSocket, DNS, FTP, SFTP, Telnet, HLS unified with AI providers and MCP tools",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -241,6 +241,7 @@
|
|
|
241
241
|
"zod": "^4.3.6"
|
|
242
242
|
},
|
|
243
243
|
"peerDependencies": {
|
|
244
|
+
"better-sqlite3": "^11.0.0",
|
|
244
245
|
"cardinal": "^2.1.0",
|
|
245
246
|
"ioredis": "^5.0.0",
|
|
246
247
|
"socks": "^2.8.0",
|
|
@@ -248,6 +249,9 @@
|
|
|
248
249
|
"ws": "^8.0.0"
|
|
249
250
|
},
|
|
250
251
|
"peerDependenciesMeta": {
|
|
252
|
+
"better-sqlite3": {
|
|
253
|
+
"optional": true
|
|
254
|
+
},
|
|
251
255
|
"cardinal": {
|
|
252
256
|
"optional": true
|
|
253
257
|
},
|
|
@@ -266,6 +270,7 @@
|
|
|
266
270
|
},
|
|
267
271
|
"devDependencies": {
|
|
268
272
|
"@hapi/wreck": "^18.1.0",
|
|
273
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
269
274
|
"@types/he": "^1.2.3",
|
|
270
275
|
"@types/needle": "^3.3.0",
|
|
271
276
|
"@types/node": "^25.5.0",
|
|
@@ -274,6 +279,7 @@
|
|
|
274
279
|
"@types/ws": "^8.18.1",
|
|
275
280
|
"@vitest/coverage-v8": "^4.1.0",
|
|
276
281
|
"axios": "^1.13.6",
|
|
282
|
+
"better-sqlite3": "^12.8.0",
|
|
277
283
|
"cardinal": "^2.1.1",
|
|
278
284
|
"cross-fetch": "^4.1.0",
|
|
279
285
|
"domhandler": "^5.0.3",
|