rezo 1.0.43 → 1.0.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/entries/curl.d.ts +115 -0
- package/dist/adapters/entries/fetch.d.ts +115 -0
- package/dist/adapters/entries/http.d.ts +115 -0
- package/dist/adapters/entries/http2.d.ts +115 -0
- package/dist/adapters/entries/react-native.d.ts +115 -0
- package/dist/adapters/entries/xhr.d.ts +115 -0
- package/dist/adapters/fetch.cjs +18 -0
- package/dist/adapters/fetch.js +18 -0
- package/dist/adapters/http.cjs +18 -0
- package/dist/adapters/http.js +18 -0
- package/dist/adapters/http2.cjs +18 -0
- package/dist/adapters/http2.js +18 -0
- package/dist/adapters/index.cjs +6 -6
- package/dist/adapters/xhr.cjs +19 -0
- package/dist/adapters/xhr.js +19 -0
- package/dist/cache/index.cjs +9 -15
- package/dist/cache/index.js +0 -3
- package/dist/core/hooks.cjs +4 -2
- package/dist/core/hooks.js +4 -2
- package/dist/crawler/addon/decodo/index.cjs +1 -0
- package/dist/crawler/addon/decodo/index.js +1 -0
- package/dist/crawler/crawler-options.cjs +1 -0
- package/dist/crawler/crawler-options.js +1 -0
- package/dist/{plugin → crawler}/crawler.cjs +392 -32
- package/dist/{plugin → crawler}/crawler.js +392 -32
- package/dist/crawler/index.cjs +40 -0
- package/dist/{plugin → crawler}/index.js +4 -2
- package/dist/crawler/plugin/file-cacher.cjs +19 -0
- package/dist/crawler/plugin/file-cacher.js +19 -0
- package/dist/crawler/plugin/index.cjs +1 -0
- package/dist/crawler/plugin/index.js +1 -0
- package/dist/crawler/plugin/navigation-history.cjs +43 -0
- package/dist/crawler/plugin/navigation-history.js +43 -0
- package/dist/crawler/plugin/robots-txt.cjs +2 -0
- package/dist/crawler/plugin/robots-txt.js +2 -0
- package/dist/crawler/plugin/url-store.cjs +18 -0
- package/dist/crawler/plugin/url-store.js +18 -0
- package/dist/crawler.d.ts +430 -172
- package/dist/entries/crawler.cjs +5 -5
- package/dist/entries/crawler.js +2 -2
- package/dist/index.cjs +27 -27
- package/dist/index.d.ts +115 -0
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/platform/browser.d.ts +115 -0
- package/dist/platform/bun.d.ts +115 -0
- package/dist/platform/deno.d.ts +115 -0
- package/dist/platform/node.d.ts +115 -0
- package/dist/platform/react-native.d.ts +115 -0
- package/dist/platform/worker.d.ts +115 -0
- package/dist/proxy/index.cjs +5 -5
- package/dist/proxy/index.js +1 -1
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/utils/rate-limit-wait.cjs +217 -0
- package/dist/utils/rate-limit-wait.js +208 -0
- package/package.json +2 -6
- package/dist/cache/file-cacher.cjs +0 -270
- package/dist/cache/file-cacher.js +0 -267
- package/dist/cache/navigation-history.cjs +0 -298
- package/dist/cache/navigation-history.js +0 -296
- package/dist/cache/url-store.cjs +0 -294
- package/dist/cache/url-store.js +0 -291
- package/dist/plugin/addon/decodo/index.cjs +0 -1
- package/dist/plugin/addon/decodo/index.js +0 -1
- package/dist/plugin/crawler-options.cjs +0 -1
- package/dist/plugin/crawler-options.js +0 -1
- package/dist/plugin/index.cjs +0 -36
- /package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
- /package/dist/{plugin → crawler}/scraper.cjs +0 -0
- /package/dist/{plugin → crawler}/scraper.js +0 -0
|
@@ -1,296 +0,0 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import { createHash } from "node:crypto";
|
|
4
|
-
/**
 * Identifies the JavaScript runtime executing this module.
 *
 * The check relies on the runtime-specific globals `Bun` and `Deno`; when
 * neither is defined the runtime is reported as Node.js. `Bun` is tested
 * first, so it wins if both globals happen to exist.
 *
 * @returns {"bun" | "deno" | "node"} Name of the detected runtime.
 */
function detectRuntime() {
  const isDefined = (globalName) => typeof globalThis[globalName] !== "undefined";
  if (isDefined("Bun")) {
    return "bun";
  }
  return isDefined("Deno") ? "deno" : "node";
}
|
|
11
|
-
/**
 * Opens the SQLite database at `dbPath` with the driver that ships with the
 * current runtime and wraps it in a small, uniform adapter.
 *
 * - Bun uses `bun:sqlite`.
 * - Node.js and Deno both use `node:sqlite`. That module exports
 *   `DatabaseSync` (there is no `Database` export), and its `exec()` does not
 *   bind parameters, so every statement goes through `prepare()`.
 *
 * @param {string} dbPath - Path of the database file to open.
 * @returns {Promise<{
 *   run: (sql: string, ...params: unknown[]) => unknown,
 *   get: (sql: string, ...params: unknown[]) => unknown,
 *   all: (sql: string, ...params: unknown[]) => unknown[],
 *   close: () => void
 * }>} Adapter exposing `run`, `get`, `all` and `close`.
 * @throws {Error} On Deno, when `node:sqlite` cannot be imported; the original
 *   failure is attached as `cause`. Other import or open errors propagate as-is.
 */
async function createDatabase(dbPath) {
  const runtime = detectRuntime();

  if (runtime === "bun") {
    const { Database } = await import("bun:sqlite");
    const db = new Database(dbPath);
    return {
      run: (sql, ...params) => db.run(sql, ...params),
      get: (sql, ...params) => db.query(sql).get(...params),
      all: (sql, ...params) => db.query(sql).all(...params),
      close: () => db.close()
    };
  }

  let sqlite;
  try {
    sqlite = await import("node:sqlite");
  } catch (err) {
    if (runtime === "deno") {
      // Keep the historical message, but preserve the real reason for debugging.
      throw new Error("Deno SQLite support requires Node.js compatibility mode", { cause: err });
    }
    throw err;
  }

  const db = new sqlite.DatabaseSync(dbPath);
  return {
    run: (sql, ...params) => {
      db.prepare(sql).run(...params);
    },
    get: (sql, ...params) => db.prepare(sql).get(...params),
    all: (sql, ...params) => db.prepare(sql).all(...params),
    close: () => db.close()
  };
}
|
|
61
|
-
|
|
62
|
-
/**
 * SQLite-backed record of crawl sessions, their pending URL queue and the URLs
 * that were already visited.
 *
 * Build instances with `NavigationHistory.create()` (or call `initialize()`
 * yourself). Every data method throws `Error("NavigationHistory is closed")`
 * when the database has not been opened yet or the store has been closed.
 */
export class NavigationHistory {
  db = null;
  options;
  storeDir;
  dbPath;
  closed = false;
  initPromise = null;

  /**
   * Resolves the storage location and creates the directory if it is missing.
   * The database itself is opened by `initialize()`.
   *
   * @param {object} [options]
   * @param {string} [options.storeDir="./navigation-history"] - Directory holding the database file.
   * @param {string} [options.dbFileName="navigation.db"] - Database file name.
   * @param {boolean} [options.hashUrls=false] - Store SHA-256 hashes as URL keys instead of raw URLs.
   */
  constructor(options = {}) {
    this.options = {
      storeDir: options.storeDir || "./navigation-history",
      dbFileName: options.dbFileName || "navigation.db",
      hashUrls: options.hashUrls ?? false
    };
    this.storeDir = path.resolve(this.options.storeDir);
    this.dbPath = path.join(this.storeDir, this.options.dbFileName);
    if (!fs.existsSync(this.storeDir)) {
      fs.mkdirSync(this.storeDir, { recursive: true });
    }
  }

  /**
   * Creates a store and opens its database.
   *
   * @param {object} [options] - Same options as the constructor.
   * @returns {Promise<NavigationHistory>} The initialized store.
   */
  static async create(options = {}) {
    const store = new NavigationHistory(options);
    await store.initialize();
    return store;
  }

  /**
   * Opens the database and creates the `sessions`, `queue` and `visited`
   * tables plus their indexes when they do not exist yet. Repeated calls
   * share the same promise.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initPromise) {
      return this.initPromise;
    }
    this.initPromise = (async () => {
      this.db = await createDatabase(this.dbPath);
      this.db.run(`
        CREATE TABLE IF NOT EXISTS sessions (
          sessionId TEXT PRIMARY KEY,
          baseUrl TEXT NOT NULL,
          startedAt INTEGER NOT NULL,
          lastActivityAt INTEGER NOT NULL,
          status TEXT DEFAULT 'running',
          urlsVisited INTEGER DEFAULT 0,
          urlsQueued INTEGER DEFAULT 0,
          urlsFailed INTEGER DEFAULT 0,
          metadata TEXT
        )
      `);
      this.db.run(`
        CREATE TABLE IF NOT EXISTS queue (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          sessionId TEXT NOT NULL,
          urlKey TEXT NOT NULL,
          originalUrl TEXT NOT NULL,
          method TEXT DEFAULT 'GET',
          priority INTEGER DEFAULT 0,
          body TEXT,
          headers TEXT,
          metadata TEXT,
          addedAt INTEGER NOT NULL,
          UNIQUE(sessionId, urlKey)
        )
      `);
      this.db.run(`
        CREATE TABLE IF NOT EXISTS visited (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          sessionId TEXT NOT NULL,
          urlKey TEXT NOT NULL,
          originalUrl TEXT NOT NULL,
          status INTEGER,
          visitedAt INTEGER NOT NULL,
          finalUrl TEXT,
          contentType TEXT,
          errorMessage TEXT,
          UNIQUE(sessionId, urlKey)
        )
      `);
      this.db.run("CREATE INDEX IF NOT EXISTS idx_queue_session ON queue(sessionId)");
      this.db.run("CREATE INDEX IF NOT EXISTS idx_queue_priority ON queue(sessionId, priority DESC)");
      this.db.run("CREATE INDEX IF NOT EXISTS idx_visited_session ON visited(sessionId)");
      this.db.run("CREATE INDEX IF NOT EXISTS idx_sessions_status ON sessions(status)");
    })();
    return this.initPromise;
  }

  /**
   * Returns the open database adapter, or throws when the store is closed or
   * was never initialized.
   */
  #openDb() {
    if (this.closed || !this.db) {
      throw new Error("NavigationHistory is closed");
    }
    return this.db;
  }

  /**
   * Maps a URL to the key stored in the `urlKey` columns.
   *
   * @param {string} url
   * @returns {string} SHA-256 hex digest when `hashUrls` is on, otherwise the URL itself.
   */
  getUrlKey(url) {
    if (this.options.hashUrls) {
      return createHash("sha256").update(url).digest("hex");
    }
    return url;
  }

  /**
   * Inserts (or replaces) a session row in the `running` state with zeroed counters.
   *
   * @param {string} sessionId
   * @param {string} baseUrl
   * @param {*} [metadata] - Stored as JSON when truthy.
   * @returns {Promise<object>} The session record that was written.
   */
  async createSession(sessionId, baseUrl, metadata) {
    const db = this.#openDb();
    const now = Date.now();
    const session = {
      sessionId,
      baseUrl,
      startedAt: now,
      lastActivityAt: now,
      status: "running",
      urlsVisited: 0,
      urlsQueued: 0,
      urlsFailed: 0,
      metadata: metadata ? JSON.stringify(metadata) : undefined
    };
    db.run(
      `INSERT OR REPLACE INTO sessions (sessionId, baseUrl, startedAt, lastActivityAt, status, urlsVisited, urlsQueued, urlsFailed, metadata)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId, baseUrl, now, now, "running", 0, 0, 0, session.metadata ?? null
    );
    return session;
  }

  /**
   * @param {string} sessionId
   * @returns {Promise<object|undefined|null>} The session row, if any.
   */
  async getSession(sessionId) {
    return this.#openDb().get("SELECT * FROM sessions WHERE sessionId = ?", sessionId);
  }

  /**
   * Sets a session's status and refreshes its `lastActivityAt` timestamp.
   *
   * @param {string} sessionId
   * @param {string} status
   */
  async updateSessionStatus(sessionId, status) {
    this.#openDb().run(
      "UPDATE sessions SET status = ?, lastActivityAt = ? WHERE sessionId = ?",
      status, Date.now(), sessionId
    );
  }

  /**
   * Updates whichever counters are present in `stats` and refreshes
   * `lastActivityAt`. Counters left `undefined` are not touched.
   *
   * @param {string} sessionId
   * @param {{urlsVisited?: number, urlsQueued?: number, urlsFailed?: number}} stats
   */
  async updateSessionStats(sessionId, stats) {
    const db = this.#openDb();
    const updates = ["lastActivityAt = ?"];
    const params = [Date.now()];
    // Column names come from this fixed list, never from caller input.
    for (const column of ["urlsVisited", "urlsQueued", "urlsFailed"]) {
      if (stats[column] !== undefined) {
        updates.push(`${column} = ?`);
        params.push(stats[column]);
      }
    }
    params.push(sessionId);
    db.run(`UPDATE sessions SET ${updates.join(", ")} WHERE sessionId = ?`, ...params);
  }

  /**
   * Queues a URL for a session unless it is already queued or already visited.
   *
   * @param {string} sessionId
   * @param {string} url
   * @param {{method?: string, priority?: number, body?: *, headers?: *, metadata?: *}} [options]
   *   `body`, `headers` and `metadata` are stored as JSON when truthy.
   * @returns {Promise<boolean>} `true` when the URL was added, `false` when it was skipped.
   */
  async addToQueue(sessionId, url, options = {}) {
    const db = this.#openDb();
    const urlKey = this.getUrlKey(url);
    const queued = db.get("SELECT id FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
    if (queued) {
      return false;
    }
    const visited = db.get("SELECT id FROM visited WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
    if (visited) {
      return false;
    }
    db.run(
      `INSERT INTO queue (sessionId, urlKey, originalUrl, method, priority, body, headers, metadata, addedAt)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId,
      urlKey,
      url,
      options.method || "GET",
      options.priority || 0,
      options.body ? JSON.stringify(options.body) : null,
      options.headers ? JSON.stringify(options.headers) : null,
      options.metadata ? JSON.stringify(options.metadata) : null,
      Date.now()
    );
    return true;
  }

  /**
   * Peeks at the next queued item (highest priority first, then oldest).
   * The item is not removed from the queue.
   *
   * @param {string} sessionId
   * @returns {Promise<object|undefined|null>}
   */
  async getNextFromQueue(sessionId) {
    return this.#openDb().get(
      "SELECT originalUrl as url, method, priority, body, headers, metadata, addedAt FROM queue WHERE sessionId = ? ORDER BY priority DESC, addedAt ASC LIMIT 1",
      sessionId
    );
  }

  /**
   * Deletes a URL from a session's queue.
   *
   * @param {string} sessionId
   * @param {string} url
   * @returns {Promise<boolean>} Always `true`, whether or not a row existed.
   */
  async removeFromQueue(sessionId, url) {
    const db = this.#openDb();
    db.run("DELETE FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, this.getUrlKey(url));
    return true;
  }

  /**
   * @param {string} sessionId
   * @returns {Promise<number>} Number of queued URLs for the session.
   */
  async getQueueSize(sessionId) {
    const row = this.#openDb().get("SELECT COUNT(*) as count FROM queue WHERE sessionId = ?", sessionId);
    return row?.count || 0;
  }

  /**
   * Moves a URL from the queue to the visited table, replacing any earlier
   * visit record for the same URL.
   *
   * @param {string} sessionId
   * @param {string} url
   * @param {{status?: number, finalUrl?: string, contentType?: string, errorMessage?: string}} [result]
   */
  async markVisited(sessionId, url, result = {}) {
    const db = this.#openDb();
    const urlKey = this.getUrlKey(url);
    db.run("DELETE FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
    db.run(
      `INSERT OR REPLACE INTO visited (sessionId, urlKey, originalUrl, status, visitedAt, finalUrl, contentType, errorMessage)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId,
      urlKey,
      url,
      result.status || 0,
      Date.now(),
      result.finalUrl ?? null,
      result.contentType ?? null,
      result.errorMessage ?? null
    );
  }

  /**
   * @param {string} sessionId
   * @param {string} url
   * @returns {Promise<boolean>} Whether the URL has a visit record in this session.
   */
  async isVisited(sessionId, url) {
    const row = this.#openDb().get(
      "SELECT id FROM visited WHERE sessionId = ? AND urlKey = ?",
      sessionId, this.getUrlKey(url)
    );
    return !!row;
  }

  /**
   * @param {string} sessionId
   * @returns {Promise<number>} Number of visited URLs for the session.
   */
  async getVisitedCount(sessionId) {
    const row = this.#openDb().get("SELECT COUNT(*) as count FROM visited WHERE sessionId = ?", sessionId);
    return row?.count || 0;
  }

  /**
   * Lists visits that ended with an HTTP status >= 400 or an error message.
   *
   * @param {string} sessionId
   * @returns {Promise<object[]>} Rows with `url`, `status`, `visitedAt`, `finalUrl`, `contentType`, `errorMessage`.
   */
  async getFailedUrls(sessionId) {
    // The `visited` table has no `url` column; expose `originalUrl` under that
    // name, the same way the queue queries do.
    return this.#openDb().all(
      "SELECT originalUrl as url, status, visitedAt, finalUrl, contentType, errorMessage FROM visited WHERE sessionId = ? AND (status >= 400 OR errorMessage IS NOT NULL)",
      sessionId
    );
  }

  /**
   * @param {string} sessionId
   * @returns {Promise<object[]>} All queued items, highest priority first, then oldest.
   */
  async getAllQueuedUrls(sessionId) {
    return this.#openDb().all(
      "SELECT originalUrl as url, method, priority, body, headers, metadata, addedAt FROM queue WHERE sessionId = ? ORDER BY priority DESC, addedAt ASC",
      sessionId
    );
  }

  /**
   * Removes every queued URL of a session.
   *
   * @param {string} sessionId
   */
  async clearQueue(sessionId) {
    this.#openDb().run("DELETE FROM queue WHERE sessionId = ?", sessionId);
  }

  /**
   * Removes every visit record of a session.
   *
   * @param {string} sessionId
   */
  async clearVisited(sessionId) {
    this.#openDb().run("DELETE FROM visited WHERE sessionId = ?", sessionId);
  }

  /**
   * Deletes a session together with its queue and visit records.
   *
   * @param {string} sessionId
   */
  async deleteSession(sessionId) {
    const db = this.#openDb();
    for (const table of ["queue", "visited", "sessions"]) {
      db.run(`DELETE FROM ${table} WHERE sessionId = ?`, sessionId);
    }
  }

  /**
   * @returns {Promise<object[]>} Sessions whose status is `running` or `paused`, most recently active first.
   */
  async getResumableSessions() {
    return this.#openDb().all(
      "SELECT * FROM sessions WHERE status IN ('running', 'paused') ORDER BY lastActivityAt DESC"
    );
  }

  /**
   * Closes the database. Safe to call more than once.
   *
   * If `initialize()` is still in flight, this waits for it first so that the
   * handle it opens is closed instead of being left open on a store that is
   * already marked closed.
   */
  async close() {
    if (this.closed) {
      return;
    }
    this.closed = true;
    if (!this.db && this.initPromise) {
      try {
        await this.initPromise;
      } catch {
        // The failure was already delivered to whoever called initialize();
        // there is nothing to close in that case.
      }
    }
    if (this.db) {
      try {
        this.db.close();
      } finally {
        this.db = null;
      }
    }
  }

  /** Whether `close()` has been called. */
  get isClosed() {
    return this.closed;
  }

  /** Absolute path of the database file. */
  get databasePath() {
    return this.dbPath;
  }
}
|
package/dist/cache/url-store.cjs
DELETED
|
@@ -1,294 +0,0 @@
|
|
|
1
|
-
const fs = require("node:fs");
|
|
2
|
-
const path = require("node:path");
|
|
3
|
-
const { createHash } = require("node:crypto");
|
|
4
|
-
/**
 * Reports which JavaScript runtime is executing this module.
 *
 * Runtime-specific globals are probed in order (`Bun`, then `Deno`); if
 * neither is defined the runtime is taken to be Node.js.
 *
 * @returns {"bun" | "deno" | "node"} Name of the detected runtime.
 */
function detectRuntime() {
  const probes = [
    ["Bun", "bun"],
    ["Deno", "deno"]
  ];
  for (const [globalName, runtimeName] of probes) {
    if (typeof globalThis[globalName] !== "undefined") {
      return runtimeName;
    }
  }
  return "node";
}
|
|
13
|
-
/**
 * Opens the SQLite database at `dbPath` with the driver that ships with the
 * current runtime and wraps it in a small, uniform adapter.
 *
 * - Bun uses `bun:sqlite`.
 * - Node.js and Deno both use `node:sqlite`. That module exports
 *   `DatabaseSync` (there is no `Database` export), and its `exec()` does not
 *   bind parameters, so parameterized statements go through `prepare()`.
 *
 * @param {string} dbPath - Path of the database file to open.
 * @returns {Promise<{
 *   run: (sql: string, ...params: unknown[]) => unknown,
 *   get: (sql: string, ...params: unknown[]) => unknown,
 *   all: (sql: string, ...params: unknown[]) => unknown[],
 *   close: () => void
 * }>} Adapter exposing `run`, `get`, `all` and `close`.
 * @throws {Error} On Deno, when `node:sqlite` cannot be imported; the original
 *   failure is attached as `cause`. Other import or open errors propagate as-is.
 */
async function createDatabase(dbPath) {
  const runtime = detectRuntime();

  if (runtime === "bun") {
    const { Database } = await import("bun:sqlite");
    const db = new Database(dbPath);
    return {
      run: (sql, ...params) => db.run(sql, ...params),
      get: (sql, ...params) => db.query(sql).get(...params),
      all: (sql, ...params) => db.query(sql).all(...params),
      close: () => db.close()
    };
  }

  let sqlite;
  try {
    sqlite = await import("node:sqlite");
  } catch (err) {
    if (runtime === "deno") {
      // Keep the historical message, but preserve the real reason for debugging.
      throw new Error("Deno SQLite support requires Node.js compatibility mode", { cause: err });
    }
    throw err;
  }

  const db = new sqlite.DatabaseSync(dbPath);
  return {
    run: (sql, ...params) => {
      if (params.length === 0) {
        // No bindings: exec() runs the SQL text directly.
        db.exec(sql);
      } else {
        db.prepare(sql).run(...params);
      }
    },
    get: (sql, ...params) => db.prepare(sql).get(...params),
    all: (sql, ...params) => db.prepare(sql).all(...params),
    close: () => db.close()
  };
}
|
|
66
|
-
|
|
67
|
-
/**
 * SQLite-backed set of visited URLs with per-entry expiry, optional
 * namespaces and an optional cap on the number of stored URLs.
 *
 * The database is opened lazily: every operation awaits `initialize()`.
 * After `close()`, `set()` throws and the read/maintenance methods return an
 * empty result.
 */
class UrlStore {
  db = null;
  options;
  storeDir;
  dbPath;
  closed = false;
  initPromise = null;

  /**
   * Resolves the storage location and creates the directory if it is missing.
   *
   * @param {object} [options]
   * @param {string} [options.storeDir="./url-store"] - Directory holding the database file.
   * @param {string} [options.dbFileName="urls.db"] - Database file name.
   * @param {number} [options.ttl=604800000] - Default entry lifetime in ms (7 days); falsy values fall back to the default.
   * @param {number} [options.maxUrls=0] - Maximum number of stored URLs; `0` disables the cap.
   * @param {boolean} [options.hashUrls=false] - Store SHA-256 hashes instead of raw URLs.
   */
  constructor(options = {}) {
    this.options = {
      storeDir: options.storeDir || "./url-store",
      dbFileName: options.dbFileName || "urls.db",
      ttl: options.ttl || 604800000,
      maxUrls: options.maxUrls ?? 0,
      hashUrls: options.hashUrls ?? false
    };
    this.storeDir = path.resolve(this.options.storeDir);
    this.dbPath = path.join(this.storeDir, this.options.dbFileName);
    if (!fs.existsSync(this.storeDir)) {
      fs.mkdirSync(this.storeDir, { recursive: true });
    }
  }

  /**
   * Creates a store and opens its database.
   *
   * @param {object} [options] - Same options as the constructor.
   * @returns {Promise<UrlStore>} The initialized store.
   */
  static async create(options = {}) {
    const store = new UrlStore(options);
    await store.initialize();
    return store;
  }

  /**
   * Opens the database and creates the `urls` table and its indexes when they
   * do not exist yet. Repeated calls share the same promise.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initPromise) {
      return this.initPromise;
    }
    this.initPromise = (async () => {
      this.db = await createDatabase(this.dbPath);
      this.db.run(`
        CREATE TABLE IF NOT EXISTS urls (
          url TEXT PRIMARY KEY,
          visitedAt INTEGER NOT NULL,
          expiresAt INTEGER NOT NULL,
          namespace TEXT DEFAULT 'default',
          metadata TEXT
        )
      `);
      this.db.run("CREATE INDEX IF NOT EXISTS idx_expires ON urls(expiresAt)");
      this.db.run("CREATE INDEX IF NOT EXISTS idx_namespace ON urls(namespace)");
      this.db.run("CREATE INDEX IF NOT EXISTS idx_visited ON urls(visitedAt)");
    })();
    return this.initPromise;
  }

  /**
   * Maps a URL to the value stored in the `url` column.
   *
   * @param {string} url
   * @returns {string} SHA-256 hex digest when `hashUrls` is on, otherwise the URL itself.
   */
  getUrlKey(url) {
    if (this.options.hashUrls) {
      return createHash("sha256").update(url).digest("hex");
    }
    return url;
  }

  /**
   * Records a URL as visited, replacing any existing row with the same key.
   * When `maxUrls` is set, the least recently visited rows beyond the cap are
   * deleted afterwards.
   *
   * @param {string} url
   * @param {string} [namespace="default"]
   * @param {*} [metadata] - Stored as JSON when truthy.
   * @param {number} [ttl] - Lifetime in ms; defaults to the store-wide `ttl`.
   * @throws {Error} When the store is closed.
   */
  async set(url, namespace = "default", metadata, ttl) {
    if (this.closed) {
      throw new Error("UrlStore is closed");
    }
    await this.initialize();
    const key = this.getUrlKey(url);
    const now = Date.now();
    const expiresAt = now + (ttl ?? this.options.ttl);
    const metaStr = metadata ? JSON.stringify(metadata) : null;
    this.db.run(`
      INSERT OR REPLACE INTO urls (url, visitedAt, expiresAt, namespace, metadata)
      VALUES (?, ?, ?, ?, ?)
    `, key, now, expiresAt, namespace, metaStr);

    if (this.options.maxUrls > 0) {
      const count = this.db.get("SELECT COUNT(*) as cnt FROM urls");
      if (count && count.cnt > this.options.maxUrls) {
        const excess = count.cnt - this.options.maxUrls;
        this.db.run(`
          DELETE FROM urls WHERE url IN (
            SELECT url FROM urls ORDER BY visitedAt ASC LIMIT ?
          )
        `, excess);
      }
    }
  }

  /**
   * Checks whether a URL is stored and not yet expired.
   *
   * @param {string} url
   * @param {string} [namespace] - Restrict the lookup to this namespace; any namespace when omitted.
   * @returns {Promise<boolean>} `false` when the store is closed.
   */
  async has(url, namespace) {
    if (this.closed) {
      return false;
    }
    await this.initialize();
    const key = this.getUrlKey(url);
    const now = Date.now();
    const entry = namespace
      ? this.db.get("SELECT url, expiresAt FROM urls WHERE url = ? AND namespace = ?", key, namespace)
      : this.db.get("SELECT url, expiresAt FROM urls WHERE url = ?", key);
    if (!entry) {
      return false;
    }
    return entry.expiresAt >= now;
  }

  /**
   * Batch variant of `has()`: returns the subset of `urls` that are stored and
   * not expired. Lookups are issued in batches of 100 URLs.
   *
   * @param {string[]} urls
   * @param {string} [namespace] - Restrict the lookup to this namespace; any namespace when omitted.
   * @returns {Promise<Set<string>>} Matching URLs as given by the caller; empty when the store is closed.
   */
  async hasMany(urls, namespace) {
    if (this.closed) {
      return new Set();
    }
    await this.initialize();
    const result = new Set();
    const now = Date.now();
    const batchSize = 100;
    for (let i = 0; i < urls.length; i += batchSize) {
      const batch = urls.slice(i, i + batchSize);
      // Map each storage key back to the caller's URL for O(1) lookup per row
      // (keys differ from URLs when hashing is enabled).
      const urlByKey = new Map();
      for (const url of batch) {
        const key = this.getUrlKey(url);
        if (!urlByKey.has(key)) {
          urlByKey.set(key, url);
        }
      }
      const keys = [...urlByKey.keys()];
      const placeholders = keys.map(() => "?").join(",");
      const entries = namespace
        ? this.db.all(`SELECT url, expiresAt FROM urls WHERE url IN (${placeholders}) AND namespace = ?`, ...keys, namespace)
        : this.db.all(`SELECT url, expiresAt FROM urls WHERE url IN (${placeholders})`, ...keys);
      for (const entry of entries) {
        if (entry.expiresAt >= now && urlByKey.has(entry.url)) {
          result.add(urlByKey.get(entry.url));
        }
      }
    }
    return result;
  }

  /**
   * Reads the metadata stored with a URL. Expiry is not checked.
   *
   * @param {string} url
   * @param {string} [namespace] - Restrict the lookup to this namespace; any namespace when omitted.
   * @returns {Promise<*|null>} Parsed metadata, or `null` when the store is
   *   closed, nothing is stored, or the stored text is not valid JSON.
   */
  async getMetadata(url, namespace) {
    if (this.closed) {
      return null;
    }
    await this.initialize();
    const key = this.getUrlKey(url);
    const entry = namespace
      ? this.db.get("SELECT metadata FROM urls WHERE url = ? AND namespace = ?", key, namespace)
      : this.db.get("SELECT metadata FROM urls WHERE url = ?", key);
    if (!entry?.metadata) {
      return null;
    }
    try {
      return JSON.parse(entry.metadata);
    } catch {
      // Unreadable metadata is treated as absent (deliberate best effort).
      return null;
    }
  }

  /**
   * Removes a URL from the store.
   *
   * @param {string} url
   * @param {string} [namespace] - Restrict the deletion to this namespace; any namespace when omitted.
   * @returns {Promise<boolean>} `false` when the store is closed, otherwise `true`
   *   whether or not a row existed.
   */
  async delete(url, namespace) {
    if (this.closed) {
      return false;
    }
    await this.initialize();
    const key = this.getUrlKey(url);
    if (namespace) {
      this.db.run("DELETE FROM urls WHERE url = ? AND namespace = ?", key, namespace);
    } else {
      this.db.run("DELETE FROM urls WHERE url = ?", key);
    }
    return true;
  }

  /**
   * Removes every URL of one namespace, or of all namespaces when omitted.
   * Does nothing when the store is closed.
   *
   * @param {string} [namespace]
   */
  async clear(namespace) {
    if (this.closed) {
      return;
    }
    await this.initialize();
    if (namespace) {
      this.db.run("DELETE FROM urls WHERE namespace = ?", namespace);
    } else {
      this.db.run("DELETE FROM urls");
    }
  }

  /**
   * Deletes expired entries.
   *
   * @returns {Promise<number>} Number of rows removed (row count before minus
   *   row count after); `0` when the store is closed.
   */
  async cleanup() {
    if (this.closed) {
      return 0;
    }
    await this.initialize();
    const now = Date.now();
    const countBefore = this.db.get("SELECT COUNT(*) as cnt FROM urls");
    this.db.run("DELETE FROM urls WHERE expiresAt < ?", now);
    const countAfter = this.db.get("SELECT COUNT(*) as cnt FROM urls");
    return (countBefore?.cnt || 0) - (countAfter?.cnt || 0);
  }

  /**
   * Lists the stored keys of a namespace. These are hashes, not URLs, when
   * `hashUrls` is enabled.
   *
   * @param {string} [namespace="default"]
   * @param {boolean} [includeExpired=false] - Also return entries past their expiry.
   * @returns {Promise<string[]>} Empty when the store is closed.
   */
  async getAll(namespace = "default", includeExpired = false) {
    if (this.closed) {
      return [];
    }
    await this.initialize();
    const now = Date.now();
    const entries = includeExpired
      ? this.db.all("SELECT url FROM urls WHERE namespace = ?", namespace)
      : this.db.all("SELECT url FROM urls WHERE namespace = ? AND expiresAt >= ?", namespace, now);
    return entries.map((entry) => entry.url);
  }

  /**
   * Counts stored and expired entries, optionally for a single namespace.
   * `namespaces` is always the number of distinct namespaces in the whole store.
   *
   * @param {string} [namespace]
   * @returns {Promise<{total: number, expired: number, namespaces: number}>}
   *   All zeros when the store is closed.
   */
  async stats(namespace) {
    if (this.closed) {
      return { total: 0, expired: 0, namespaces: 0 };
    }
    await this.initialize();
    const now = Date.now();
    let total;
    let expired;
    if (namespace) {
      total = this.db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ?", namespace);
      expired = this.db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ? AND expiresAt < ?", namespace, now);
    } else {
      total = this.db.get("SELECT COUNT(*) as cnt FROM urls");
      expired = this.db.get("SELECT COUNT(*) as cnt FROM urls WHERE expiresAt < ?", now);
    }
    const namespaceCount = this.db.get("SELECT COUNT(DISTINCT namespace) as cnt FROM urls");
    return {
      total: total?.cnt || 0,
      expired: expired?.cnt || 0,
      namespaces: namespaceCount?.cnt || 0
    };
  }

  /**
   * Closes the database, waiting for an in-flight `initialize()` first so the
   * handle it opens is released. Safe to call more than once; never rejects
   * because of an initialization or close failure.
   */
  async close() {
    if (this.closed) {
      return;
    }
    this.closed = true;
    try {
      await this.initPromise;
    } catch {
      // The init failure was already reported to the caller that triggered
      // it; cleanup must not surface it a second time.
    }
    if (this.db) {
      try {
        this.db.close();
      } catch {
        // Closing is best effort, as before.
      }
      this.db = null;
    }
  }

  /** Whether `close()` has been called. */
  get isClosed() {
    return this.closed;
  }

  /** Absolute path of the database file. */
  get path() {
    return this.dbPath;
  }
}
|
|
291
|
-
|
|
292
|
-
exports.UrlStore = UrlStore;
|
|
293
|
-
exports.default = UrlStore;
|
|
294
|
-
module.exports = Object.assign(UrlStore, exports);
|