rezo 1.0.43 → 1.0.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/dist/adapters/index.cjs +6 -6
  2. package/dist/cache/index.cjs +9 -15
  3. package/dist/cache/index.js +0 -3
  4. package/dist/crawler/addon/decodo/index.cjs +1 -0
  5. package/dist/crawler/addon/decodo/index.js +1 -0
  6. package/dist/crawler/crawler-options.cjs +1 -0
  7. package/dist/crawler/crawler-options.js +1 -0
  8. package/dist/{plugin → crawler}/crawler.cjs +392 -32
  9. package/dist/{plugin → crawler}/crawler.js +392 -32
  10. package/dist/crawler/index.cjs +40 -0
  11. package/dist/{plugin → crawler}/index.js +4 -2
  12. package/dist/crawler/plugin/file-cacher.cjs +19 -0
  13. package/dist/crawler/plugin/file-cacher.js +19 -0
  14. package/dist/crawler/plugin/index.cjs +1 -0
  15. package/dist/crawler/plugin/index.js +1 -0
  16. package/dist/crawler/plugin/navigation-history.cjs +43 -0
  17. package/dist/crawler/plugin/navigation-history.js +43 -0
  18. package/dist/crawler/plugin/robots-txt.cjs +2 -0
  19. package/dist/crawler/plugin/robots-txt.js +2 -0
  20. package/dist/crawler/plugin/url-store.cjs +18 -0
  21. package/dist/crawler/plugin/url-store.js +18 -0
  22. package/dist/crawler.d.ts +315 -172
  23. package/dist/entries/crawler.cjs +5 -5
  24. package/dist/entries/crawler.js +2 -2
  25. package/dist/index.cjs +27 -27
  26. package/dist/internal/agents/index.cjs +10 -10
  27. package/dist/proxy/index.cjs +4 -4
  28. package/dist/queue/index.cjs +8 -8
  29. package/dist/responses/universal/index.cjs +11 -11
  30. package/package.json +2 -6
  31. package/dist/cache/file-cacher.cjs +0 -270
  32. package/dist/cache/file-cacher.js +0 -267
  33. package/dist/cache/navigation-history.cjs +0 -298
  34. package/dist/cache/navigation-history.js +0 -296
  35. package/dist/cache/url-store.cjs +0 -294
  36. package/dist/cache/url-store.js +0 -291
  37. package/dist/plugin/addon/decodo/index.cjs +0 -1
  38. package/dist/plugin/addon/decodo/index.js +0 -1
  39. package/dist/plugin/crawler-options.cjs +0 -1
  40. package/dist/plugin/crawler-options.js +0 -1
  41. package/dist/plugin/index.cjs +0 -36
  42. /package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
  43. /package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
  44. /package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
  45. /package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
  46. /package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
  47. /package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
  48. /package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
  49. /package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
  50. /package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
  51. /package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
  52. /package/dist/{plugin → crawler}/scraper.cjs +0 -0
  53. /package/dist/{plugin → crawler}/scraper.js +0 -0
@@ -0,0 +1,19 @@
1
import fs from "node:fs";
import path from "node:path";
import { createHash } from "node:crypto";
import * as zlib from "node:zlib";

/** True when the Bun runtime global is present; selects `bun:sqlite` over `node:sqlite`. */
const IS_BUN = typeof globalThis.Bun !== "undefined";

/** True when this runtime's zlib exposes the synchronous zstd API. */
const HAS_ZSTD = typeof zlib.zstdCompressSync === "function";

/** Maximum number of keys bound into a single `IN (...)` lookup by `hasMany`. */
const HAS_MANY_BATCH = 500;

const CLOSED_MESSAGE = "FileCacher is closed";

/**
 * Opens a SQLite database file and wraps it in a small runtime-neutral facade.
 *
 * @param {string} filePath - Path of the database file.
 * @returns {Promise<{run: Function, get: Function, all: Function, exec: Function, close: Function}>}
 */
async function openDatabase(filePath) {
  if (IS_BUN) {
    const { Database } = await import("bun:sqlite");
    const db = new Database(filePath);
    return {
      run: (sql, ...params) => db.run(sql, ...params),
      get: (sql, ...params) => db.query(sql).get(...params),
      all: (sql, ...params) => db.query(sql).all(...params),
      exec: (sql) => db.exec(sql),
      close: () => db.close(),
    };
  }

  const { DatabaseSync } = await import("node:sqlite");
  const db = new DatabaseSync(filePath);
  return {
    run: (sql, ...params) => {
      // Parameterless statements go through exec(); everything else is prepared.
      if (params.length === 0) db.exec(sql);
      else db.prepare(sql).run(...params);
    },
    get: (sql, ...params) => db.prepare(sql).get(...params),
    all: (sql, ...params) => db.prepare(sql).all(...params),
    exec: (sql) => db.exec(sql),
    close: () => db.close(),
  };
}

/** Compresses with zstd when available; otherwise returns the input unchanged. */
function compress(bytes) {
  return HAS_ZSTD ? zlib.zstdCompressSync(bytes) : bytes;
}

/** Decompresses with zstd when available; otherwise returns the input unchanged. */
function decompress(bytes) {
  return HAS_ZSTD ? zlib.zstdDecompressSync(bytes) : bytes;
}

/**
 * SQLite-backed key/value cache with per-entry expiry.
 *
 * Each namespace is stored in its own `<name>.db` file inside `cacheDir`.
 * Values are JSON-serialised, optionally zstd-compressed, and stored base64-encoded.
 */
class FileCacher {
  /** Open database handles, keyed by the sanitised (or hashed) namespace name. */
  databases = new Map();
  options;
  cacheDir;
  closed = false;

  /** In-flight opens, so concurrent callers share one connection per namespace. */
  #opening = new Map();

  /**
   * @param {object} [options]
   * @param {string} [options.cacheDir="/tmp/rezo-crawler/cache"] - Directory holding the database files.
   * @param {number} [options.ttl=604800000] - Default time-to-live in milliseconds.
   * @param {boolean} [options.compression=false] - Compress values when zstd is available.
   * @param {boolean} [options.encryptNamespace=false] - Use the MD5 hex digest of the namespace as file name.
   * @param {number} [options.maxEntries=0] - Per-namespace entry cap; 0 disables the cap.
   */
  constructor(options = {}) {
    this.options = {
      cacheDir: options.cacheDir || "/tmp/rezo-crawler/cache",
      ttl: options.ttl || 604800000,
      compression: options.compression ?? false,
      encryptNamespace: options.encryptNamespace ?? false,
      maxEntries: options.maxEntries ?? 0,
    };
    this.cacheDir = path.resolve(this.options.cacheDir);
    if (!fs.existsSync(this.cacheDir)) {
      fs.mkdirSync(this.cacheDir, { recursive: true });
    }
  }

  /** Async factory; equivalent to `new FileCacher(options)`. */
  static async create(options = {}) {
    return new FileCacher(options);
  }

  /**
   * Returns the database handle for a namespace, opening it on first use.
   *
   * Concurrent first calls for the same namespace share a single pending open;
   * previously each one opened its own connection and all but the last leaked.
   *
   * @param {string} namespace
   * @returns {Promise<object>} The database facade for that namespace.
   */
  async getDatabase(namespace) {
    const fileKey = this.options.encryptNamespace
      ? createHash("md5").update(namespace).digest("hex")
      : namespace.replace(/[^a-zA-Z0-9_-]/g, "_");

    const existing = this.databases.get(fileKey);
    if (existing) return existing;

    let pending = this.#opening.get(fileKey);
    if (!pending) {
      pending = this.#openNamespace(fileKey).finally(() => {
        this.#opening.delete(fileKey);
      });
      this.#opening.set(fileKey, pending);
    }
    return pending;
  }

  /** Opens `<fileKey>.db`, applies pragmas and schema, then registers the handle. */
  async #openNamespace(fileKey) {
    const db = await openDatabase(path.join(this.cacheDir, `${fileKey}.db`));
    try {
      // close() may have run while the open was in flight; do not register a handle nobody will close.
      if (this.closed) throw new Error(CLOSED_MESSAGE);
      db.exec("PRAGMA journal_mode = WAL");
      db.exec("PRAGMA synchronous = NORMAL");
      db.exec("PRAGMA cache_size = -64000");
      db.exec("PRAGMA temp_store = MEMORY");
      db.exec("PRAGMA mmap_size = 268435456");
      db.exec(`
        CREATE TABLE IF NOT EXISTS cache (
          key TEXT PRIMARY KEY,
          value BLOB NOT NULL,
          expiresAt INTEGER NOT NULL,
          createdAt INTEGER NOT NULL,
          compressed INTEGER DEFAULT 0
        ) WITHOUT ROWID
      `);
      db.exec("CREATE INDEX IF NOT EXISTS idx_expires ON cache(expiresAt)");
    } catch (err) {
      try {
        db.close();
      } catch {
        // A failed close must not mask the original error.
      }
      throw err;
    }
    this.databases.set(fileKey, db);
    return db;
  }

  /** Serialises a value to the stored base64 payload plus its `compressed` flag (0 or 1). */
  #encode(value) {
    let bytes = Buffer.from(JSON.stringify(value), "utf-8");
    let compressed = 0;
    if (this.options.compression && HAS_ZSTD) {
      try {
        bytes = compress(bytes);
        compressed = 1;
      } catch {
        // Best effort: fall back to storing the uncompressed bytes.
      }
    }
    return { payload: Buffer.from(bytes).toString("base64"), compressed };
  }

  /** Deletes the oldest rows (by `createdAt`) until the table is within `maxEntries`. */
  #evictOverflow(db) {
    const limit = this.options.maxEntries;
    if (!(limit > 0)) return;
    const row = db.get("SELECT COUNT(*) as cnt FROM cache");
    if (!row || row.cnt <= limit) return;
    db.run(
      `DELETE FROM cache WHERE key IN (
        SELECT key FROM cache ORDER BY createdAt ASC LIMIT ?
      )`,
      row.cnt - limit,
    );
  }

  /**
   * Stores one value.
   *
   * @param {string} key
   * @param {*} value - Must be JSON-serialisable.
   * @param {number} [ttl] - Milliseconds; defaults to `options.ttl`.
   * @param {string} [namespace="default"]
   * @throws {Error} When the cacher is closed.
   */
  async set(key, value, ttl, namespace = "default") {
    if (this.closed) throw new Error(CLOSED_MESSAGE);
    const db = await this.getDatabase(namespace);
    const now = Date.now();
    const expiresAt = now + (ttl ?? this.options.ttl);
    const { payload, compressed } = this.#encode(value);
    db.run(
      `INSERT OR REPLACE INTO cache (key, value, expiresAt, createdAt, compressed)
       VALUES (?, ?, ?, ?, ?)`,
      key,
      payload,
      expiresAt,
      now,
      compressed,
    );
    this.#evictOverflow(db);
  }

  /**
   * Stores several values inside one transaction, rolling back if any insert fails.
   *
   * @param {Array<{key: string, value: *, ttl?: number}>} entries
   * @param {string} [namespace="default"]
   * @throws {Error} When the cacher is closed, or when an insert fails.
   */
  async setMany(entries, namespace = "default") {
    if (this.closed) throw new Error(CLOSED_MESSAGE);
    if (entries.length === 0) return;
    const db = await this.getDatabase(namespace);
    const now = Date.now();
    const defaultExpiry = now + this.options.ttl;

    db.exec("BEGIN TRANSACTION");
    try {
      for (const entry of entries) {
        const expiresAt = entry.ttl ? now + entry.ttl : defaultExpiry;
        const { payload, compressed } = this.#encode(entry.value);
        db.run(
          `INSERT OR REPLACE INTO cache (key, value, expiresAt, createdAt, compressed)
           VALUES (?, ?, ?, ?, ?)`,
          entry.key,
          payload,
          expiresAt,
          now,
          compressed,
        );
      }
      db.exec("COMMIT");
    } catch (err) {
      db.exec("ROLLBACK");
      throw err;
    }
    // Enforce the cap after the commit, as set() does, so bulk writes cannot exceed maxEntries.
    this.#evictOverflow(db);
  }

  /**
   * Reads a value.
   *
   * @returns {Promise<*|null>} The stored value, or null when the key is missing,
   *   expired (the row is deleted), or cannot be decompressed or parsed.
   * @throws {Error} When the cacher is closed.
   */
  async get(key, namespace = "default") {
    if (this.closed) throw new Error(CLOSED_MESSAGE);
    const db = await this.getDatabase(namespace);
    const row = db.get("SELECT value, expiresAt, compressed FROM cache WHERE key = ?", key);
    if (!row) return null;
    if (row.expiresAt < Date.now()) {
      db.run("DELETE FROM cache WHERE key = ?", key);
      return null;
    }

    let bytes = Buffer.from(row.value, "base64");
    if (row.compressed) {
      try {
        bytes = decompress(bytes);
      } catch {
        return null;
      }
    }
    try {
      return JSON.parse(Buffer.from(bytes).toString("utf-8"));
    } catch {
      return null;
    }
  }

  /** Resolves true when the key exists and has not expired; false when closed. */
  async has(key, namespace = "default") {
    if (this.closed) return false;
    const db = await this.getDatabase(namespace);
    const row = db.get("SELECT expiresAt FROM cache WHERE key = ?", key);
    if (!row) return false;
    return row.expiresAt >= Date.now();
  }

  /**
   * Bulk existence check.
   *
   * @param {string[]} keys
   * @returns {Promise<Set<string>>} The subset of `keys` that exist and have not expired.
   */
  async hasMany(keys, namespace = "default") {
    const found = new Set();
    if (this.closed || keys.length === 0) return found;
    const db = await this.getDatabase(namespace);
    const now = Date.now();
    for (let start = 0; start < keys.length; start += HAS_MANY_BATCH) {
      const batch = keys.slice(start, start + HAS_MANY_BATCH);
      const placeholders = batch.map(() => "?").join(",");
      const rows = db.all(
        `SELECT key, expiresAt FROM cache WHERE key IN (${placeholders})`,
        ...batch,
      );
      for (const row of rows) {
        if (row.expiresAt >= now) found.add(row.key);
      }
    }
    return found;
  }

  /** Deletes a key. Resolves false when closed, true otherwise (even if the key was absent). */
  async delete(key, namespace = "default") {
    if (this.closed) return false;
    const db = await this.getDatabase(namespace);
    db.run("DELETE FROM cache WHERE key = ?", key);
    return true;
  }

  /** Removes every entry of a namespace. No-op when closed. */
  async clear(namespace = "default") {
    if (this.closed) return;
    const db = await this.getDatabase(namespace);
    db.exec("DELETE FROM cache");
  }

  /** Deletes expired entries and resolves to how many rows were removed (0 when closed). */
  async cleanup(namespace = "default") {
    if (this.closed) return 0;
    const db = await this.getDatabase(namespace);
    const now = Date.now();
    const before = db.get("SELECT COUNT(*) as cnt FROM cache");
    db.run("DELETE FROM cache WHERE expiresAt < ?", now);
    const after = db.get("SELECT COUNT(*) as cnt FROM cache");
    return (before?.cnt || 0) - (after?.cnt || 0);
  }

  /** Resolves `{ count, expired }` for a namespace; both are 0 when closed. */
  async stats(namespace = "default") {
    if (this.closed) return { count: 0, expired: 0 };
    const db = await this.getDatabase(namespace);
    const now = Date.now();
    const total = db.get("SELECT COUNT(*) as cnt FROM cache");
    const expired = db.get("SELECT COUNT(*) as cnt FROM cache WHERE expiresAt < ?", now);
    return { count: total?.cnt || 0, expired: expired?.cnt || 0 };
  }

  /** Closes every open database. Safe to call more than once. */
  async close() {
    if (this.closed) return;
    this.closed = true;
    for (const db of this.databases.values()) {
      try {
        db.close();
      } catch {
        // Best effort: keep closing the remaining handles.
      }
    }
    this.databases.clear();
  }

  get isClosed() {
    return this.closed;
  }

  get directory() {
    return this.cacheDir;
  }
}

export { FileCacher as default, FileCacher };
@@ -0,0 +1 @@
1
// CommonJS barrel: re-exports the crawler plugin classes from their own modules.
const fileCacherModule = require("./file-cacher.cjs");
exports.FileCacher = fileCacherModule.FileCacher;

const urlStoreModule = require("./url-store.cjs");
exports.UrlStore = urlStoreModule.UrlStore;
@@ -0,0 +1 @@
1
// ESM barrel: re-exports the crawler plugin classes from their own modules.
export { FileCacher } from "./file-cacher.js";
export { UrlStore } from "./url-store.js";
@@ -0,0 +1,43 @@
1
const fs = require("node:fs");
const path = require("node:path");
const { createHash } = require("node:crypto");

/** True when the Bun runtime global is present; selects `bun:sqlite` over `node:sqlite`. */
const IS_BUN = typeof globalThis.Bun !== "undefined";

const CLOSED_MESSAGE = "NavigationHistory is closed";

const PRAGMAS = [
  "PRAGMA journal_mode = WAL",
  "PRAGMA synchronous = NORMAL",
  "PRAGMA cache_size = -32000",
  "PRAGMA temp_store = MEMORY",
  "PRAGMA mmap_size = 134217728",
];

const SCHEMA = [
  `CREATE TABLE IF NOT EXISTS sessions (
    sessionId TEXT PRIMARY KEY,
    baseUrl TEXT NOT NULL,
    startedAt INTEGER NOT NULL,
    lastActivityAt INTEGER NOT NULL,
    status TEXT DEFAULT 'running',
    urlsVisited INTEGER DEFAULT 0,
    urlsQueued INTEGER DEFAULT 0,
    urlsFailed INTEGER DEFAULT 0,
    metadata TEXT
  )`,
  `CREATE TABLE IF NOT EXISTS queue (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    sessionId TEXT NOT NULL,
    urlKey TEXT NOT NULL,
    originalUrl TEXT NOT NULL,
    method TEXT DEFAULT 'GET',
    priority INTEGER DEFAULT 0,
    body TEXT,
    headers TEXT,
    metadata TEXT,
    addedAt INTEGER NOT NULL,
    UNIQUE(sessionId, urlKey)
  )`,
  `CREATE TABLE IF NOT EXISTS visited (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    sessionId TEXT NOT NULL,
    urlKey TEXT NOT NULL,
    originalUrl TEXT NOT NULL,
    status INTEGER,
    visitedAt INTEGER NOT NULL,
    finalUrl TEXT,
    contentType TEXT,
    errorMessage TEXT,
    UNIQUE(sessionId, urlKey)
  )`,
  "CREATE INDEX IF NOT EXISTS idx_queue_session ON queue(sessionId)",
  "CREATE INDEX IF NOT EXISTS idx_queue_priority ON queue(sessionId, priority DESC)",
  "CREATE INDEX IF NOT EXISTS idx_visited_session ON visited(sessionId)",
  "CREATE INDEX IF NOT EXISTS idx_sessions_status ON sessions(status)",
];

/**
 * Opens a SQLite database file and wraps it in a small runtime-neutral facade.
 *
 * @param {string} filePath - Path of the database file.
 * @returns {Promise<{run: Function, get: Function, all: Function, close: Function}>}
 */
async function openDatabase(filePath) {
  if (IS_BUN) {
    const { Database } = await import("bun:sqlite");
    const db = new Database(filePath);
    return {
      run: (sql, ...params) => db.run(sql, ...params),
      get: (sql, ...params) => db.query(sql).get(...params),
      all: (sql, ...params) => db.query(sql).all(...params),
      close: () => db.close(),
    };
  }

  const { DatabaseSync } = await import("node:sqlite");
  const db = new DatabaseSync(filePath);
  return {
    run: (sql, ...params) => {
      db.prepare(sql).run(...params);
    },
    get: (sql, ...params) => db.prepare(sql).get(...params),
    all: (sql, ...params) => db.prepare(sql).all(...params),
    close: () => db.close(),
  };
}

/**
 * SQLite-backed record of crawl sessions, their pending URL queue and visited URLs.
 *
 * All three tables live in one database file (`storeDir/dbFileName`).
 */
class NavigationHistory {
  db = null;
  options;
  storeDir;
  dbPath;
  closed = false;
  initPromise = null;

  /**
   * @param {object} [options]
   * @param {string} [options.storeDir="/tmp/rezo-crawler/navigation"] - Directory for the database file.
   * @param {string} [options.dbFileName="navigation.db"]
   * @param {boolean} [options.hashUrls=false] - Key rows by the SHA-256 hex digest of the URL.
   */
  constructor(options = {}) {
    this.options = {
      storeDir: options.storeDir || "/tmp/rezo-crawler/navigation",
      dbFileName: options.dbFileName || "navigation.db",
      hashUrls: options.hashUrls ?? false,
    };
    this.storeDir = path.resolve(this.options.storeDir);
    this.dbPath = path.join(this.storeDir, this.options.dbFileName);
    if (!fs.existsSync(this.storeDir)) {
      fs.mkdirSync(this.storeDir, { recursive: true });
    }
  }

  /** Async factory that returns an already-initialized instance. */
  static async create(options = {}) {
    const history = new NavigationHistory(options);
    await history.initialize();
    return history;
  }

  /** Opens the database and creates the schema once; repeated calls share the same promise. */
  async initialize() {
    if (this.initPromise) return this.initPromise;
    this.initPromise = (async () => {
      const db = await openDatabase(this.dbPath);
      try {
        // close() may have run while the open was in flight.
        if (this.closed) throw new Error(CLOSED_MESSAGE);
        for (const pragma of PRAGMAS) db.run(pragma);
        for (const statement of SCHEMA) db.run(statement);
      } catch (err) {
        try {
          db.close();
        } catch {
          // A failed close must not mask the original error.
        }
        throw err;
      }
      this.db = db;
    })();
    return this.initPromise;
  }

  /** Returns the row key for a URL: its SHA-256 hex digest when `hashUrls` is set, else the URL. */
  getUrlKey(url) {
    if (this.options.hashUrls) return createHash("sha256").update(url).digest("hex");
    return url;
  }

  /**
   * Resolves the open database, initializing lazily so instances built with `new`
   * work without an explicit `initialize()` call.
   *
   * @throws {Error} When the store has been closed.
   */
  async #database() {
    if (this.closed) throw new Error(CLOSED_MESSAGE);
    await this.initialize();
    if (this.closed || !this.db) throw new Error(CLOSED_MESSAGE);
    return this.db;
  }

  /**
   * Creates (or replaces) a session row in status "running" with zeroed counters.
   *
   * @returns {Promise<object>} The session record; `metadata` is the JSON string or undefined.
   */
  async createSession(sessionId, baseUrl, metadata) {
    const db = await this.#database();
    const now = Date.now();
    const session = {
      sessionId,
      baseUrl,
      startedAt: now,
      lastActivityAt: now,
      status: "running",
      urlsVisited: 0,
      urlsQueued: 0,
      urlsFailed: 0,
      metadata: metadata ? JSON.stringify(metadata) : undefined,
    };
    db.run(
      `INSERT OR REPLACE INTO sessions (sessionId, baseUrl, startedAt, lastActivityAt, status, urlsVisited, urlsQueued, urlsFailed, metadata)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId,
      baseUrl,
      now,
      now,
      "running",
      0,
      0,
      0,
      session.metadata ?? null,
    );
    return session;
  }

  /** Returns the session row for `sessionId` as produced by the driver's `get`. */
  async getSession(sessionId) {
    const db = await this.#database();
    return db.get("SELECT * FROM sessions WHERE sessionId = ?", sessionId);
  }

  /** Sets the session status and refreshes `lastActivityAt`. */
  async updateSessionStatus(sessionId, status) {
    const db = await this.#database();
    db.run(
      "UPDATE sessions SET status = ?, lastActivityAt = ? WHERE sessionId = ?",
      status,
      Date.now(),
      sessionId,
    );
  }

  /**
   * Updates whichever of `urlsVisited`, `urlsQueued`, `urlsFailed` are defined in `stats`,
   * and always refreshes `lastActivityAt`.
   */
  async updateSessionStats(sessionId, stats) {
    const db = await this.#database();
    const assignments = ["lastActivityAt = ?"];
    const params = [Date.now()];
    for (const column of ["urlsVisited", "urlsQueued", "urlsFailed"]) {
      if (stats[column] !== undefined) {
        assignments.push(`${column} = ?`);
        params.push(stats[column]);
      }
    }
    params.push(sessionId);
    db.run(`UPDATE sessions SET ${assignments.join(", ")} WHERE sessionId = ?`, ...params);
  }

  /**
   * Queues a URL unless it is already queued or visited in this session.
   *
   * @returns {Promise<boolean>} True when a row was inserted, false when it was a duplicate.
   */
  async addToQueue(sessionId, url, options = {}) {
    const db = await this.#database();
    const urlKey = this.getUrlKey(url);
    if (db.get("SELECT id FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey)) {
      return false;
    }
    if (db.get("SELECT id FROM visited WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey)) {
      return false;
    }
    db.run(
      `INSERT INTO queue (sessionId, urlKey, originalUrl, method, priority, body, headers, metadata, addedAt)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId,
      urlKey,
      url,
      options.method || "GET",
      options.priority || 0,
      options.body ? JSON.stringify(options.body) : null,
      options.headers ? JSON.stringify(options.headers) : null,
      options.metadata ? JSON.stringify(options.metadata) : null,
      Date.now(),
    );
    return true;
  }

  /** Returns the highest-priority, oldest queued row for the session (not removed). */
  async getNextFromQueue(sessionId) {
    const db = await this.#database();
    return db.get(
      "SELECT originalUrl as url, method, priority, body, headers, metadata, addedAt FROM queue WHERE sessionId = ? ORDER BY priority DESC, addedAt ASC LIMIT 1",
      sessionId,
    );
  }

  /** Deletes a URL from the session queue; always resolves true. */
  async removeFromQueue(sessionId, url) {
    const db = await this.#database();
    db.run(
      "DELETE FROM queue WHERE sessionId = ? AND urlKey = ?",
      sessionId,
      this.getUrlKey(url),
    );
    return true;
  }

  /** Number of queued rows for the session. */
  async getQueueSize(sessionId) {
    const db = await this.#database();
    return db.get("SELECT COUNT(*) as count FROM queue WHERE sessionId = ?", sessionId)?.count || 0;
  }

  /** Moves a URL from the queue to the visited table, recording the outcome in `result`. */
  async markVisited(sessionId, url, result = {}) {
    const db = await this.#database();
    const urlKey = this.getUrlKey(url);
    db.run("DELETE FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
    db.run(
      `INSERT OR REPLACE INTO visited (sessionId, urlKey, originalUrl, status, visitedAt, finalUrl, contentType, errorMessage)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId,
      urlKey,
      url,
      result.status || 0,
      Date.now(),
      result.finalUrl ?? null,
      result.contentType ?? null,
      result.errorMessage ?? null,
    );
  }

  /** True when the URL has a visited row in this session. */
  async isVisited(sessionId, url) {
    const db = await this.#database();
    return Boolean(
      db.get(
        "SELECT id FROM visited WHERE sessionId = ? AND urlKey = ?",
        sessionId,
        this.getUrlKey(url),
      ),
    );
  }

  /** Number of visited rows for the session. */
  async getVisitedCount(sessionId) {
    const db = await this.#database();
    return (
      db.get("SELECT COUNT(*) as count FROM visited WHERE sessionId = ?", sessionId)?.count || 0
    );
  }

  /** Visited rows whose status is >= 400 or that carry an error message. */
  async getFailedUrls(sessionId) {
    const db = await this.#database();
    // The visited table stores the URL in `originalUrl`; there is no `url` column,
    // so it is aliased here (as the queue queries do) instead of selected directly.
    return db.all(
      "SELECT originalUrl as url, status, visitedAt, finalUrl, contentType, errorMessage FROM visited WHERE sessionId = ? AND (status >= 400 OR errorMessage IS NOT NULL)",
      sessionId,
    );
  }

  /** Every queued row for the session, highest priority and oldest first. */
  async getAllQueuedUrls(sessionId) {
    const db = await this.#database();
    return db.all(
      "SELECT originalUrl as url, method, priority, body, headers, metadata, addedAt FROM queue WHERE sessionId = ? ORDER BY priority DESC, addedAt ASC",
      sessionId,
    );
  }

  /** Deletes every queued row of the session. */
  async clearQueue(sessionId) {
    const db = await this.#database();
    db.run("DELETE FROM queue WHERE sessionId = ?", sessionId);
  }

  /** Deletes every visited row of the session. */
  async clearVisited(sessionId) {
    const db = await this.#database();
    db.run("DELETE FROM visited WHERE sessionId = ?", sessionId);
  }

  /** Deletes the session together with its queue and visited rows. */
  async deleteSession(sessionId) {
    const db = await this.#database();
    db.run("DELETE FROM queue WHERE sessionId = ?", sessionId);
    db.run("DELETE FROM visited WHERE sessionId = ?", sessionId);
    db.run("DELETE FROM sessions WHERE sessionId = ?", sessionId);
  }

  /** Sessions in status 'running' or 'paused', most recently active first. */
  async getResumableSessions() {
    const db = await this.#database();
    return db.all(
      "SELECT * FROM sessions WHERE status IN ('running', 'paused') ORDER BY lastActivityAt DESC",
    );
  }

  /** Closes the database. Safe to call more than once. */
  async close() {
    if (this.closed) return;
    this.closed = true;
    if (this.db) {
      this.db.close();
      this.db = null;
    }
  }

  get isClosed() {
    return this.closed;
  }

  get databasePath() {
    return this.dbPath;
  }
}

exports.NavigationHistory = NavigationHistory;
@@ -0,0 +1,43 @@
1
import fs from "node:fs";
import path from "node:path";
import { createHash } from "node:crypto";

/** True when the Bun runtime global is present; selects `bun:sqlite` over `node:sqlite`. */
const IS_BUN = typeof globalThis.Bun !== "undefined";

const CLOSED_MESSAGE = "NavigationHistory is closed";

const PRAGMAS = [
  "PRAGMA journal_mode = WAL",
  "PRAGMA synchronous = NORMAL",
  "PRAGMA cache_size = -32000",
  "PRAGMA temp_store = MEMORY",
  "PRAGMA mmap_size = 134217728",
];

const SCHEMA = [
  `CREATE TABLE IF NOT EXISTS sessions (
    sessionId TEXT PRIMARY KEY,
    baseUrl TEXT NOT NULL,
    startedAt INTEGER NOT NULL,
    lastActivityAt INTEGER NOT NULL,
    status TEXT DEFAULT 'running',
    urlsVisited INTEGER DEFAULT 0,
    urlsQueued INTEGER DEFAULT 0,
    urlsFailed INTEGER DEFAULT 0,
    metadata TEXT
  )`,
  `CREATE TABLE IF NOT EXISTS queue (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    sessionId TEXT NOT NULL,
    urlKey TEXT NOT NULL,
    originalUrl TEXT NOT NULL,
    method TEXT DEFAULT 'GET',
    priority INTEGER DEFAULT 0,
    body TEXT,
    headers TEXT,
    metadata TEXT,
    addedAt INTEGER NOT NULL,
    UNIQUE(sessionId, urlKey)
  )`,
  `CREATE TABLE IF NOT EXISTS visited (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    sessionId TEXT NOT NULL,
    urlKey TEXT NOT NULL,
    originalUrl TEXT NOT NULL,
    status INTEGER,
    visitedAt INTEGER NOT NULL,
    finalUrl TEXT,
    contentType TEXT,
    errorMessage TEXT,
    UNIQUE(sessionId, urlKey)
  )`,
  "CREATE INDEX IF NOT EXISTS idx_queue_session ON queue(sessionId)",
  "CREATE INDEX IF NOT EXISTS idx_queue_priority ON queue(sessionId, priority DESC)",
  "CREATE INDEX IF NOT EXISTS idx_visited_session ON visited(sessionId)",
  "CREATE INDEX IF NOT EXISTS idx_sessions_status ON sessions(status)",
];

/**
 * Opens a SQLite database file and wraps it in a small runtime-neutral facade.
 *
 * @param {string} filePath - Path of the database file.
 * @returns {Promise<{run: Function, get: Function, all: Function, close: Function}>}
 */
async function openDatabase(filePath) {
  if (IS_BUN) {
    const { Database } = await import("bun:sqlite");
    const db = new Database(filePath);
    return {
      run: (sql, ...params) => db.run(sql, ...params),
      get: (sql, ...params) => db.query(sql).get(...params),
      all: (sql, ...params) => db.query(sql).all(...params),
      close: () => db.close(),
    };
  }

  const { DatabaseSync } = await import("node:sqlite");
  const db = new DatabaseSync(filePath);
  return {
    run: (sql, ...params) => {
      db.prepare(sql).run(...params);
    },
    get: (sql, ...params) => db.prepare(sql).get(...params),
    all: (sql, ...params) => db.prepare(sql).all(...params),
    close: () => db.close(),
  };
}

/**
 * SQLite-backed record of crawl sessions, their pending URL queue and visited URLs.
 *
 * All three tables live in one database file (`storeDir/dbFileName`).
 */
class NavigationHistory {
  db = null;
  options;
  storeDir;
  dbPath;
  closed = false;
  initPromise = null;

  /**
   * @param {object} [options]
   * @param {string} [options.storeDir="/tmp/rezo-crawler/navigation"] - Directory for the database file.
   * @param {string} [options.dbFileName="navigation.db"]
   * @param {boolean} [options.hashUrls=false] - Key rows by the SHA-256 hex digest of the URL.
   */
  constructor(options = {}) {
    this.options = {
      storeDir: options.storeDir || "/tmp/rezo-crawler/navigation",
      dbFileName: options.dbFileName || "navigation.db",
      hashUrls: options.hashUrls ?? false,
    };
    this.storeDir = path.resolve(this.options.storeDir);
    this.dbPath = path.join(this.storeDir, this.options.dbFileName);
    if (!fs.existsSync(this.storeDir)) {
      fs.mkdirSync(this.storeDir, { recursive: true });
    }
  }

  /** Async factory that returns an already-initialized instance. */
  static async create(options = {}) {
    const history = new NavigationHistory(options);
    await history.initialize();
    return history;
  }

  /** Opens the database and creates the schema once; repeated calls share the same promise. */
  async initialize() {
    if (this.initPromise) return this.initPromise;
    this.initPromise = (async () => {
      const db = await openDatabase(this.dbPath);
      try {
        // close() may have run while the open was in flight.
        if (this.closed) throw new Error(CLOSED_MESSAGE);
        for (const pragma of PRAGMAS) db.run(pragma);
        for (const statement of SCHEMA) db.run(statement);
      } catch (err) {
        try {
          db.close();
        } catch {
          // A failed close must not mask the original error.
        }
        throw err;
      }
      this.db = db;
    })();
    return this.initPromise;
  }

  /** Returns the row key for a URL: its SHA-256 hex digest when `hashUrls` is set, else the URL. */
  getUrlKey(url) {
    if (this.options.hashUrls) return createHash("sha256").update(url).digest("hex");
    return url;
  }

  /**
   * Resolves the open database, initializing lazily so instances built with `new`
   * work without an explicit `initialize()` call.
   *
   * @throws {Error} When the store has been closed.
   */
  async #database() {
    if (this.closed) throw new Error(CLOSED_MESSAGE);
    await this.initialize();
    if (this.closed || !this.db) throw new Error(CLOSED_MESSAGE);
    return this.db;
  }

  /**
   * Creates (or replaces) a session row in status "running" with zeroed counters.
   *
   * @returns {Promise<object>} The session record; `metadata` is the JSON string or undefined.
   */
  async createSession(sessionId, baseUrl, metadata) {
    const db = await this.#database();
    const now = Date.now();
    const session = {
      sessionId,
      baseUrl,
      startedAt: now,
      lastActivityAt: now,
      status: "running",
      urlsVisited: 0,
      urlsQueued: 0,
      urlsFailed: 0,
      metadata: metadata ? JSON.stringify(metadata) : undefined,
    };
    db.run(
      `INSERT OR REPLACE INTO sessions (sessionId, baseUrl, startedAt, lastActivityAt, status, urlsVisited, urlsQueued, urlsFailed, metadata)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId,
      baseUrl,
      now,
      now,
      "running",
      0,
      0,
      0,
      session.metadata ?? null,
    );
    return session;
  }

  /** Returns the session row for `sessionId` as produced by the driver's `get`. */
  async getSession(sessionId) {
    const db = await this.#database();
    return db.get("SELECT * FROM sessions WHERE sessionId = ?", sessionId);
  }

  /** Sets the session status and refreshes `lastActivityAt`. */
  async updateSessionStatus(sessionId, status) {
    const db = await this.#database();
    db.run(
      "UPDATE sessions SET status = ?, lastActivityAt = ? WHERE sessionId = ?",
      status,
      Date.now(),
      sessionId,
    );
  }

  /**
   * Updates whichever of `urlsVisited`, `urlsQueued`, `urlsFailed` are defined in `stats`,
   * and always refreshes `lastActivityAt`.
   */
  async updateSessionStats(sessionId, stats) {
    const db = await this.#database();
    const assignments = ["lastActivityAt = ?"];
    const params = [Date.now()];
    for (const column of ["urlsVisited", "urlsQueued", "urlsFailed"]) {
      if (stats[column] !== undefined) {
        assignments.push(`${column} = ?`);
        params.push(stats[column]);
      }
    }
    params.push(sessionId);
    db.run(`UPDATE sessions SET ${assignments.join(", ")} WHERE sessionId = ?`, ...params);
  }

  /**
   * Queues a URL unless it is already queued or visited in this session.
   *
   * @returns {Promise<boolean>} True when a row was inserted, false when it was a duplicate.
   */
  async addToQueue(sessionId, url, options = {}) {
    const db = await this.#database();
    const urlKey = this.getUrlKey(url);
    if (db.get("SELECT id FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey)) {
      return false;
    }
    if (db.get("SELECT id FROM visited WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey)) {
      return false;
    }
    db.run(
      `INSERT INTO queue (sessionId, urlKey, originalUrl, method, priority, body, headers, metadata, addedAt)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId,
      urlKey,
      url,
      options.method || "GET",
      options.priority || 0,
      options.body ? JSON.stringify(options.body) : null,
      options.headers ? JSON.stringify(options.headers) : null,
      options.metadata ? JSON.stringify(options.metadata) : null,
      Date.now(),
    );
    return true;
  }

  /** Returns the highest-priority, oldest queued row for the session (not removed). */
  async getNextFromQueue(sessionId) {
    const db = await this.#database();
    return db.get(
      "SELECT originalUrl as url, method, priority, body, headers, metadata, addedAt FROM queue WHERE sessionId = ? ORDER BY priority DESC, addedAt ASC LIMIT 1",
      sessionId,
    );
  }

  /** Deletes a URL from the session queue; always resolves true. */
  async removeFromQueue(sessionId, url) {
    const db = await this.#database();
    db.run(
      "DELETE FROM queue WHERE sessionId = ? AND urlKey = ?",
      sessionId,
      this.getUrlKey(url),
    );
    return true;
  }

  /** Number of queued rows for the session. */
  async getQueueSize(sessionId) {
    const db = await this.#database();
    return db.get("SELECT COUNT(*) as count FROM queue WHERE sessionId = ?", sessionId)?.count || 0;
  }

  /** Moves a URL from the queue to the visited table, recording the outcome in `result`. */
  async markVisited(sessionId, url, result = {}) {
    const db = await this.#database();
    const urlKey = this.getUrlKey(url);
    db.run("DELETE FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
    db.run(
      `INSERT OR REPLACE INTO visited (sessionId, urlKey, originalUrl, status, visitedAt, finalUrl, contentType, errorMessage)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
      sessionId,
      urlKey,
      url,
      result.status || 0,
      Date.now(),
      result.finalUrl ?? null,
      result.contentType ?? null,
      result.errorMessage ?? null,
    );
  }

  /** True when the URL has a visited row in this session. */
  async isVisited(sessionId, url) {
    const db = await this.#database();
    return Boolean(
      db.get(
        "SELECT id FROM visited WHERE sessionId = ? AND urlKey = ?",
        sessionId,
        this.getUrlKey(url),
      ),
    );
  }

  /** Number of visited rows for the session. */
  async getVisitedCount(sessionId) {
    const db = await this.#database();
    return (
      db.get("SELECT COUNT(*) as count FROM visited WHERE sessionId = ?", sessionId)?.count || 0
    );
  }

  /** Visited rows whose status is >= 400 or that carry an error message. */
  async getFailedUrls(sessionId) {
    const db = await this.#database();
    // The visited table stores the URL in `originalUrl`; there is no `url` column,
    // so it is aliased here (as the queue queries do) instead of selected directly.
    return db.all(
      "SELECT originalUrl as url, status, visitedAt, finalUrl, contentType, errorMessage FROM visited WHERE sessionId = ? AND (status >= 400 OR errorMessage IS NOT NULL)",
      sessionId,
    );
  }

  /** Every queued row for the session, highest priority and oldest first. */
  async getAllQueuedUrls(sessionId) {
    const db = await this.#database();
    return db.all(
      "SELECT originalUrl as url, method, priority, body, headers, metadata, addedAt FROM queue WHERE sessionId = ? ORDER BY priority DESC, addedAt ASC",
      sessionId,
    );
  }

  /** Deletes every queued row of the session. */
  async clearQueue(sessionId) {
    const db = await this.#database();
    db.run("DELETE FROM queue WHERE sessionId = ?", sessionId);
  }

  /** Deletes every visited row of the session. */
  async clearVisited(sessionId) {
    const db = await this.#database();
    db.run("DELETE FROM visited WHERE sessionId = ?", sessionId);
  }

  /** Deletes the session together with its queue and visited rows. */
  async deleteSession(sessionId) {
    const db = await this.#database();
    db.run("DELETE FROM queue WHERE sessionId = ?", sessionId);
    db.run("DELETE FROM visited WHERE sessionId = ?", sessionId);
    db.run("DELETE FROM sessions WHERE sessionId = ?", sessionId);
  }

  /** Sessions in status 'running' or 'paused', most recently active first. */
  async getResumableSessions() {
    const db = await this.#database();
    return db.all(
      "SELECT * FROM sessions WHERE status IN ('running', 'paused') ORDER BY lastActivityAt DESC",
    );
  }

  /** Closes the database. Safe to call more than once. */
  async close() {
    if (this.closed) return;
    this.closed = true;
    if (this.db) {
      this.db.close();
      this.db = null;
    }
  }

  get isClosed() {
    return this.closed;
  }

  get databasePath() {
    return this.dbPath;
  }
}

export { NavigationHistory };
@@ -0,0 +1,2 @@
1
/** Default lifetime of a cached robots.txt, in milliseconds (24 hours). */
const DEFAULT_CACHE_TTL_MS = 86400000;

/** Regular-expression metacharacters that must be taken literally inside a rule path. */
const REGEX_META = /[.+?^${}()|[\]\\]/g;

/**
 * robots.txt parser and matcher with a per-origin in-memory cache.
 */
class RobotsTxt {
  /** Cached directives keyed by origin: `{ directives, fetchedAt, ttl }`. */
  cache = {};
  userAgent;
  cacheTTL;

  /**
   * @param {object} [options]
   * @param {string} [options.userAgent="RezoBot"] - Agent name matched against `User-agent` groups.
   * @param {number} [options.cacheTTL=86400000] - Cache lifetime in milliseconds.
   */
  constructor(options = {}) {
    this.userAgent = options.userAgent || "RezoBot";
    this.cacheTTL = options.cacheTTL || DEFAULT_CACHE_TTL_MS;
  }

  /**
   * Parses robots.txt text into directives for one user agent.
   *
   * Rules from groups naming the agent are used; rules from the `*` group are
   * used only when no specific group matched or the specific groups produced no rules.
   *
   * @param {string} content - Raw robots.txt text.
   * @param {string} [userAgent] - Overrides the instance's agent name.
   * @returns {{rules: Array<{path: string, allow: boolean}>, sitemaps: string[], crawlDelay?: number}}
   *   Rules are sorted longest path first, `allow` before `disallow` on equal length;
   *   `crawlDelay` is in milliseconds.
   */
  parse(content, userAgent) {
    const agent = (userAgent || this.userAgent).toLowerCase();
    const directives = { rules: [], sitemaps: [] };
    const wildcardRules = [];
    let wildcardDelay;
    let group = null;
    let groupMatches = false;
    let sawSpecificGroup = false;

    for (const rawLine of content.split("\n")) {
      // "#" starts a comment anywhere on the line, not only in column 0.
      const hashAt = rawLine.indexOf("#");
      const line = (hashAt === -1 ? rawLine : rawLine.slice(0, hashAt)).trim();
      if (!line) continue;

      const colonAt = line.indexOf(":");
      if (colonAt === -1) continue;
      const field = line.slice(0, colonAt).trim().toLowerCase();
      const value = line.slice(colonAt + 1).trim();

      // Sitemap lines are independent of user-agent groups, so they are
      // collected even when they appear before the first User-agent line.
      if (field === "sitemap") {
        if (value && !directives.sitemaps.includes(value)) directives.sitemaps.push(value);
        continue;
      }

      if (field === "user-agent") {
        group = value.toLowerCase();
        groupMatches =
          group === "*" || group === agent || agent.includes(group) || group.includes(agent);
        if (groupMatches && group !== "*") sawSpecificGroup = true;
        continue;
      }

      if (group === null) continue;

      const isWildcard = group === "*";
      const toFallback = isWildcard && !sawSpecificGroup;
      const toResult = groupMatches && !isWildcard;

      if ((field === "disallow" || field === "allow") && value) {
        const rule = { path: value, allow: field === "allow" };
        if (toFallback) wildcardRules.push(rule);
        else if (toResult) directives.rules.push(rule);
      } else if (field === "crawl-delay") {
        const seconds = Number.parseFloat(value);
        if (!Number.isNaN(seconds)) {
          if (toFallback) wildcardDelay = seconds * 1000;
          else if (toResult) directives.crawlDelay = seconds * 1000;
        }
      }
    }

    if (!sawSpecificGroup || directives.rules.length === 0) {
      directives.rules.push(...wildcardRules);
      if (directives.crawlDelay === undefined) directives.crawlDelay = wildcardDelay;
    }

    directives.rules.sort((a, b) => {
      if (a.path.length !== b.path.length) return b.path.length - a.path.length;
      if (a.allow === b.allow) return 0;
      return a.allow ? -1 : 1;
    });
    return directives;
  }

  /**
   * Returns the directives for the origin of `url`, fetching `<origin>/robots.txt`
   * through `fetcher` when no fresh cache entry exists.
   *
   * Any failure (thrown error, non-200 status, empty body) is cached as "no rules".
   *
   * @param {string} url - Any URL on the target origin.
   * @param {(robotsUrl: string) => Promise<{status: number, data: string}>} fetcher
   * @returns {Promise<object>} Parsed directives.
   */
  async fetch(url, fetcher) {
    const { origin } = new URL(url);
    const cached = this.cache[origin];
    if (cached && Date.now() - cached.fetchedAt < cached.ttl) return cached.directives;

    try {
      const response = await fetcher(`${origin}/robots.txt`);
      if (response.status === 200 && response.data) {
        const directives = this.parse(response.data);
        this.cache[origin] = { directives, fetchedAt: Date.now(), ttl: this.cacheTTL };
        return directives;
      }
    } catch {
      // Deliberate best effort: an unreachable or unparsable robots.txt means "no rules".
    }

    const empty = { rules: [], sitemaps: [] };
    this.cache[origin] = { directives: empty, fetchedAt: Date.now(), ttl: this.cacheTTL };
    return empty;
  }

  /**
   * Decides whether `url` may be crawled. The first matching rule wins;
   * a URL matched by no rule is allowed.
   *
   * @param {string} url
   * @param {object} [directives] - Defaults to the cached directives of the URL's origin.
   * @returns {boolean}
   */
  isAllowed(url, directives) {
    const target = new URL(url);
    const requestPath = target.pathname + target.search;
    const rules = directives?.rules || this.cache[target.origin]?.directives.rules || [];
    for (const rule of rules) {
      if (this.matchPath(requestPath, rule.path)) return rule.allow;
    }
    return true;
  }

  /**
   * Tests a request path against a robots.txt rule path.
   *
   * `*` matches any run of characters and a trailing `$` anchors the end of the
   * path; everything else is literal and the pattern is anchored at the start.
   *
   * @param {string} requestPath - Path plus query string.
   * @param {string} pattern - Rule path from robots.txt.
   * @returns {boolean}
   */
  matchPath(requestPath, pattern) {
    if (pattern === requestPath) return true;
    // Strip the end anchor BEFORE escaping; escaping first turned it into a literal "$"
    // and the anchor was never applied.
    const anchoredAtEnd = pattern.endsWith("$");
    const body = anchoredAtEnd ? pattern.slice(0, -1) : pattern;
    const source = body
      .split("*")
      .map((literal) => literal.replace(REGEX_META, "\\$&"))
      .join(".*");
    return new RegExp(`^${source}${anchoredAtEnd ? "$" : ""}`).test(requestPath);
  }

  /** Cached crawl delay (ms) for the URL's origin, or undefined. */
  getCrawlDelay(url) {
    const { origin } = new URL(url);
    return this.cache[origin]?.directives.crawlDelay;
  }

  /** Cached sitemap URLs for the URL's origin, or an empty array. */
  getSitemaps(url) {
    const { origin } = new URL(url);
    return this.cache[origin]?.directives.sitemaps || [];
  }

  /** Drops the cache entry for one origin, or the whole cache when called without one. */
  clearCache(origin) {
    if (origin) delete this.cache[origin];
    else this.cache = {};
  }

  /** True when a non-expired cache entry exists for the URL's origin. */
  isCached(url) {
    const { origin } = new URL(url);
    const entry = this.cache[origin];
    return Boolean(entry) && Date.now() - entry.fetchedAt < entry.ttl;
  }
}

exports.RobotsTxt = RobotsTxt;
exports.default = RobotsTxt;
module.exports = Object.assign(RobotsTxt, exports);
@@ -0,0 +1,2 @@
1
/** Default lifetime of a cached robots.txt, in milliseconds (24 hours). */
const DEFAULT_CACHE_TTL_MS = 86400000;

/** Regular-expression metacharacters that must be taken literally inside a rule path. */
const REGEX_META = /[.+?^${}()|[\]\\]/g;

/**
 * robots.txt parser and matcher with a per-origin in-memory cache.
 */
class RobotsTxt {
  /** Cached directives keyed by origin: `{ directives, fetchedAt, ttl }`. */
  cache = {};
  userAgent;
  cacheTTL;

  /**
   * @param {object} [options]
   * @param {string} [options.userAgent="RezoBot"] - Agent name matched against `User-agent` groups.
   * @param {number} [options.cacheTTL=86400000] - Cache lifetime in milliseconds.
   */
  constructor(options = {}) {
    this.userAgent = options.userAgent || "RezoBot";
    this.cacheTTL = options.cacheTTL || DEFAULT_CACHE_TTL_MS;
  }

  /**
   * Parses robots.txt text into directives for one user agent.
   *
   * Rules from groups naming the agent are used; rules from the `*` group are
   * used only when no specific group matched or the specific groups produced no rules.
   *
   * @param {string} content - Raw robots.txt text.
   * @param {string} [userAgent] - Overrides the instance's agent name.
   * @returns {{rules: Array<{path: string, allow: boolean}>, sitemaps: string[], crawlDelay?: number}}
   *   Rules are sorted longest path first, `allow` before `disallow` on equal length;
   *   `crawlDelay` is in milliseconds.
   */
  parse(content, userAgent) {
    const agent = (userAgent || this.userAgent).toLowerCase();
    const directives = { rules: [], sitemaps: [] };
    const wildcardRules = [];
    let wildcardDelay;
    let group = null;
    let groupMatches = false;
    let sawSpecificGroup = false;

    for (const rawLine of content.split("\n")) {
      // "#" starts a comment anywhere on the line, not only in column 0.
      const hashAt = rawLine.indexOf("#");
      const line = (hashAt === -1 ? rawLine : rawLine.slice(0, hashAt)).trim();
      if (!line) continue;

      const colonAt = line.indexOf(":");
      if (colonAt === -1) continue;
      const field = line.slice(0, colonAt).trim().toLowerCase();
      const value = line.slice(colonAt + 1).trim();

      // Sitemap lines are independent of user-agent groups, so they are
      // collected even when they appear before the first User-agent line.
      if (field === "sitemap") {
        if (value && !directives.sitemaps.includes(value)) directives.sitemaps.push(value);
        continue;
      }

      if (field === "user-agent") {
        group = value.toLowerCase();
        groupMatches =
          group === "*" || group === agent || agent.includes(group) || group.includes(agent);
        if (groupMatches && group !== "*") sawSpecificGroup = true;
        continue;
      }

      if (group === null) continue;

      const isWildcard = group === "*";
      const toFallback = isWildcard && !sawSpecificGroup;
      const toResult = groupMatches && !isWildcard;

      if ((field === "disallow" || field === "allow") && value) {
        const rule = { path: value, allow: field === "allow" };
        if (toFallback) wildcardRules.push(rule);
        else if (toResult) directives.rules.push(rule);
      } else if (field === "crawl-delay") {
        const seconds = Number.parseFloat(value);
        if (!Number.isNaN(seconds)) {
          if (toFallback) wildcardDelay = seconds * 1000;
          else if (toResult) directives.crawlDelay = seconds * 1000;
        }
      }
    }

    if (!sawSpecificGroup || directives.rules.length === 0) {
      directives.rules.push(...wildcardRules);
      if (directives.crawlDelay === undefined) directives.crawlDelay = wildcardDelay;
    }

    directives.rules.sort((a, b) => {
      if (a.path.length !== b.path.length) return b.path.length - a.path.length;
      if (a.allow === b.allow) return 0;
      return a.allow ? -1 : 1;
    });
    return directives;
  }

  /**
   * Returns the directives for the origin of `url`, fetching `<origin>/robots.txt`
   * through `fetcher` when no fresh cache entry exists.
   *
   * Any failure (thrown error, non-200 status, empty body) is cached as "no rules".
   *
   * @param {string} url - Any URL on the target origin.
   * @param {(robotsUrl: string) => Promise<{status: number, data: string}>} fetcher
   * @returns {Promise<object>} Parsed directives.
   */
  async fetch(url, fetcher) {
    const { origin } = new URL(url);
    const cached = this.cache[origin];
    if (cached && Date.now() - cached.fetchedAt < cached.ttl) return cached.directives;

    try {
      const response = await fetcher(`${origin}/robots.txt`);
      if (response.status === 200 && response.data) {
        const directives = this.parse(response.data);
        this.cache[origin] = { directives, fetchedAt: Date.now(), ttl: this.cacheTTL };
        return directives;
      }
    } catch {
      // Deliberate best effort: an unreachable or unparsable robots.txt means "no rules".
    }

    const empty = { rules: [], sitemaps: [] };
    this.cache[origin] = { directives: empty, fetchedAt: Date.now(), ttl: this.cacheTTL };
    return empty;
  }

  /**
   * Decides whether `url` may be crawled. The first matching rule wins;
   * a URL matched by no rule is allowed.
   *
   * @param {string} url
   * @param {object} [directives] - Defaults to the cached directives of the URL's origin.
   * @returns {boolean}
   */
  isAllowed(url, directives) {
    const target = new URL(url);
    const requestPath = target.pathname + target.search;
    const rules = directives?.rules || this.cache[target.origin]?.directives.rules || [];
    for (const rule of rules) {
      if (this.matchPath(requestPath, rule.path)) return rule.allow;
    }
    return true;
  }

  /**
   * Tests a request path against a robots.txt rule path.
   *
   * `*` matches any run of characters and a trailing `$` anchors the end of the
   * path; everything else is literal and the pattern is anchored at the start.
   *
   * @param {string} requestPath - Path plus query string.
   * @param {string} pattern - Rule path from robots.txt.
   * @returns {boolean}
   */
  matchPath(requestPath, pattern) {
    if (pattern === requestPath) return true;
    // Strip the end anchor BEFORE escaping; escaping first turned it into a literal "$"
    // and the anchor was never applied.
    const anchoredAtEnd = pattern.endsWith("$");
    const body = anchoredAtEnd ? pattern.slice(0, -1) : pattern;
    const source = body
      .split("*")
      .map((literal) => literal.replace(REGEX_META, "\\$&"))
      .join(".*");
    return new RegExp(`^${source}${anchoredAtEnd ? "$" : ""}`).test(requestPath);
  }

  /** Cached crawl delay (ms) for the URL's origin, or undefined. */
  getCrawlDelay(url) {
    const { origin } = new URL(url);
    return this.cache[origin]?.directives.crawlDelay;
  }

  /** Cached sitemap URLs for the URL's origin, or an empty array. */
  getSitemaps(url) {
    const { origin } = new URL(url);
    return this.cache[origin]?.directives.sitemaps || [];
  }

  /** Drops the cache entry for one origin, or the whole cache when called without one. */
  clearCache(origin) {
    if (origin) delete this.cache[origin];
    else this.cache = {};
  }

  /** True when a non-expired cache entry exists for the URL's origin. */
  isCached(url) {
    const { origin } = new URL(url);
    const entry = this.cache[origin];
    return Boolean(entry) && Date.now() - entry.fetchedAt < entry.ttl;
  }
}

export { RobotsTxt as default, RobotsTxt };
@@ -0,0 +1,18 @@
1
const fs = require("node:fs");
const path = require("node:path");
const { createHash } = require("node:crypto");

/** Default entry lifetime: 604800000 ms (7 days). */
const DEFAULT_TTL_MS = 604800000;

/** Number of keys placed in a single `IN (...)` lookup by `hasMany`. */
const LOOKUP_CHUNK_SIZE = 500;

const CREATE_TABLE_SQL = `
  CREATE TABLE IF NOT EXISTS urls (
    urlHash TEXT PRIMARY KEY,
    visitedAt INTEGER NOT NULL,
    expiresAt INTEGER NOT NULL,
    namespace TEXT NOT NULL DEFAULT 'default'
  ) WITHOUT ROWID
`;

const UPSERT_URL_SQL = `
  INSERT OR REPLACE INTO urls (urlHash, visitedAt, expiresAt, namespace)
  VALUES (?, ?, ?, ?)
`;

const EVICT_OLDEST_SQL = `
  DELETE FROM urls WHERE urlHash IN (
    SELECT urlHash FROM urls ORDER BY visitedAt ASC LIMIT ?
  )
`;

const isBun = typeof globalThis.Bun !== "undefined";

/**
 * Opens the SQLite file at `dbPath` and wraps the runtime's driver
 * (`bun:sqlite` under Bun, `node:sqlite` otherwise) behind one small
 * synchronous interface.
 *
 * @param {string} dbPath - Path of the database file.
 * @returns {Promise<{run: Function, get: Function, all: Function, exec: Function, close: Function}>}
 */
async function openDatabase(dbPath) {
  if (isBun) {
    const { Database } = await import("bun:sqlite");
    const db = new Database(dbPath);
    return {
      run: (sql, ...params) => db.run(sql, ...params),
      get: (sql, ...params) => db.query(sql).get(...params),
      all: (sql, ...params) => db.query(sql).all(...params),
      exec: (sql) => db.exec(sql),
      close: () => db.close(),
    };
  }

  const { DatabaseSync } = await import("node:sqlite");
  const db = new DatabaseSync(dbPath);
  return {
    run: (sql, ...params) => {
      // Statements without bound parameters go straight through exec().
      if (params.length === 0) db.exec(sql);
      else db.prepare(sql).run(...params);
    },
    get: (sql, ...params) => db.prepare(sql).get(...params),
    all: (sql, ...params) => db.prepare(sql).all(...params),
    exec: (sql) => db.exec(sql),
    close: () => db.close(),
  };
}

/**
 * SQLite-backed record of visited URLs with per-entry expiry.
 *
 * `urlHash` is the table's only primary key and rows are written with
 * `INSERT OR REPLACE`, so storing a URL under a second namespace replaces
 * the row stored under the first one.
 */
class UrlStore {
  db = null;
  options;
  storeDir;
  dbPath;
  closed = false;
  initPromise = null;

  /**
   * Resolves options and creates the storage directory if it is missing.
   * The database itself is opened lazily by `initialize()`.
   *
   * @param {object} [options]
   * @param {string} [options.storeDir="/tmp/rezo-crawler/urls"]
   * @param {string} [options.dbFileName="urls.db"]
   * @param {number} [options.ttl=604800000] - Lifetime in ms; falsy values use the default.
   * @param {number} [options.maxUrls=0] - Row cap; 0 disables the cap.
   * @param {boolean} [options.hashUrls=true] - Store SHA-256 hex digests instead of raw URLs.
   */
  constructor(options = {}) {
    this.options = {
      storeDir: options.storeDir || "/tmp/rezo-crawler/urls",
      dbFileName: options.dbFileName || "urls.db",
      ttl: options.ttl || DEFAULT_TTL_MS,
      maxUrls: options.maxUrls ?? 0,
      hashUrls: options.hashUrls ?? true,
    };
    this.storeDir = path.resolve(this.options.storeDir);
    this.dbPath = path.join(this.storeDir, this.options.dbFileName);
    if (!fs.existsSync(this.storeDir)) {
      fs.mkdirSync(this.storeDir, { recursive: true });
    }
  }

  /**
   * Builds a store and waits for its database to be ready.
   *
   * @param {object} [options] - Same options as the constructor.
   * @returns {Promise<UrlStore>}
   */
  static async create(options = {}) {
    const store = new UrlStore(options);
    await store.initialize();
    return store;
  }

  /**
   * Opens the database, applies pragmas and creates the schema. Concurrent
   * callers share one in-flight promise. A failed attempt is forgotten so a
   * later call can retry instead of replaying the cached rejection.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initPromise) return this.initPromise;

    const pending = (async () => {
      const db = await openDatabase(this.dbPath);
      try {
        db.exec("PRAGMA journal_mode = WAL");
        db.exec("PRAGMA synchronous = NORMAL");
        db.exec("PRAGMA cache_size = -32000");
        db.exec("PRAGMA temp_store = MEMORY");
        db.exec("PRAGMA mmap_size = 134217728");
        db.exec(CREATE_TABLE_SQL);
        db.exec("CREATE INDEX IF NOT EXISTS idx_ns_exp ON urls(namespace, expiresAt)");
      } catch (err) {
        // Do not leak a half-configured handle.
        try {
          db.close();
        } catch {
          // The setup error is the one worth reporting.
        }
        throw err;
      }
      this.db = db;
    })();

    this.initPromise = pending;
    // Side branch only: callers still observe the rejection through `pending`.
    pending.catch(() => {
      if (this.initPromise === pending) this.initPromise = null;
    });
    return pending;
  }

  /**
   * Waits for initialization and returns the handle, or `null` when the
   * store was closed while the caller was waiting.
   */
  async #openHandle() {
    await this.initialize();
    return this.closed ? null : this.db;
  }

  /** Deletes the oldest rows (by `visitedAt`) once the table exceeds `maxUrls`. */
  #enforceMaxUrls(db) {
    const { maxUrls } = this.options;
    if (!(maxUrls > 0)) return;
    const row = db.get("SELECT COUNT(*) as cnt FROM urls");
    if (row && row.cnt > maxUrls) {
      db.run(EVICT_OLDEST_SQL, row.cnt - maxUrls);
    }
  }

  /**
   * Maps a URL to its storage key: a SHA-256 hex digest when `hashUrls`
   * is enabled, otherwise the URL itself.
   *
   * @param {string} url
   * @returns {string}
   */
  getUrlKey(url) {
    if (this.options.hashUrls) {
      return createHash("sha256").update(url).digest("hex");
    }
    return url;
  }

  /**
   * Records one URL as visited.
   *
   * @param {string} url
   * @param {string} [namespace="default"]
   * @param {number} [ttl] - Lifetime in ms; defaults to `options.ttl`.
   * @throws {Error} If the store is closed.
   */
  async set(url, namespace = "default", ttl) {
    if (this.closed) throw new Error("UrlStore is closed");
    const db = await this.#openHandle();
    if (!db) throw new Error("UrlStore is closed");

    const key = this.getUrlKey(url);
    const now = Date.now();
    db.run(UPSERT_URL_SQL, key, now, now + (ttl ?? this.options.ttl), namespace);
    this.#enforceMaxUrls(db);
  }

  /**
   * Records several URLs in one transaction, sharing one timestamp. The
   * `maxUrls` cap is applied before the commit.
   *
   * @param {string[]} urls
   * @param {string} [namespace="default"]
   * @param {number} [ttl] - Lifetime in ms; defaults to `options.ttl`.
   * @throws {Error} If the store is closed, or if any statement fails
   *   (the transaction is rolled back first).
   */
  async setMany(urls, namespace = "default", ttl) {
    if (this.closed) throw new Error("UrlStore is closed");
    if (urls.length === 0) return;
    const db = await this.#openHandle();
    if (!db) throw new Error("UrlStore is closed");

    const now = Date.now();
    const expiresAt = now + (ttl ?? this.options.ttl);
    db.exec("BEGIN TRANSACTION");
    try {
      for (const url of urls) {
        db.run(UPSERT_URL_SQL, this.getUrlKey(url), now, expiresAt, namespace);
      }
      this.#enforceMaxUrls(db);
      db.exec("COMMIT");
    } catch (err) {
      try {
        db.exec("ROLLBACK");
      } catch {
        // A failing rollback must not hide the error that caused it.
      }
      throw err;
    }
  }

  /**
   * Reports whether a URL is stored and not yet expired.
   *
   * @param {string} url
   * @param {string} [namespace] - When given, the row must be in this namespace.
   * @returns {Promise<boolean>} `false` when the store is closed.
   */
  async has(url, namespace) {
    if (this.closed) return false;
    const db = await this.#openHandle();
    if (!db) return false;

    const key = this.getUrlKey(url);
    const now = Date.now();
    const row = namespace
      ? db.get("SELECT expiresAt FROM urls WHERE urlHash = ? AND namespace = ?", key, namespace)
      : db.get("SELECT expiresAt FROM urls WHERE urlHash = ?", key);
    return row ? row.expiresAt >= now : false;
  }

  /**
   * Batched form of `has`.
   *
   * @param {string[]} urls
   * @param {string} [namespace]
   * @returns {Promise<Set<string>>} The input URLs that are stored and unexpired.
   */
  async hasMany(urls, namespace) {
    const found = new Set();
    if (this.closed || urls.length === 0) return found;
    const db = await this.#openHandle();
    if (!db) return found;

    const now = Date.now();
    const urlByKey = new Map();
    for (const url of urls) urlByKey.set(this.getUrlKey(url), url);

    const keys = Array.from(urlByKey.keys());
    for (let start = 0; start < keys.length; start += LOOKUP_CHUNK_SIZE) {
      const chunk = keys.slice(start, start + LOOKUP_CHUNK_SIZE);
      const placeholders = chunk.map(() => "?").join(",");
      const rows = namespace
        ? db.all(
            `SELECT urlHash, expiresAt FROM urls WHERE urlHash IN (${placeholders}) AND namespace = ?`,
            ...chunk,
            namespace,
          )
        : db.all(`SELECT urlHash, expiresAt FROM urls WHERE urlHash IN (${placeholders})`, ...chunk);

      for (const row of rows) {
        if (row.expiresAt < now) continue;
        const url = urlByKey.get(row.urlHash);
        if (url) found.add(url);
      }
    }
    return found;
  }

  /**
   * Returns the input URLs that `hasMany` does not report as visited,
   * in their original order.
   *
   * @param {string[]} urls
   * @param {string} [namespace]
   * @returns {Promise<string[]>}
   */
  async filterUnvisited(urls, namespace) {
    const visited = await this.hasMany(urls, namespace);
    return urls.filter((url) => !visited.has(url));
  }

  /**
   * Removes a URL, optionally only within one namespace.
   *
   * @param {string} url
   * @param {string} [namespace]
   * @returns {Promise<boolean>} `false` when the store is closed; otherwise
   *   `true`, whether or not a row existed.
   */
  async delete(url, namespace) {
    if (this.closed) return false;
    const db = await this.#openHandle();
    if (!db) return false;

    const key = this.getUrlKey(url);
    if (namespace) db.run("DELETE FROM urls WHERE urlHash = ? AND namespace = ?", key, namespace);
    else db.run("DELETE FROM urls WHERE urlHash = ?", key);
    return true;
  }

  /**
   * Removes every row, or only the rows of one namespace.
   *
   * @param {string} [namespace]
   */
  async clear(namespace) {
    if (this.closed) return;
    const db = await this.#openHandle();
    if (!db) return;

    if (namespace) db.run("DELETE FROM urls WHERE namespace = ?", namespace);
    else db.exec("DELETE FROM urls");
  }

  /**
   * Deletes expired rows.
   *
   * @returns {Promise<number>} Row-count difference before and after the delete.
   */
  async cleanup() {
    if (this.closed) return 0;
    const db = await this.#openHandle();
    if (!db) return 0;

    const now = Date.now();
    const before = db.get("SELECT COUNT(*) as cnt FROM urls");
    db.run("DELETE FROM urls WHERE expiresAt < ?", now);
    const after = db.get("SELECT COUNT(*) as cnt FROM urls");
    return (before?.cnt || 0) - (after?.cnt || 0);
  }

  /**
   * Counts unexpired rows, optionally within one namespace.
   *
   * @param {string} [namespace]
   * @returns {Promise<number>}
   */
  async count(namespace) {
    if (this.closed) return 0;
    const db = await this.#openHandle();
    if (!db) return 0;

    const now = Date.now();
    const row = namespace
      ? db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ? AND expiresAt >= ?", namespace, now)
      : db.get("SELECT COUNT(*) as cnt FROM urls WHERE expiresAt >= ?", now);
    return row?.cnt || 0;
  }

  /**
   * Summarises the table. `total` and `expired` honour the namespace
   * filter; `namespaces` always counts distinct namespaces in the table.
   *
   * @param {string} [namespace]
   * @returns {Promise<{total: number, expired: number, namespaces: number}>}
   */
  async stats(namespace) {
    const empty = { total: 0, expired: 0, namespaces: 0 };
    if (this.closed) return empty;
    const db = await this.#openHandle();
    if (!db) return empty;

    const now = Date.now();
    let total;
    let expired;
    if (namespace) {
      total = db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ?", namespace);
      expired = db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ? AND expiresAt < ?", namespace, now);
    } else {
      total = db.get("SELECT COUNT(*) as cnt FROM urls");
      expired = db.get("SELECT COUNT(*) as cnt FROM urls WHERE expiresAt < ?", now);
    }
    const namespaces = db.get("SELECT COUNT(DISTINCT namespace) as cnt FROM urls");
    return {
      total: total?.cnt || 0,
      expired: expired?.cnt || 0,
      namespaces: namespaces?.cnt || 0,
    };
  }

  /**
   * Marks the store closed and releases the database handle. Waits for an
   * in-flight initialization first; never rejects.
   */
  async close() {
    if (this.closed) return;
    this.closed = true;

    if (this.initPromise) {
      try {
        await this.initPromise;
      } catch {
        // The caller that started initialization already received this error.
      }
    }
    if (this.db) {
      try {
        this.db.close();
      } catch {
        // Best-effort shutdown.
      }
      this.db = null;
    }
  }

  /** Whether `close()` has been called. */
  get isClosed() {
    return this.closed;
  }

  /** Resolved path of the database file. */
  get path() {
    return this.dbPath;
  }
}

exports.UrlStore = UrlStore;
exports.default = UrlStore;
module.exports = Object.assign(UrlStore, exports);
@@ -0,0 +1,18 @@
1
import { createRequire } from "node:module";
import fs from "node:fs";
import path from "node:path";
import { createHash } from "node:crypto";

// Emitted by the bundler; nothing in this module calls it.
const require = createRequire(import.meta.url);

/** Default entry lifetime: 604800000 ms (7 days). */
const DEFAULT_TTL_MS = 604800000;

/** Number of keys placed in a single `IN (...)` lookup by `hasMany`. */
const LOOKUP_CHUNK_SIZE = 500;

const CREATE_TABLE_SQL = `
  CREATE TABLE IF NOT EXISTS urls (
    urlHash TEXT PRIMARY KEY,
    visitedAt INTEGER NOT NULL,
    expiresAt INTEGER NOT NULL,
    namespace TEXT NOT NULL DEFAULT 'default'
  ) WITHOUT ROWID
`;

const UPSERT_URL_SQL = `
  INSERT OR REPLACE INTO urls (urlHash, visitedAt, expiresAt, namespace)
  VALUES (?, ?, ?, ?)
`;

const EVICT_OLDEST_SQL = `
  DELETE FROM urls WHERE urlHash IN (
    SELECT urlHash FROM urls ORDER BY visitedAt ASC LIMIT ?
  )
`;

const isBun = typeof globalThis.Bun !== "undefined";

/**
 * Opens the SQLite file at `dbPath` and wraps the runtime's driver
 * (`bun:sqlite` under Bun, `node:sqlite` otherwise) behind one small
 * synchronous interface.
 *
 * @param {string} dbPath - Path of the database file.
 * @returns {Promise<{run: Function, get: Function, all: Function, exec: Function, close: Function}>}
 */
async function openDatabase(dbPath) {
  if (isBun) {
    const { Database } = await import("bun:sqlite");
    const db = new Database(dbPath);
    return {
      run: (sql, ...params) => db.run(sql, ...params),
      get: (sql, ...params) => db.query(sql).get(...params),
      all: (sql, ...params) => db.query(sql).all(...params),
      exec: (sql) => db.exec(sql),
      close: () => db.close(),
    };
  }

  const { DatabaseSync } = await import("node:sqlite");
  const db = new DatabaseSync(dbPath);
  return {
    run: (sql, ...params) => {
      // Statements without bound parameters go straight through exec().
      if (params.length === 0) db.exec(sql);
      else db.prepare(sql).run(...params);
    },
    get: (sql, ...params) => db.prepare(sql).get(...params),
    all: (sql, ...params) => db.prepare(sql).all(...params),
    exec: (sql) => db.exec(sql),
    close: () => db.close(),
  };
}

/**
 * SQLite-backed record of visited URLs with per-entry expiry.
 *
 * `urlHash` is the table's only primary key and rows are written with
 * `INSERT OR REPLACE`, so storing a URL under a second namespace replaces
 * the row stored under the first one.
 */
class UrlStore {
  db = null;
  options;
  storeDir;
  dbPath;
  closed = false;
  initPromise = null;

  /**
   * Resolves options and creates the storage directory if it is missing.
   * The database itself is opened lazily by `initialize()`.
   *
   * @param {object} [options]
   * @param {string} [options.storeDir="/tmp/rezo-crawler/urls"]
   * @param {string} [options.dbFileName="urls.db"]
   * @param {number} [options.ttl=604800000] - Lifetime in ms; falsy values use the default.
   * @param {number} [options.maxUrls=0] - Row cap; 0 disables the cap.
   * @param {boolean} [options.hashUrls=true] - Store SHA-256 hex digests instead of raw URLs.
   */
  constructor(options = {}) {
    this.options = {
      storeDir: options.storeDir || "/tmp/rezo-crawler/urls",
      dbFileName: options.dbFileName || "urls.db",
      ttl: options.ttl || DEFAULT_TTL_MS,
      maxUrls: options.maxUrls ?? 0,
      hashUrls: options.hashUrls ?? true,
    };
    this.storeDir = path.resolve(this.options.storeDir);
    this.dbPath = path.join(this.storeDir, this.options.dbFileName);
    if (!fs.existsSync(this.storeDir)) {
      fs.mkdirSync(this.storeDir, { recursive: true });
    }
  }

  /**
   * Builds a store and waits for its database to be ready.
   *
   * @param {object} [options] - Same options as the constructor.
   * @returns {Promise<UrlStore>}
   */
  static async create(options = {}) {
    const store = new UrlStore(options);
    await store.initialize();
    return store;
  }

  /**
   * Opens the database, applies pragmas and creates the schema. Concurrent
   * callers share one in-flight promise. A failed attempt is forgotten so a
   * later call can retry instead of replaying the cached rejection.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initPromise) return this.initPromise;

    const pending = (async () => {
      const db = await openDatabase(this.dbPath);
      try {
        db.exec("PRAGMA journal_mode = WAL");
        db.exec("PRAGMA synchronous = NORMAL");
        db.exec("PRAGMA cache_size = -32000");
        db.exec("PRAGMA temp_store = MEMORY");
        db.exec("PRAGMA mmap_size = 134217728");
        db.exec(CREATE_TABLE_SQL);
        db.exec("CREATE INDEX IF NOT EXISTS idx_ns_exp ON urls(namespace, expiresAt)");
      } catch (err) {
        // Do not leak a half-configured handle.
        try {
          db.close();
        } catch {
          // The setup error is the one worth reporting.
        }
        throw err;
      }
      this.db = db;
    })();

    this.initPromise = pending;
    // Side branch only: callers still observe the rejection through `pending`.
    pending.catch(() => {
      if (this.initPromise === pending) this.initPromise = null;
    });
    return pending;
  }

  /**
   * Waits for initialization and returns the handle, or `null` when the
   * store was closed while the caller was waiting.
   */
  async #openHandle() {
    await this.initialize();
    return this.closed ? null : this.db;
  }

  /** Deletes the oldest rows (by `visitedAt`) once the table exceeds `maxUrls`. */
  #enforceMaxUrls(db) {
    const { maxUrls } = this.options;
    if (!(maxUrls > 0)) return;
    const row = db.get("SELECT COUNT(*) as cnt FROM urls");
    if (row && row.cnt > maxUrls) {
      db.run(EVICT_OLDEST_SQL, row.cnt - maxUrls);
    }
  }

  /**
   * Maps a URL to its storage key: a SHA-256 hex digest when `hashUrls`
   * is enabled, otherwise the URL itself.
   *
   * @param {string} url
   * @returns {string}
   */
  getUrlKey(url) {
    if (this.options.hashUrls) {
      return createHash("sha256").update(url).digest("hex");
    }
    return url;
  }

  /**
   * Records one URL as visited.
   *
   * @param {string} url
   * @param {string} [namespace="default"]
   * @param {number} [ttl] - Lifetime in ms; defaults to `options.ttl`.
   * @throws {Error} If the store is closed.
   */
  async set(url, namespace = "default", ttl) {
    if (this.closed) throw new Error("UrlStore is closed");
    const db = await this.#openHandle();
    if (!db) throw new Error("UrlStore is closed");

    const key = this.getUrlKey(url);
    const now = Date.now();
    db.run(UPSERT_URL_SQL, key, now, now + (ttl ?? this.options.ttl), namespace);
    this.#enforceMaxUrls(db);
  }

  /**
   * Records several URLs in one transaction, sharing one timestamp. The
   * `maxUrls` cap is applied before the commit.
   *
   * @param {string[]} urls
   * @param {string} [namespace="default"]
   * @param {number} [ttl] - Lifetime in ms; defaults to `options.ttl`.
   * @throws {Error} If the store is closed, or if any statement fails
   *   (the transaction is rolled back first).
   */
  async setMany(urls, namespace = "default", ttl) {
    if (this.closed) throw new Error("UrlStore is closed");
    if (urls.length === 0) return;
    const db = await this.#openHandle();
    if (!db) throw new Error("UrlStore is closed");

    const now = Date.now();
    const expiresAt = now + (ttl ?? this.options.ttl);
    db.exec("BEGIN TRANSACTION");
    try {
      for (const url of urls) {
        db.run(UPSERT_URL_SQL, this.getUrlKey(url), now, expiresAt, namespace);
      }
      this.#enforceMaxUrls(db);
      db.exec("COMMIT");
    } catch (err) {
      try {
        db.exec("ROLLBACK");
      } catch {
        // A failing rollback must not hide the error that caused it.
      }
      throw err;
    }
  }

  /**
   * Reports whether a URL is stored and not yet expired.
   *
   * @param {string} url
   * @param {string} [namespace] - When given, the row must be in this namespace.
   * @returns {Promise<boolean>} `false` when the store is closed.
   */
  async has(url, namespace) {
    if (this.closed) return false;
    const db = await this.#openHandle();
    if (!db) return false;

    const key = this.getUrlKey(url);
    const now = Date.now();
    const row = namespace
      ? db.get("SELECT expiresAt FROM urls WHERE urlHash = ? AND namespace = ?", key, namespace)
      : db.get("SELECT expiresAt FROM urls WHERE urlHash = ?", key);
    return row ? row.expiresAt >= now : false;
  }

  /**
   * Batched form of `has`.
   *
   * @param {string[]} urls
   * @param {string} [namespace]
   * @returns {Promise<Set<string>>} The input URLs that are stored and unexpired.
   */
  async hasMany(urls, namespace) {
    const found = new Set();
    if (this.closed || urls.length === 0) return found;
    const db = await this.#openHandle();
    if (!db) return found;

    const now = Date.now();
    const urlByKey = new Map();
    for (const url of urls) urlByKey.set(this.getUrlKey(url), url);

    const keys = Array.from(urlByKey.keys());
    for (let start = 0; start < keys.length; start += LOOKUP_CHUNK_SIZE) {
      const chunk = keys.slice(start, start + LOOKUP_CHUNK_SIZE);
      const placeholders = chunk.map(() => "?").join(",");
      const rows = namespace
        ? db.all(
            `SELECT urlHash, expiresAt FROM urls WHERE urlHash IN (${placeholders}) AND namespace = ?`,
            ...chunk,
            namespace,
          )
        : db.all(`SELECT urlHash, expiresAt FROM urls WHERE urlHash IN (${placeholders})`, ...chunk);

      for (const row of rows) {
        if (row.expiresAt < now) continue;
        const url = urlByKey.get(row.urlHash);
        if (url) found.add(url);
      }
    }
    return found;
  }

  /**
   * Returns the input URLs that `hasMany` does not report as visited,
   * in their original order.
   *
   * @param {string[]} urls
   * @param {string} [namespace]
   * @returns {Promise<string[]>}
   */
  async filterUnvisited(urls, namespace) {
    const visited = await this.hasMany(urls, namespace);
    return urls.filter((url) => !visited.has(url));
  }

  /**
   * Removes a URL, optionally only within one namespace.
   *
   * @param {string} url
   * @param {string} [namespace]
   * @returns {Promise<boolean>} `false` when the store is closed; otherwise
   *   `true`, whether or not a row existed.
   */
  async delete(url, namespace) {
    if (this.closed) return false;
    const db = await this.#openHandle();
    if (!db) return false;

    const key = this.getUrlKey(url);
    if (namespace) db.run("DELETE FROM urls WHERE urlHash = ? AND namespace = ?", key, namespace);
    else db.run("DELETE FROM urls WHERE urlHash = ?", key);
    return true;
  }

  /**
   * Removes every row, or only the rows of one namespace.
   *
   * @param {string} [namespace]
   */
  async clear(namespace) {
    if (this.closed) return;
    const db = await this.#openHandle();
    if (!db) return;

    if (namespace) db.run("DELETE FROM urls WHERE namespace = ?", namespace);
    else db.exec("DELETE FROM urls");
  }

  /**
   * Deletes expired rows.
   *
   * @returns {Promise<number>} Row-count difference before and after the delete.
   */
  async cleanup() {
    if (this.closed) return 0;
    const db = await this.#openHandle();
    if (!db) return 0;

    const now = Date.now();
    const before = db.get("SELECT COUNT(*) as cnt FROM urls");
    db.run("DELETE FROM urls WHERE expiresAt < ?", now);
    const after = db.get("SELECT COUNT(*) as cnt FROM urls");
    return (before?.cnt || 0) - (after?.cnt || 0);
  }

  /**
   * Counts unexpired rows, optionally within one namespace.
   *
   * @param {string} [namespace]
   * @returns {Promise<number>}
   */
  async count(namespace) {
    if (this.closed) return 0;
    const db = await this.#openHandle();
    if (!db) return 0;

    const now = Date.now();
    const row = namespace
      ? db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ? AND expiresAt >= ?", namespace, now)
      : db.get("SELECT COUNT(*) as cnt FROM urls WHERE expiresAt >= ?", now);
    return row?.cnt || 0;
  }

  /**
   * Summarises the table. `total` and `expired` honour the namespace
   * filter; `namespaces` always counts distinct namespaces in the table.
   *
   * @param {string} [namespace]
   * @returns {Promise<{total: number, expired: number, namespaces: number}>}
   */
  async stats(namespace) {
    const empty = { total: 0, expired: 0, namespaces: 0 };
    if (this.closed) return empty;
    const db = await this.#openHandle();
    if (!db) return empty;

    const now = Date.now();
    let total;
    let expired;
    if (namespace) {
      total = db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ?", namespace);
      expired = db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ? AND expiresAt < ?", namespace, now);
    } else {
      total = db.get("SELECT COUNT(*) as cnt FROM urls");
      expired = db.get("SELECT COUNT(*) as cnt FROM urls WHERE expiresAt < ?", now);
    }
    const namespaces = db.get("SELECT COUNT(DISTINCT namespace) as cnt FROM urls");
    return {
      total: total?.cnt || 0,
      expired: expired?.cnt || 0,
      namespaces: namespaces?.cnt || 0,
    };
  }

  /**
   * Marks the store closed and releases the database handle. Waits for an
   * in-flight initialization first; never rejects.
   */
  async close() {
    if (this.closed) return;
    this.closed = true;

    if (this.initPromise) {
      try {
        await this.initPromise;
      } catch {
        // The caller that started initialization already received this error.
      }
    }
    if (this.db) {
      try {
        this.db.close();
      } catch {
        // Best-effort shutdown.
      }
      this.db = null;
    }
  }

  /** Whether `close()` has been called. */
  get isClosed() {
    return this.closed;
  }

  /** Resolved path of the database file. */
  get path() {
    return this.dbPath;
  }
}

export { UrlStore as default, UrlStore };