rezo 1.0.43 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -15
- package/dist/cache/index.js +0 -3
- package/dist/crawler/addon/decodo/index.cjs +1 -0
- package/dist/crawler/addon/decodo/index.js +1 -0
- package/dist/crawler/crawler-options.cjs +1 -0
- package/dist/crawler/crawler-options.js +1 -0
- package/dist/{plugin → crawler}/crawler.cjs +392 -32
- package/dist/{plugin → crawler}/crawler.js +392 -32
- package/dist/crawler/index.cjs +40 -0
- package/dist/{plugin → crawler}/index.js +4 -2
- package/dist/crawler/plugin/file-cacher.cjs +19 -0
- package/dist/crawler/plugin/file-cacher.js +19 -0
- package/dist/crawler/plugin/index.cjs +1 -0
- package/dist/crawler/plugin/index.js +1 -0
- package/dist/crawler/plugin/navigation-history.cjs +43 -0
- package/dist/crawler/plugin/navigation-history.js +43 -0
- package/dist/crawler/plugin/robots-txt.cjs +2 -0
- package/dist/crawler/plugin/robots-txt.js +2 -0
- package/dist/crawler/plugin/url-store.cjs +18 -0
- package/dist/crawler/plugin/url-store.js +18 -0
- package/dist/crawler.d.ts +315 -172
- package/dist/entries/crawler.cjs +5 -5
- package/dist/entries/crawler.js +2 -2
- package/dist/index.cjs +27 -27
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/package.json +2 -6
- package/dist/cache/file-cacher.cjs +0 -270
- package/dist/cache/file-cacher.js +0 -267
- package/dist/cache/navigation-history.cjs +0 -298
- package/dist/cache/navigation-history.js +0 -296
- package/dist/cache/url-store.cjs +0 -294
- package/dist/cache/url-store.js +0 -291
- package/dist/plugin/addon/decodo/index.cjs +0 -1
- package/dist/plugin/addon/decodo/index.js +0 -1
- package/dist/plugin/crawler-options.cjs +0 -1
- package/dist/plugin/crawler-options.js +0 -1
- package/dist/plugin/index.cjs +0 -36
- package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
- package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
- package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
- package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
- package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
- package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
- package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
- package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
- package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
- package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
- package/dist/{plugin → crawler}/scraper.cjs +0 -0
- package/dist/{plugin → crawler}/scraper.js +0 -0

package/dist/{plugin → crawler}/crawler.js:

@@ -1,7 +1,8 @@
 import fs from "node:fs";
-import { FileCacher } from '
-import { UrlStore } from '
-import { NavigationHistory } from '
+import { FileCacher } from './plugin/file-cacher.js';
+import { UrlStore } from './plugin/url-store.js';
+import { NavigationHistory } from './plugin/navigation-history.js';
+import { RobotsTxt } from './plugin/robots-txt.js';
 import { parseHTML } from "linkedom";
 import path from "node:path";
 import { Rezo } from '../core/rezo.js';
@@ -54,6 +55,23 @@ export class Crawler {
   navigationHistoryInitPromise = null;
   adapterExecutor = null;
   adapterType;
+  pendingExecutions = new Set;
+  robotsTxt;
+  domainResponseTimes = new Map;
+  domainCurrentDelay = new Map;
+  crawlStats = {
+    urlsVisited: 0,
+    urlsQueued: 0,
+    urlsFailed: 0,
+    startTime: 0,
+    currentDepth: 0
+  };
+  urlDepthMap = new Map;
+  startHandlers = [];
+  finishHandlers = [];
+  redirectHandlers = [];
+  collectedData = [];
+  crawlStarted = false;
   constructor(crawlerOptions, http = new Rezo) {
     this.http = http;
     this.queue = new RezoQueue({
@@ -71,7 +89,6 @@ export class Crawler {
     fs.mkdirSync(path.dirname(dbUrl), { recursive: true });
     FileCacher.create({
       cacheDir: dbUrl,
-      softDelete: false,
       ttl: cacheTTL,
       encryptNamespace: true
     }).then((storage) => {
@@ -110,6 +127,14 @@ export class Crawler {
     }
     this.initializeAdapter();
     this.leadsFinder = new Scraper(this.http, this.config, this._onEmailLeads.bind(this), this._onEmailDiscovered.bind(this), this.config.debug);
+    this.robotsTxt = new RobotsTxt({
+      userAgent: this.config.userAgent || "RezoBot",
+      cacheTTL: 24 * 60 * 60 * 1000
+    });
+    this.crawlStats.startTime = Date.now();
+    if (this.config.baseUrl) {
+      this.urlDepthMap.set(this.config.baseUrl, 0);
+    }
   }
   async initializeAdapter() {
     try {
@@ -148,6 +173,8 @@ export class Crawler {
   async waitForNavigationHistory() {
     if (!this.config.enableNavigationHistory)
       return;
+    if (this.isNavigationHistoryReady && this.isSessionReady)
+      return;
     if (this.navigationHistoryInitPromise) {
       await this.navigationHistoryInitPromise;
     }
@@ -361,6 +388,18 @@ export class Crawler {
     this.emailLeadsEvents.push(handler);
     return this;
   }
+  onStart(handler) {
+    this.startHandlers.push(handler);
+    return this;
+  }
+  onFinish(handler) {
+    this.finishHandlers.push(handler);
+    return this;
+  }
+  onRedirect(handler) {
+    this.redirectHandlers.push(handler);
+    return this;
+  }
   onRawData(handler) {
     this.rawResponseEvents.push({
       handler: "_onRawResponse",
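
The three new handlers follow the same chainable registration pattern as the existing on* methods. A minimal usage sketch; the 'rezo/crawler' import path and the option names are assumptions based on the file list above and the config fields read elsewhere in this diff:

    import { Crawler } from 'rezo/crawler'; // subpath assumed from dist/entries/crawler.js

    const crawler = new Crawler({ baseUrl: 'https://example.com' });

    crawler
      .onStart(() => console.log('crawl started'))                     // fired once, before the first request
      .onRedirect((e) => console.log(e.originalUrl, '->', e.finalUrl)) // e: { originalUrl, finalUrl, redirectCount, statusCode }
      .onFinish((stats) => console.log('visited', stats.urlsVisited)); // receives crawlStats when waitForAll() completes
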
@@ -435,20 +474,25 @@ export class Crawler {
     this.queue.add(() => handler(document.body));
   }
   _onAttribute(selection, attribute, handler, document) {
-
-
-
-
-    const elements = document.querySelectorAll(
+    const isSimpleForm = typeof attribute === "function";
+    const actualAttribute = isSimpleForm ? selection : attribute;
+    const actualHandler = isSimpleForm ? attribute : handler;
+    const actualSelection = isSimpleForm ? `[${selection}]` : selection || `[${attribute}]`;
+    const elements = document.querySelectorAll(actualSelection);
     for (let i = 0;i < elements.length; i++) {
-
-
+      const el = elements[i];
+      if (el.hasAttribute(actualAttribute)) {
+        const value = el.getAttribute(actualAttribute);
+        this.queue.add(() => actualHandler.call(el, value, actualAttribute));
+      }
     }
   }
   _onText(selection, handler, document) {
     const elements = document.querySelectorAll(selection);
     for (let i = 0;i < elements.length; i++) {
-
+      const el = elements[i];
+      const text = el.textContent;
+      this.queue.add(() => handler.call(el, text));
     }
   }
   _onSelection(selection, handler, document) {
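
The rewritten _onAttribute dispatcher now accepts two call shapes: (attribute, handler) and (selector, attribute, handler). Assuming the public registration method forwards its arguments here unchanged (the wrapper itself is not shown in this diff), both forms would behave like this:

    // Simple form: the first argument is the attribute name and the
    // selector defaults to `[data-id]`; the handler gets (value, attribute).
    crawler.onAttribute('data-id', function (value, attr) {
      console.log(this.tagName, attr, value); // `this` is the matched element
    });

    // Explicit form: CSS selector plus attribute name.
    crawler.onAttribute('img.hero', 'src', function (src) {
      console.log('hero image:', src);
    });
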
@@ -466,8 +510,11 @@ export class Crawler {
   _onHref(handler, document) {
     const elements = document.querySelectorAll("a, link");
     for (let i = 0;i < elements.length; i++) {
-
-
+      const el = elements[i];
+      if (el.hasAttribute("href")) {
+        const href = new URL(el.getAttribute("href"), document.URL).href;
+        this.queue.add(() => handler.call(el, href));
+      }
     }
   }
   _onAnchor(selection, handler, document) {
@@ -501,6 +548,233 @@ export class Crawler {
   _onResponse(handler, response) {
     this.queue.add(() => handler(response));
   }
+  calculateAutoThrottleDelay(domain, responseTime) {
+    if (!this.config.autoThrottle)
+      return 0;
+    let times = this.domainResponseTimes.get(domain) || [];
+    times.push(responseTime);
+    if (times.length > 10) {
+      times = times.slice(-10);
+    }
+    this.domainResponseTimes.set(domain, times);
+    const avgResponseTime = times.reduce((a, b) => a + b, 0) / times.length;
+    const targetDelay = this.config.autoThrottleTargetDelay;
+    const loadFactor = avgResponseTime / 200;
+    let newDelay = Math.round(targetDelay * loadFactor);
+    newDelay = Math.max(this.config.autoThrottleMinDelay, newDelay);
+    newDelay = Math.min(this.config.autoThrottleMaxDelay, newDelay);
+    this.domainCurrentDelay.set(domain, newDelay);
+    if (this.config.debug) {
+      console.log(`[AutoThrottle] ${domain}: avgRT=${avgResponseTime.toFixed(0)}ms, delay=${newDelay}ms`);
+    }
+    return newDelay;
+  }
+  getAutoThrottleDelay(domain) {
+    if (!this.config.autoThrottle)
+      return 0;
+    return this.domainCurrentDelay.get(domain) || this.config.autoThrottleMinDelay;
+  }
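
The throttle keeps a sliding window of the last 10 response times per domain and scales autoThrottleTargetDelay by avgResponseTime / 200 (200 ms acting as the baseline for a "fast" server), clamped between the min and max delays. A standalone sketch of the same arithmetic, with illustrative config values:

    function throttleDelay(avgRt, { target = 1000, min = 250, max = 5000 } = {}) {
      const loadFactor = avgRt / 200; // 200ms baseline, as in calculateAutoThrottleDelay
      return Math.min(max, Math.max(min, Math.round(target * loadFactor)));
    }

    throttleDelay(600); // -> 3000: a slow server triples the delay
    throttleDelay(30);  // -> 250:  a fast server is floored at the min delay
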
+  async handle429Response(url, response) {
+    let retryAfter = 0;
+    const retryAfterHeader = response?.headers?.["retry-after"] || response?.headers?.get?.("retry-after");
+    if (retryAfterHeader) {
+      const parsed = parseInt(retryAfterHeader, 10);
+      if (!isNaN(parsed)) {
+        retryAfter = parsed * 1000;
+      } else {
+        const date = new Date(retryAfterHeader);
+        if (!isNaN(date.getTime())) {
+          retryAfter = date.getTime() - Date.now();
+        }
+      }
+    }
+    if (retryAfter <= 0) {
+      retryAfter = 60000;
+    }
+    const maxWait = this.config.maxWaitOn429;
+    const alwaysWait = this.config.alwaysWaitOn429;
+    if (retryAfter > maxWait && !alwaysWait) {
+      const waitMinutes = Math.round(retryAfter / 60000);
+      const error = new Error(`Rate limited: Server requested wait time of ${waitMinutes} minutes, which exceeds maxWaitOn429 (${Math.round(maxWait / 60000)} minutes). Set alwaysWaitOn429: true to wait regardless.`);
+      error.code = "REZ_RATE_LIMIT_EXCEEDED";
+      error.url = url;
+      error.status = 429;
+      throw error;
+    }
+    if (retryAfter > maxWait && alwaysWait) {
+      const waitMinutes = Math.round(retryAfter / 60000);
+      console.warn(`[Crawler] WARNING: Rate limited on ${url}. Server requested ${waitMinutes} minute wait. Waiting because alwaysWaitOn429 is enabled.`);
+    }
+    if (this.config.debug) {
+      console.log(`[Crawler] 429 Rate Limited: waiting ${Math.round(retryAfter / 1000)}s before retry`);
+    }
+    return { shouldRetry: true, waitTime: retryAfter };
+  }
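
Retry-After is honored in both forms the header allows, delta-seconds and an HTTP date, with a 60-second fallback when it is absent or unparsable. Whether the crawler actually waits out a long server-requested delay is governed by two options (names taken from the code above; their defaults are not visible in this diff):

    const crawler = new Crawler({
      maxWaitOn429: 5 * 60 * 1000, // throw REZ_RATE_LIMIT_EXCEEDED if the server asks for more...
      alwaysWaitOn429: false,      // ...unless true, which logs a warning and waits anyway
    });
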
+  async checkCrawlLimits(url, parentUrl) {
+    if (this.config.maxUrls > 0 && this.crawlStats.urlsVisited >= this.config.maxUrls) {
+      return { allowed: false, reason: `maxUrls limit reached (${this.config.maxUrls})` };
+    }
+    if (this.config.maxDepth > 0) {
+      const parentDepth = parentUrl ? this.urlDepthMap.get(parentUrl) ?? 0 : 0;
+      const urlDepth = this.urlDepthMap.get(url) ?? parentDepth + 1;
+      if (urlDepth > this.config.maxDepth) {
+        return { allowed: false, reason: `maxDepth limit reached (depth ${urlDepth} > ${this.config.maxDepth})` };
+      }
+      if (!this.urlDepthMap.has(url)) {
+        this.urlDepthMap.set(url, urlDepth);
+        this.crawlStats.currentDepth = Math.max(this.crawlStats.currentDepth, urlDepth);
+      }
+    }
+    if (this.config.respectRobotsTxt) {
+      try {
+        if (!this.robotsTxt.isCached(url)) {
+          await this.robotsTxt.fetch(url, async (robotsUrl) => {
+            const response = await this.http.get(robotsUrl, { timeout: 1e4 });
+            return { status: response.status, data: response.data };
+          });
+        }
+        const allowed = this.robotsTxt.isAllowed(url);
+        if (!allowed) {
+          return { allowed: false, reason: "Blocked by robots.txt" };
+        }
+      } catch (error) {
+        if (this.config.debug) {
+          console.warn(`[Crawler] Failed to check robots.txt for ${url}:`, error);
+        }
+      }
+    }
+    return { allowed: true };
+  }
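
checkCrawlLimits gates every request on three independent checks: a total-URL budget, a per-URL depth derived from urlDepthMap (parent depth + 1, with baseUrl seeded at depth 0 in the constructor), and robots.txt when enabled. A configuration sketch using the option names this method reads:

    const crawler = new Crawler({
      baseUrl: 'https://example.com',
      maxUrls: 500,            // skip further URLs once 500 have been visited
      maxDepth: 3,             // skip links more than 3 hops from the seed
      respectRobotsTxt: true,  // fetch robots.txt per host; fetch failures are non-fatal
      userAgent: 'MyBot/1.0',  // also used for robots.txt matching (default "RezoBot")
    });
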
+  shouldFollowLink(element) {
+    if (this.config.followNofollow) {
+      return true;
+    }
+    const rel = element.getAttribute("rel");
+    if (rel && rel.toLowerCase().includes("nofollow")) {
+      return false;
+    }
+    return true;
+  }
+  checkResponseSize(contentLength) {
+    if (this.config.maxResponseSize > 0 && contentLength > this.config.maxResponseSize) {
+      return {
+        allowed: false,
+        reason: `Response size (${contentLength} bytes) exceeds maxResponseSize (${this.config.maxResponseSize} bytes)`
+      };
+    }
+    return { allowed: true };
+  }
+  collect(data) {
+    this.collectedData.push(data);
+    return this;
+  }
+  getCollectedData() {
+    return [...this.collectedData];
+  }
+  clearCollectedData() {
+    this.collectedData = [];
+    return this;
+  }
+  async exportData(filePath, format = "json") {
+    const data = this.collectedData;
+    if (data.length === 0) {
+      if (this.config.debug) {
+        console.warn("[Crawler] No data to export");
+      }
+      return;
+    }
+    let content;
+    switch (format) {
+      case "json":
+        content = JSON.stringify(data, null, 2);
+        break;
+      case "jsonl":
+        content = data.map((item) => JSON.stringify(item)).join(`
+`);
+        break;
+      case "csv":
+        const keys = new Set;
+        data.forEach((item) => {
+          if (typeof item === "object" && item !== null) {
+            Object.keys(item).forEach((key) => keys.add(key));
+          }
+        });
+        const headers = Array.from(keys);
+        const escapeCSV = (val) => {
+          if (val === null || val === undefined)
+            return "";
+          const str = String(val);
+          if (str.includes(",") || str.includes('"') || str.includes(`
+`)) {
+            return `"${str.replace(/"/g, '""')}"`;
+          }
+          return str;
+        };
+        const rows = [
+          headers.join(","),
+          ...data.map((item) => {
+            if (typeof item !== "object" || item === null) {
+              return escapeCSV(item);
+            }
+            return headers.map((key) => escapeCSV(item[key])).join(",");
+          })
+        ];
+        content = rows.join(`
+`);
+        break;
+      default:
+        throw new Error(`Unsupported export format: ${format}`);
+    }
+    const dir = path.dirname(filePath);
+    if (!fs.existsSync(dir)) {
+      fs.mkdirSync(dir, { recursive: true });
+    }
+    fs.writeFileSync(filePath, content, "utf-8");
+    if (this.config.debug) {
+      console.log(`[Crawler] Exported ${data.length} items to ${filePath} (${format})`);
+    }
+  }
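
Combined with collect(), this gives a small extract-and-export pipeline. A sketch; the public onText registration method is assumed from the _onText dispatcher above:

    crawler.onText('h1', function (text) {
      crawler.collect({ heading: text }); // collect() returns the crawler, so it chains
    });

    await crawler.done();
    await crawler.exportData('./out/headings.csv', 'csv'); // or "json" (default) / "jsonl"

The CSV branch unions the keys of all collected objects into the header row and quotes values containing commas, quotes, or newlines.
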
+  getStats() {
+    return { ...this.crawlStats };
+  }
+  async triggerStartHandlers() {
+    if (this.crawlStarted)
+      return;
+    this.crawlStarted = true;
+    this.crawlStats.startTime = Date.now();
+    for (const handler of this.startHandlers) {
+      try {
+        await handler();
+      } catch (error) {
+        if (this.config.debug) {
+          console.error("[Crawler] onStart handler error:", error);
+        }
+      }
+    }
+  }
+  async triggerFinishHandlers() {
+    this.crawlStats.endTime = Date.now();
+    for (const handler of this.finishHandlers) {
+      try {
+        await handler(this.crawlStats);
+      } catch (error) {
+        if (this.config.debug) {
+          console.error("[Crawler] onFinish handler error:", error);
+        }
+      }
+    }
+  }
+  async triggerRedirectHandlers(event) {
+    for (const handler of this.redirectHandlers) {
+      try {
+        await handler(event);
+      } catch (error) {
+        if (this.config.debug) {
+          console.error("[Crawler] onRedirect handler error:", error);
+        }
+      }
+    }
+  }
   buildUrl(url, params) {
     if (params) {
       const u = new URL(url, this.config.baseUrl);
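
getStats() returns a defensive copy, so it is safe to poll mid-crawl. Its shape follows the fields initialized in the constructor, plus endTime, which triggerFinishHandlers sets:

    const stats = crawler.getStats();
    // { urlsVisited, urlsQueued, urlsFailed, startTime, currentDepth, endTime? }
    console.log(`visited ${stats.urlsVisited}, failed ${stats.urlsFailed}`);
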
@@ -570,16 +844,35 @@ export class Crawler {
       this.addToNavigationQueue(url, method, body, headersObj);
     }
     if (deepEmailFinder) {
-      this.execute2(method, url, body, _options, forceRevisit)
+      const p = this.execute2(method, url, body, _options, forceRevisit);
+      this.pendingExecutions.add(p);
+      p.finally(() => this.pendingExecutions.delete(p));
       return this;
     }
-    this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions)
+    const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions);
+    this.pendingExecutions.add(p);
+    p.finally(() => this.pendingExecutions.delete(p));
     return this;
   }
   async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions) {
-
+    await this.waitForStorage();
+    if (this.isCacheEnabled) {
+      await this.waitForCache();
+    }
+    if (this.config.enableNavigationHistory) {
+      await this.waitForNavigationHistory();
+    }
+    const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions));
+    task.finally(() => this.pendingExecutions.delete(task));
   }
   async execute2(method, url, body, options = {}, forceRevisit) {
+    await this.waitForStorage();
+    if (this.isCacheEnabled) {
+      await this.waitForCache();
+    }
+    if (this.config.enableNavigationHistory) {
+      await this.waitForNavigationHistory();
+    }
     this.queue.add(() => this.leadsFinder.parseExternalWebsite(url, method, body, {
       httpConfig: options,
       saveCache: this.saveCache.bind(this),
@@ -593,21 +886,34 @@ export class Crawler {
       allowCrossDomainTravel: true
     }, forceRevisit, true)).then();
   }
-  async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0) {
+  async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl) {
     try {
-
-
-
-
-
-
+      await this.triggerStartHandlers();
+      const limitCheck = await this.checkCrawlLimits(url, parentUrl);
+      if (!limitCheck.allowed) {
+        if (this.config.debug) {
+          console.log(`[Crawler] Skipping ${url}: ${limitCheck.reason}`);
+        }
+        return;
+      }
+      this.crawlStats.urlsQueued++;
+      const domain = new URL(url).hostname;
+      const delay = this.getAutoThrottleDelay(domain);
+      if (delay > 0) {
+        await new Promise((resolve) => setTimeout(resolve, delay));
+      }
       const isVisited = forceRevisit ? false : await this.hasUrlInCache(url);
       const cache = await this.getCache(url);
       if (isVisited && !cache)
         return;
       if (isVisited && method !== "GET")
         return;
+      const requestStartTime = Date.now();
       const response = cache && method === "GET" ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
+      if (!cache) {
+        const responseTime = Date.now() - requestStartTime;
+        this.calculateAutoThrottleDelay(domain, responseTime);
+      }
       const res = {
         data: response.data || response.content || "",
         contentType: response.contentType || "",
@@ -619,11 +925,29 @@ export class Crawler {
        cookies: response?.cookies?.serialized || response?.cookies,
        contentLength: response.contentLength || 0
      };
+      if (res.contentLength && res.contentLength > 0) {
+        const sizeCheck = this.checkResponseSize(res.contentLength);
+        if (!sizeCheck.allowed) {
+          if (this.config.debug) {
+            console.log(`[Crawler] Skipping ${url}: ${sizeCheck.reason}`);
+          }
+          return;
+        }
+      }
+      this.crawlStats.urlsVisited++;
+      if (res.finalUrl && res.finalUrl !== url && this.redirectHandlers.length > 0) {
+        await this.triggerRedirectHandlers({
+          originalUrl: url,
+          finalUrl: res.finalUrl,
+          redirectCount: response.redirectCount || 1,
+          statusCode: res.status
+        });
+      }
      if (!cache)
        await this.saveCache(url, res);
      if (!isVisited)
        await this.saveUrl(url);
-      this.markUrlVisited(url, {
+      await this.markUrlVisited(url, {
        status: res.status,
        finalUrl: res.finalUrl,
        contentType: res.contentType
@@ -655,6 +979,24 @@ export class Crawler {
      }
    } catch (e) {
      const error = e;
+      if (error?.response?.status === 429 || error?.status === 429) {
+        try {
+          const { shouldRetry, waitTime } = await this.handle429Response(url, error.response || error);
+          if (shouldRetry) {
+            await this.sleep(waitTime);
+            return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1, parentUrl);
+          }
+        } catch (rateLimitError) {
+          this.crawlStats.urlsFailed++;
+          if (this.config.throwFatalError)
+            throw rateLimitError;
+          for (let i = 0;i < this.errorEvents.length; i++) {
+            const event = this.errorEvents[i];
+            this[event.handler](...event.attr, rateLimitError);
+          }
+          return;
+        }
+      }
      if (error && error.response) {
        const status = error.response.status;
        const retryDelay = this.config.retryDelay || 1000;
@@ -666,16 +1008,17 @@ export class Crawler {
        if (retryWithoutProxyOnStatusCode && options.proxy && retryWithoutProxyOnStatusCode.includes(status) && retryCount < maxRetryAttempts) {
          await this.sleep(retryDelay);
          delete options.proxy;
-          return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1);
+          return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1, parentUrl);
        } else if (retryOnStatusCode && options.proxy && retryOnStatusCode.includes(status) && retryCount < maxRetryAttempts) {
          await this.sleep(retryDelay);
-          return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1);
+          return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1, parentUrl);
        } else if (retryOnProxyError && options.proxy && retryCount < maxRetryOnProxyError) {
          await this.sleep(retryDelay);
-          return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1);
+          return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1, parentUrl);
        }
      }
-      this.
+      this.crawlStats.urlsFailed++;
+      await this.markUrlVisited(url, {
        status: error?.response?.status || 0,
        errorMessage: e.message || "Unknown error"
      });
@@ -684,7 +1027,6 @@ export class Crawler {
      if (this.config.debug) {
        console.log(`Error visiting ${url}: ${e.message}`);
      }
-      console.log(error);
      for (let i = 0;i < this.errorEvents.length; i++) {
        const event = this.errorEvents[i];
        this[event.handler](...event.attr, e);
@@ -692,17 +1034,35 @@ export class Crawler {
    }
  }
  async waitForAll() {
+    if (this.pendingExecutions.size > 0) {
+      await Promise.allSettled([...this.pendingExecutions]);
+    }
    await this.queue.onIdle();
+    await this.triggerFinishHandlers();
  }
  async done() {
    return this.waitForAll();
  }
  async close() {
    try {
-      await this.cacher
+      await this.cacher?.close();
    } catch {}
    try {
-      await this.urlStorage
+      await this.urlStorage?.close();
    } catch {}
+    try {
+      await this.navigationHistory?.close();
+    } catch {}
+  }
+  async destroy() {
+    this.queue.clear();
+    this.events.length = 0;
+    this.jsonEvents.length = 0;
+    this.errorEvents.length = 0;
+    this.responseEvents.length = 0;
+    this.rawResponseEvents.length = 0;
+    this.emailDiscoveredEvents.length = 0;
+    this.emailLeadsEvents.length = 0;
+    await this.close();
  }
 }
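
With every execute/execute2 promise tracked in pendingExecutions, waitForAll() now settles in-flight work before draining the queue and firing the finish handlers. A typical shutdown sequence under these semantics:

    await crawler.waitForAll(); // settle pending executions, drain the queue, fire onFinish handlers
    await crawler.close();      // close cacher, urlStorage and navigationHistory (errors swallowed)

    // or, to also clear the queue and drop every registered handler in one call:
    await crawler.destroy();
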
package/dist/crawler/index.cjs (new file):

@@ -0,0 +1,40 @@
+const _mod_uzdw07 = require('./crawler.cjs');
+exports.Crawler = _mod_uzdw07.Crawler;;
+const _mod_fk2i9s = require('./crawler-options.cjs');
+exports.CrawlerOptions = _mod_fk2i9s.CrawlerOptions;;
+const _mod_y9972f = require('./plugin/robots-txt.cjs');
+exports.RobotsTxt = _mod_y9972f.RobotsTxt;;
+const _mod_y7t1zo = require('./plugin/file-cacher.cjs');
+exports.FileCacher = _mod_y7t1zo.FileCacher;;
+const _mod_ecu03i = require('./plugin/url-store.cjs');
+exports.UrlStore = _mod_ecu03i.UrlStore;;
+const _mod_kxfnq7 = require('./plugin/navigation-history.cjs');
+exports.NavigationHistory = _mod_kxfnq7.NavigationHistory;;
+const _mod_bg9vts = require('./addon/oxylabs/index.cjs');
+exports.Oxylabs = _mod_bg9vts.Oxylabs;;
+const _mod_zd2i1t = require('./addon/oxylabs/options.cjs');
+exports.OXYLABS_BROWSER_TYPES = _mod_zd2i1t.OXYLABS_BROWSER_TYPES;
+exports.OXYLABS_COMMON_LOCALES = _mod_zd2i1t.OXYLABS_COMMON_LOCALES;
+exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_zd2i1t.OXYLABS_COMMON_GEO_LOCATIONS;
+exports.OXYLABS_US_STATES = _mod_zd2i1t.OXYLABS_US_STATES;
+exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_zd2i1t.OXYLABS_EUROPEAN_COUNTRIES;
+exports.OXYLABS_ASIAN_COUNTRIES = _mod_zd2i1t.OXYLABS_ASIAN_COUNTRIES;
+exports.getRandomOxylabsBrowserType = _mod_zd2i1t.getRandomBrowserType;
+exports.getRandomOxylabsLocale = _mod_zd2i1t.getRandomLocale;
+exports.getRandomOxylabsGeoLocation = _mod_zd2i1t.getRandomGeoLocation;;
+const _mod_gr8483 = require('./addon/decodo/index.cjs');
+exports.Decodo = _mod_gr8483.Decodo;;
+const _mod_km63k6 = require('./addon/decodo/options.cjs');
+exports.DECODO_DEVICE_TYPES = _mod_km63k6.DECODO_DEVICE_TYPES;
+exports.DECODO_HEADLESS_MODES = _mod_km63k6.DECODO_HEADLESS_MODES;
+exports.DECODO_COMMON_LOCALES = _mod_km63k6.DECODO_COMMON_LOCALES;
+exports.DECODO_COMMON_COUNTRIES = _mod_km63k6.DECODO_COMMON_COUNTRIES;
+exports.DECODO_EUROPEAN_COUNTRIES = _mod_km63k6.DECODO_EUROPEAN_COUNTRIES;
+exports.DECODO_ASIAN_COUNTRIES = _mod_km63k6.DECODO_ASIAN_COUNTRIES;
+exports.DECODO_US_STATES = _mod_km63k6.DECODO_US_STATES;
+exports.DECODO_COMMON_CITIES = _mod_km63k6.DECODO_COMMON_CITIES;
+exports.getRandomDecodoDeviceType = _mod_km63k6.getRandomDeviceType;
+exports.getRandomDecodoLocale = _mod_km63k6.getRandomLocale;
+exports.getRandomDecodoCountry = _mod_km63k6.getRandomCountry;
+exports.getRandomDecodoCity = _mod_km63k6.getRandomCity;
+exports.generateDecodoSessionId = _mod_km63k6.generateSessionId;;
package/dist/{plugin → crawler}/index.js:

@@ -1,7 +1,9 @@
 export { Crawler } from './crawler.js';
 export { CrawlerOptions } from './crawler-options.js';
-export {
-export {
+export { RobotsTxt } from './plugin/robots-txt.js';
+export { FileCacher } from './plugin/file-cacher.js';
+export { UrlStore } from './plugin/url-store.js';
+export { NavigationHistory } from './plugin/navigation-history.js';
 export { Oxylabs } from './addon/oxylabs/index.js';
 export {
   OXYLABS_BROWSER_TYPES,
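
The plugin classes that previously lived under dist/cache are now re-exported from the crawler entry point, so deep imports can be replaced with a single named import. Assuming the package maps a 'rezo/crawler' subpath to dist/crawler (suggested by dist/entries/crawler.js, but not confirmed in this diff):

    // 1.0.43 (removed): classes lived under dist/cache and dist/plugin
    // 1.0.44:
    import { Crawler, RobotsTxt, FileCacher, UrlStore, NavigationHistory } from 'rezo/crawler';
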
package/dist/crawler/plugin/file-cacher.cjs (new file, minified):

@@ -0,0 +1,19 @@
+var x=Object.create;var{getPrototypeOf:A,defineProperty:d,getOwnPropertyNames:b}=Object;var m=Object.prototype.hasOwnProperty;var p=(e,r,c)=>{c=e!=null?x(A(e)):{};let t=r||!e||!e.__esModule?d(c,"default",{value:e,enumerable:!0}):c;for(let s of b(e))if(!m.call(t,s))d(t,s,{get:()=>e[s],enumerable:!0});return t};var y=require("node:fs"),R=require("node:path"),{createHash:O}=require("node:crypto"),f=require("node:zlib"),D=typeof globalThis.Bun<"u",E=typeof f.zstdCompressSync==="function";async function w(e){if(D){let{Database:t}=await import("bun:sqlite"),s=new t(e);return{run:(a,...n)=>s.run(a,...n),get:(a,...n)=>s.query(a).get(...n),all:(a,...n)=>s.query(a).all(...n),exec:(a)=>s.exec(a),close:()=>s.close()}}let{DatabaseSync:r}=await import("node:sqlite"),c=new r(e);return{run:(t,...s)=>{if(s.length===0)c.exec(t);else c.prepare(t).run(...s)},get:(t,...s)=>{return c.prepare(t).get(...s)},all:(t,...s)=>{return c.prepare(t).all(...s)},exec:(t)=>c.exec(t),close:()=>c.close()}}function T(e){if(E)return f.zstdCompressSync(e);return e}function g(e){if(E)return f.zstdDecompressSync(e);return e}class l{databases=new Map;options;cacheDir;closed=!1;constructor(e={}){if(this.options={cacheDir:e.cacheDir||"/tmp/rezo-crawler/cache",ttl:e.ttl||604800000,compression:e.compression??!1,encryptNamespace:e.encryptNamespace??!1,maxEntries:e.maxEntries??0},this.cacheDir=R.resolve(this.options.cacheDir),!y.existsSync(this.cacheDir))y.mkdirSync(this.cacheDir,{recursive:!0})}static async create(e={}){return new l(e)}async getDatabase(e){let r=this.options.encryptNamespace?O("md5").update(e).digest("hex"):e.replace(/[^a-zA-Z0-9_-]/g,"_");if(this.databases.has(r))return this.databases.get(r);let c=R.join(this.cacheDir,`${r}.db`),t=await w(c);return t.exec("PRAGMA journal_mode = WAL"),t.exec("PRAGMA synchronous = NORMAL"),t.exec("PRAGMA cache_size = -64000"),t.exec("PRAGMA temp_store = MEMORY"),t.exec("PRAGMA mmap_size = 268435456"),t.exec(`
+CREATE TABLE IF NOT EXISTS cache (
+key TEXT PRIMARY KEY,
+value BLOB NOT NULL,
+expiresAt INTEGER NOT NULL,
+createdAt INTEGER NOT NULL,
+compressed INTEGER DEFAULT 0
+) WITHOUT ROWID
+`),t.exec("CREATE INDEX IF NOT EXISTS idx_expires ON cache(expiresAt)"),this.databases.set(r,t),t}async set(e,r,c,t="default"){if(this.closed)throw Error("FileCacher is closed");let s=await this.getDatabase(t),a=Date.now(),n=a+(c??this.options.ttl),o=Buffer.from(JSON.stringify(r),"utf-8"),i=0;if(this.options.compression&&E)try{o=T(o),i=1}catch{}if(s.run(`
+INSERT OR REPLACE INTO cache (key, value, expiresAt, createdAt, compressed)
+VALUES (?, ?, ?, ?, ?)
+`,e,Buffer.from(o).toString("base64"),n,a,i),this.options.maxEntries>0){let u=s.get("SELECT COUNT(*) as cnt FROM cache");if(u&&u.cnt>this.options.maxEntries){let h=u.cnt-this.options.maxEntries;s.run(`
+DELETE FROM cache WHERE key IN (
+SELECT key FROM cache ORDER BY createdAt ASC LIMIT ?
+)
+`,h)}}}async setMany(e,r="default"){if(this.closed)throw Error("FileCacher is closed");if(e.length===0)return;let c=await this.getDatabase(r),t=Date.now(),s=t+this.options.ttl;c.exec("BEGIN TRANSACTION");try{for(let a of e){let n=a.ttl?t+a.ttl:s,o=Buffer.from(JSON.stringify(a.value),"utf-8"),i=0;if(this.options.compression&&E)try{o=T(o),i=1}catch{}c.run(`
+INSERT OR REPLACE INTO cache (key, value, expiresAt, createdAt, compressed)
+VALUES (?, ?, ?, ?, ?)
+`,a.key,Buffer.from(o).toString("base64"),n,t,i)}c.exec("COMMIT")}catch(a){throw c.exec("ROLLBACK"),a}}async get(e,r="default"){if(this.closed)throw Error("FileCacher is closed");let c=await this.getDatabase(r),t=c.get("SELECT value, expiresAt, compressed FROM cache WHERE key = ?",e);if(!t)return null;if(t.expiresAt<Date.now())return c.run("DELETE FROM cache WHERE key = ?",e),null;let s=Buffer.from(t.value,"base64");if(t.compressed)try{s=g(s)}catch{return null}try{return JSON.parse(Buffer.from(s).toString("utf-8"))}catch{return null}}async has(e,r="default"){if(this.closed)return!1;let t=(await this.getDatabase(r)).get("SELECT expiresAt FROM cache WHERE key = ?",e);if(!t)return!1;return t.expiresAt>=Date.now()}async hasMany(e,r="default"){if(this.closed)return new Set;if(e.length===0)return new Set;let c=await this.getDatabase(r),t=Date.now(),s=new Set,a=500;for(let n=0;n<e.length;n+=a){let o=e.slice(n,n+a),i=o.map(()=>"?").join(","),u=c.all(`SELECT key, expiresAt FROM cache WHERE key IN (${i})`,...o);for(let h of u)if(h.expiresAt>=t)s.add(h.key)}return s}async delete(e,r="default"){if(this.closed)return!1;return(await this.getDatabase(r)).run("DELETE FROM cache WHERE key = ?",e),!0}async clear(e="default"){if(this.closed)return;(await this.getDatabase(e)).exec("DELETE FROM cache")}async cleanup(e="default"){if(this.closed)return 0;let r=await this.getDatabase(e),c=Date.now(),t=r.get("SELECT COUNT(*) as cnt FROM cache");r.run("DELETE FROM cache WHERE expiresAt < ?",c);let s=r.get("SELECT COUNT(*) as cnt FROM cache");return(t?.cnt||0)-(s?.cnt||0)}async stats(e="default"){if(this.closed)return{count:0,expired:0};let r=await this.getDatabase(e),c=Date.now(),t=r.get("SELECT COUNT(*) as cnt FROM cache"),s=r.get("SELECT COUNT(*) as cnt FROM cache WHERE expiresAt < ?",c);return{count:t?.cnt||0,expired:s?.cnt||0}}async close(){if(this.closed)return;this.closed=!0;for(let e of this.databases.values())try{e.close()}catch{}this.databases.clear()}get isClosed(){return this.closed}get directory(){return this.cacheDir}}exports.FileCacher=l;exports.default=l;module.exports=Object.assign(l,exports);