maxun-core 0.0.14 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/interpret.js +32 -6
- package/package.json +1 -1
package/build/interpret.js
CHANGED
|
@@ -477,6 +477,7 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
477
477
|
let visitedUrls = new Set();
|
|
478
478
|
const MAX_RETRIES = 3;
|
|
479
479
|
const RETRY_DELAY = 1000; // 1 second delay between retries
|
|
480
|
+
const MAX_UNCHANGED_RESULTS = 5;
|
|
480
481
|
const debugLog = (message, ...args) => {
|
|
481
482
|
console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
|
|
482
483
|
};
|
|
@@ -555,30 +556,55 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
555
556
|
}
|
|
556
557
|
});
|
|
557
558
|
let availableSelectors = config.pagination.selector.split(',');
|
|
559
|
+
let unchangedResultCounter = 0;
|
|
558
560
|
try {
|
|
559
561
|
while (true) {
|
|
560
|
-
// Reduced timeout for faster performance
|
|
561
|
-
yield page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => { });
|
|
562
562
|
switch (config.pagination.type) {
|
|
563
563
|
case 'scrollDown': {
|
|
564
|
+
let previousResultCount = allResults.length;
|
|
565
|
+
yield scrapeCurrentPage();
|
|
566
|
+
if (checkLimit()) {
|
|
567
|
+
return allResults;
|
|
568
|
+
}
|
|
564
569
|
yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
|
565
570
|
yield page.waitForTimeout(2000);
|
|
566
571
|
const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
|
|
572
|
+
const currentResultCount = allResults.length;
|
|
573
|
+
if (currentResultCount === previousResultCount) {
|
|
574
|
+
unchangedResultCounter++;
|
|
575
|
+
if (unchangedResultCounter >= MAX_UNCHANGED_RESULTS) {
|
|
576
|
+
return allResults;
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
else {
|
|
580
|
+
unchangedResultCounter = 0;
|
|
581
|
+
}
|
|
567
582
|
if (currentHeight === previousHeight) {
|
|
568
|
-
const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
|
|
569
|
-
allResults = allResults.concat(finalResults);
|
|
570
583
|
return allResults;
|
|
571
584
|
}
|
|
572
585
|
previousHeight = currentHeight;
|
|
573
586
|
break;
|
|
574
587
|
}
|
|
575
588
|
case 'scrollUp': {
|
|
589
|
+
let previousResultCount = allResults.length;
|
|
590
|
+
yield scrapeCurrentPage();
|
|
591
|
+
if (checkLimit()) {
|
|
592
|
+
return allResults;
|
|
593
|
+
}
|
|
576
594
|
yield page.evaluate(() => window.scrollTo(0, 0));
|
|
577
595
|
yield page.waitForTimeout(2000);
|
|
578
596
|
const currentTopHeight = yield page.evaluate(() => document.documentElement.scrollTop);
|
|
597
|
+
const currentResultCount = allResults.length;
|
|
598
|
+
if (currentResultCount === previousResultCount) {
|
|
599
|
+
unchangedResultCounter++;
|
|
600
|
+
if (unchangedResultCounter >= MAX_UNCHANGED_RESULTS) {
|
|
601
|
+
return allResults;
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
else {
|
|
605
|
+
unchangedResultCounter = 0;
|
|
606
|
+
}
|
|
579
607
|
if (currentTopHeight === 0) {
|
|
580
|
-
const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
|
|
581
|
-
allResults = allResults.concat(finalResults);
|
|
582
608
|
return allResults;
|
|
583
609
|
}
|
|
584
610
|
previousHeight = currentTopHeight;
|