maxun-core 0.0.16 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/browserSide/scraper.js +21 -1
- package/build/interpret.js +2 -0
- package/package.json +1 -1
|
@@ -636,6 +636,26 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
636
636
|
return similarity >= similarityThreshold;
|
|
637
637
|
});
|
|
638
638
|
}
|
|
639
|
+
/**
 * Looks up `originalSelector` under `rootElement`, relaxing a numeric
 * `nth-child(N)` constraint when the exact selector matches nothing.
 *
 * Lookup order:
 *   1. the selector exactly as given;
 *   2. the same selector with its first `nth-child(N)` lowered to
 *      N-1, N-2, ..., 1 (first hit wins);
 *   3. the selector with its first `:nth-child(N)` removed altogether.
 *
 * Only a purely numeric argument triggers the fallback; formulas such as
 * `nth-child(2n+1)` are left alone and the result of step 1 is returned.
 *
 * @param {*} rootElement - Root passed straight through to `queryElement`.
 * @param {string} originalSelector - Selector to resolve.
 * @returns {*} The first truthy result returned by `queryElement`, otherwise
 *   the falsy result of the last lookup that was attempted.
 */
function tryFallbackSelector(rootElement, originalSelector) {
    let element = queryElement(rootElement, originalSelector);
    if (element) {
        return element;
    }

    // `match` yields null both when there is no nth-child at all and when its
    // argument is not a plain integer, so no separate `includes` check is needed.
    const match = originalSelector.match(/nth-child\((\d+)\)/);
    if (!match) {
        return element;
    }

    // Walk the position downwards: N-1, N-2, ..., 1.
    const position = Number.parseInt(match[1], 10);
    for (let i = position - 1; i >= 1; i--) {
        const fallbackSelector = originalSelector.replace(/nth-child\(\d+\)/, `nth-child(${i})`);
        element = queryElement(rootElement, fallbackSelector);
        if (element) {
            return element;
        }
    }

    // Last resort: drop the positional constraint entirely.
    const baseSelector = originalSelector.replace(/:nth-child\(\d+\)/, '');
    // If the selector consisted only of the pseudo-class, nothing is left to
    // query. An empty string is not a valid CSS selector (DOM querySelector
    // rejects it with a SyntaxError), so skip the lookup instead of passing it on.
    if (baseSelector.trim() !== '') {
        element = queryElement(rootElement, baseSelector);
    }
    return element;
}
|
|
639
659
|
// Main scraping logic with context support
|
|
640
660
|
let containers = queryElementAll(document, listSelector);
|
|
641
661
|
containers = Array.from(containers);
|
|
@@ -786,7 +806,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
786
806
|
for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
|
|
787
807
|
// Get the last part of the selector after any context delimiter
|
|
788
808
|
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
|
|
789
|
-
const element =
|
|
809
|
+
const element = tryFallbackSelector(container, relativeSelector);
|
|
790
810
|
if (element) {
|
|
791
811
|
record[label] = extractValue(element, attribute);
|
|
792
812
|
}
|
package/build/interpret.js
CHANGED
|
@@ -700,6 +700,7 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
700
700
|
button.click()
|
|
701
701
|
]);
|
|
702
702
|
debugLog("Navigation successful after regular click");
|
|
703
|
+
yield page.waitForTimeout(2000);
|
|
703
704
|
paginationSuccess = true;
|
|
704
705
|
}
|
|
705
706
|
catch (navError) {
|
|
@@ -715,6 +716,7 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
715
716
|
button.dispatchEvent('click')
|
|
716
717
|
]);
|
|
717
718
|
debugLog("Navigation successful after dispatch event");
|
|
719
|
+
yield page.waitForTimeout(2000);
|
|
718
720
|
paginationSuccess = true;
|
|
719
721
|
}
|
|
720
722
|
catch (dispatchNavError) {
|