maxun-core 0.0.30 → 0.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/interpret.js +49 -7
- package/package.json +1 -1
package/build/interpret.js
CHANGED
|
@@ -399,7 +399,13 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
399
399
|
return;
|
|
400
400
|
}
|
|
401
401
|
yield this.ensureScriptsLoaded(page);
|
|
402
|
-
const scrapeResult = yield page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
|
|
402
|
+
const normalizedSchema = Object.fromEntries(Object.entries(schema).map(([key, value]) => [
|
|
403
|
+
key,
|
|
404
|
+
typeof value === 'string'
|
|
405
|
+
? { selector: value, tag: '', attribute: 'innerText', shadow: '' }
|
|
406
|
+
: value,
|
|
407
|
+
]));
|
|
408
|
+
const scrapeResult = yield page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), normalizedSchema);
|
|
403
409
|
if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
|
|
404
410
|
this.cumulativeResults = [];
|
|
405
411
|
}
|
|
@@ -1496,8 +1502,17 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1496
1502
|
scrapedItems.add(uniqueKey);
|
|
1497
1503
|
return true;
|
|
1498
1504
|
});
|
|
1499
|
-
|
|
1500
|
-
|
|
1505
|
+
let itemsToAdd = newResults;
|
|
1506
|
+
if (config.limit) {
|
|
1507
|
+
const remainingCapacity = config.limit - allResults.length;
|
|
1508
|
+
if (remainingCapacity <= 0) {
|
|
1509
|
+
itemsToAdd = [];
|
|
1510
|
+
}
|
|
1511
|
+
else if (newResults.length > remainingCapacity) {
|
|
1512
|
+
itemsToAdd = newResults.slice(0, remainingCapacity);
|
|
1513
|
+
}
|
|
1514
|
+
}
|
|
1515
|
+
allResults = allResults.concat(itemsToAdd);
|
|
1501
1516
|
this.serializableDataByType[actionType][actionName] = [...allResults];
|
|
1502
1517
|
yield this.options.serializableCallback({
|
|
1503
1518
|
scrapeList: this.serializableDataByType.scrapeList,
|
|
@@ -1635,11 +1650,38 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1635
1650
|
if (checkLimit()) {
|
|
1636
1651
|
return allResults;
|
|
1637
1652
|
}
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1653
|
+
const scrollIterations = 3;
|
|
1654
|
+
for (let i = 0; i < scrollIterations; i++) {
|
|
1655
|
+
yield page.evaluate(() => {
|
|
1656
|
+
window.scrollBy(0, window.innerHeight * 0.8);
|
|
1657
|
+
});
|
|
1658
|
+
yield page.waitForTimeout(500);
|
|
1659
|
+
}
|
|
1642
1660
|
yield page.waitForTimeout(2000);
|
|
1661
|
+
try {
|
|
1662
|
+
yield page.evaluate((listSelector) => {
|
|
1663
|
+
const isXPath = listSelector.startsWith('//') || listSelector.startsWith('/');
|
|
1664
|
+
let lastElement = null;
|
|
1665
|
+
if (isXPath) {
|
|
1666
|
+
const result = document.evaluate(listSelector, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
|
1667
|
+
if (result.snapshotLength > 0) {
|
|
1668
|
+
lastElement = result.snapshotItem(result.snapshotLength - 1);
|
|
1669
|
+
}
|
|
1670
|
+
}
|
|
1671
|
+
else {
|
|
1672
|
+
const elements = document.querySelectorAll(listSelector);
|
|
1673
|
+
if (elements.length > 0) {
|
|
1674
|
+
lastElement = elements[elements.length - 1];
|
|
1675
|
+
}
|
|
1676
|
+
}
|
|
1677
|
+
if (lastElement) {
|
|
1678
|
+
lastElement.scrollIntoView({ behavior: 'smooth', block: 'end' });
|
|
1679
|
+
}
|
|
1680
|
+
}, config.listSelector);
|
|
1681
|
+
yield page.waitForTimeout(1500);
|
|
1682
|
+
}
|
|
1683
|
+
catch (e) {
|
|
1684
|
+
}
|
|
1643
1685
|
const currentHeight = yield page.evaluate(() => {
|
|
1644
1686
|
return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
1645
1687
|
});
|