maxun-core 0.0.30 → 0.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/build/interpret.js +49 -7
  2. package/package.json +1 -1
@@ -399,7 +399,13 @@ class Interpreter extends events_1.EventEmitter {
399
399
  return;
400
400
  }
401
401
  yield this.ensureScriptsLoaded(page);
402
- const scrapeResult = yield page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
402
+ const normalizedSchema = Object.fromEntries(Object.entries(schema).map(([key, value]) => [
403
+ key,
404
+ typeof value === 'string'
405
+ ? { selector: value, tag: '', attribute: 'innerText', shadow: '' }
406
+ : value,
407
+ ]));
408
+ const scrapeResult = yield page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), normalizedSchema);
403
409
  if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
404
410
  this.cumulativeResults = [];
405
411
  }
@@ -1496,8 +1502,17 @@ class Interpreter extends events_1.EventEmitter {
1496
1502
  scrapedItems.add(uniqueKey);
1497
1503
  return true;
1498
1504
  });
1499
- allResults = allResults.concat(newResults);
1500
- debugLog("Results collected:", allResults.length);
1505
+ let itemsToAdd = newResults;
1506
+ if (config.limit) {
1507
+ const remainingCapacity = config.limit - allResults.length;
1508
+ if (remainingCapacity <= 0) {
1509
+ itemsToAdd = [];
1510
+ }
1511
+ else if (newResults.length > remainingCapacity) {
1512
+ itemsToAdd = newResults.slice(0, remainingCapacity);
1513
+ }
1514
+ }
1515
+ allResults = allResults.concat(itemsToAdd);
1501
1516
  this.serializableDataByType[actionType][actionName] = [...allResults];
1502
1517
  yield this.options.serializableCallback({
1503
1518
  scrapeList: this.serializableDataByType.scrapeList,
@@ -1635,11 +1650,38 @@ class Interpreter extends events_1.EventEmitter {
1635
1650
  if (checkLimit()) {
1636
1651
  return allResults;
1637
1652
  }
1638
- yield page.evaluate(() => {
1639
- const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
1640
- window.scrollTo(0, scrollHeight);
1641
- });
1653
+ const scrollIterations = 3;
1654
+ for (let i = 0; i < scrollIterations; i++) {
1655
+ yield page.evaluate(() => {
1656
+ window.scrollBy(0, window.innerHeight * 0.8);
1657
+ });
1658
+ yield page.waitForTimeout(500);
1659
+ }
1642
1660
  yield page.waitForTimeout(2000);
1661
+ try {
1662
+ yield page.evaluate((listSelector) => {
1663
+ const isXPath = listSelector.startsWith('//') || listSelector.startsWith('/');
1664
+ let lastElement = null;
1665
+ if (isXPath) {
1666
+ const result = document.evaluate(listSelector, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
1667
+ if (result.snapshotLength > 0) {
1668
+ lastElement = result.snapshotItem(result.snapshotLength - 1);
1669
+ }
1670
+ }
1671
+ else {
1672
+ const elements = document.querySelectorAll(listSelector);
1673
+ if (elements.length > 0) {
1674
+ lastElement = elements[elements.length - 1];
1675
+ }
1676
+ }
1677
+ if (lastElement) {
1678
+ lastElement.scrollIntoView({ behavior: 'smooth', block: 'end' });
1679
+ }
1680
+ }, config.listSelector);
1681
+ yield page.waitForTimeout(1500);
1682
+ }
1683
+ catch (e) {
1684
+ }
1643
1685
  const currentHeight = yield page.evaluate(() => {
1644
1686
  return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
1645
1687
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "maxun-core",
3
- "version": "0.0.30",
3
+ "version": "0.0.32",
4
4
  "description": "Core package for Maxun, responsible for data extraction",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",