maxun-core 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -636,6 +636,26 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
636
636
  return similarity >= similarityThreshold;
637
637
  });
638
638
  }
639
+ function tryFallbackSelector(rootElement, originalSelector) {
640
+ let element = queryElement(rootElement, originalSelector);
641
+ if (!element && originalSelector.includes('nth-child')) {
642
+ const match = originalSelector.match(/nth-child\((\d+)\)/);
643
+ if (match) {
644
+ const position = parseInt(match[1], 10);
645
+ for (let i = position - 1; i >= 1; i--) {
646
+ const fallbackSelector = originalSelector.replace(/nth-child\(\d+\)/, `nth-child(${i})`);
647
+ element = queryElement(rootElement, fallbackSelector);
648
+ if (element)
649
+ break;
650
+ }
651
+ if (!element) {
652
+ const baseSelector = originalSelector.replace(/\:nth-child\(\d+\)/, '');
653
+ element = queryElement(rootElement, baseSelector);
654
+ }
655
+ }
656
+ }
657
+ return element;
658
+ }
639
659
  // Main scraping logic with context support
640
660
  let containers = queryElementAll(document, listSelector);
641
661
  containers = Array.from(containers);
@@ -786,7 +806,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
786
806
  for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
787
807
  // Get the last part of the selector after any context delimiter
788
808
  const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
789
- const element = queryElement(container, relativeSelector);
809
+ const element = tryFallbackSelector(container, relativeSelector);
790
810
  if (element) {
791
811
  record[label] = extractValue(element, attribute);
792
812
  }
@@ -41,6 +41,7 @@ interface InterpreterOptions {
41
41
  activeId: (id: number) => void;
42
42
  debugMessage: (msg: string) => void;
43
43
  setActionType: (type: string) => void;
44
+ incrementScrapeListIndex: () => void;
44
45
  }>;
45
46
  }
46
47
  /**
@@ -385,7 +385,7 @@ class Interpreter extends events_1.EventEmitter {
385
385
  yield this.options.serializableCallback([mergedResult]);
386
386
  }),
387
387
  scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
388
- var _f;
388
+ var _f, _g;
389
389
  if ((_f = this.options.debugChannel) === null || _f === void 0 ? void 0 : _f.setActionType) {
390
390
  this.options.debugChannel.setActionType('scrapeList');
391
391
  }
@@ -394,6 +394,9 @@ class Interpreter extends events_1.EventEmitter {
394
394
  return;
395
395
  }
396
396
  yield this.ensureScriptsLoaded(page);
397
+ if ((_g = this.options.debugChannel) === null || _g === void 0 ? void 0 : _g.incrementScrapeListIndex) {
398
+ this.options.debugChannel.incrementScrapeListIndex();
399
+ }
397
400
  if (!config.pagination) {
398
401
  const scrapeResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
399
402
  yield this.options.serializableCallback(scrapeResults);
@@ -404,8 +407,8 @@ class Interpreter extends events_1.EventEmitter {
404
407
  }
405
408
  }),
406
409
  scrapeListAuto: (config) => __awaiter(this, void 0, void 0, function* () {
407
- var _g;
408
- if ((_g = this.options.debugChannel) === null || _g === void 0 ? void 0 : _g.setActionType) {
410
+ var _h;
411
+ if ((_h = this.options.debugChannel) === null || _h === void 0 ? void 0 : _h.setActionType) {
409
412
  this.options.debugChannel.setActionType('scrapeListAuto');
410
413
  }
411
414
  yield this.ensureScriptsLoaded(page);
@@ -415,8 +418,8 @@ class Interpreter extends events_1.EventEmitter {
415
418
  yield this.options.serializableCallback(scrapeResults);
416
419
  }),
417
420
  scroll: (pages) => __awaiter(this, void 0, void 0, function* () {
418
- var _h;
419
- if ((_h = this.options.debugChannel) === null || _h === void 0 ? void 0 : _h.setActionType) {
421
+ var _j;
422
+ if ((_j = this.options.debugChannel) === null || _j === void 0 ? void 0 : _j.setActionType) {
420
423
  this.options.debugChannel.setActionType('scroll');
421
424
  }
422
425
  yield page.evaluate((pagesInternal) => __awaiter(this, void 0, void 0, function* () {
@@ -427,8 +430,8 @@ class Interpreter extends events_1.EventEmitter {
427
430
  }), pages !== null && pages !== void 0 ? pages : 1);
428
431
  }),
429
432
  script: (code) => __awaiter(this, void 0, void 0, function* () {
430
- var _j;
431
- if ((_j = this.options.debugChannel) === null || _j === void 0 ? void 0 : _j.setActionType) {
433
+ var _k;
434
+ if ((_k = this.options.debugChannel) === null || _k === void 0 ? void 0 : _k.setActionType) {
432
435
  this.options.debugChannel.setActionType('script');
433
436
  }
434
437
  const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
@@ -530,6 +533,7 @@ class Interpreter extends events_1.EventEmitter {
530
533
  });
531
534
  allResults = allResults.concat(newResults);
532
535
  debugLog("Results collected:", allResults.length);
536
+ yield this.options.serializableCallback(allResults);
533
537
  });
534
538
  const checkLimit = () => {
535
539
  if (config.limit && allResults.length >= config.limit) {
@@ -700,6 +704,7 @@ class Interpreter extends events_1.EventEmitter {
700
704
  button.click()
701
705
  ]);
702
706
  debugLog("Navigation successful after regular click");
707
+ yield page.waitForTimeout(2000);
703
708
  paginationSuccess = true;
704
709
  }
705
710
  catch (navError) {
@@ -715,6 +720,7 @@ class Interpreter extends events_1.EventEmitter {
715
720
  button.dispatchEvent('click')
716
721
  ]);
717
722
  debugLog("Navigation successful after dispatch event");
723
+ yield page.waitForTimeout(2000);
718
724
  paginationSuccess = true;
719
725
  }
720
726
  catch (dispatchNavError) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "maxun-core",
3
- "version": "0.0.16",
3
+ "version": "0.0.18",
4
4
  "description": "Core package for Maxun, responsible for data extraction",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",