mx-cloud 0.0.11 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/browserSide/scraper.js +586 -130
- package/build/interpret.js +16 -17
- package/package.json +1 -1
|
@@ -359,20 +359,170 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
359
359
|
*/
|
|
360
360
|
window.scrapeList = function (_a) {
|
|
361
361
|
return __awaiter(this, arguments, void 0, function* ({ listSelector, fields, limit = 10 }) {
|
|
362
|
-
var _b;
|
|
363
362
|
// XPath evaluation functions
|
|
364
|
-
const
|
|
363
|
+
const queryInsideContext = (context, part) => {
|
|
365
364
|
try {
|
|
366
|
-
const
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
const
|
|
372
|
-
|
|
365
|
+
const { tagName, conditions } = parseXPathPart(part);
|
|
366
|
+
const candidateElements = Array.from(context.querySelectorAll(tagName));
|
|
367
|
+
if (candidateElements.length === 0) {
|
|
368
|
+
return [];
|
|
369
|
+
}
|
|
370
|
+
const matchingElements = candidateElements.filter((el) => {
|
|
371
|
+
return elementMatchesConditions(el, conditions);
|
|
372
|
+
});
|
|
373
|
+
return matchingElements;
|
|
373
374
|
}
|
|
374
|
-
catch (
|
|
375
|
-
console.
|
|
375
|
+
catch (err) {
|
|
376
|
+
console.error("Error in queryInsideContext:", err);
|
|
377
|
+
return [];
|
|
378
|
+
}
|
|
379
|
+
};
|
|
380
|
+
// Helper function to parse XPath part
|
|
381
|
+
const parseXPathPart = (part) => {
|
|
382
|
+
const tagMatch = part.match(/^([a-zA-Z0-9-]+)/);
|
|
383
|
+
const tagName = tagMatch ? tagMatch[1] : "*";
|
|
384
|
+
const conditionMatches = part.match(/\[([^\]]+)\]/g);
|
|
385
|
+
const conditions = conditionMatches
|
|
386
|
+
? conditionMatches.map((c) => c.slice(1, -1))
|
|
387
|
+
: [];
|
|
388
|
+
return { tagName, conditions };
|
|
389
|
+
};
|
|
390
|
+
// Helper function to check if element matches all conditions
|
|
391
|
+
const elementMatchesConditions = (element, conditions) => {
|
|
392
|
+
for (const condition of conditions) {
|
|
393
|
+
if (!elementMatchesCondition(element, condition)) {
|
|
394
|
+
return false;
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
return true;
|
|
398
|
+
};
|
|
399
|
+
// Helper function to check if element matches a single condition
|
|
400
|
+
const elementMatchesCondition = (element, condition) => {
|
|
401
|
+
var _a, _b;
|
|
402
|
+
condition = condition.trim();
|
|
403
|
+
if (/^\d+$/.test(condition)) {
|
|
404
|
+
return true;
|
|
405
|
+
}
|
|
406
|
+
// Handle @attribute="value"
|
|
407
|
+
const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/);
|
|
408
|
+
if (attrMatch) {
|
|
409
|
+
const [, attr, value] = attrMatch;
|
|
410
|
+
const elementValue = element.getAttribute(attr);
|
|
411
|
+
return elementValue === value;
|
|
412
|
+
}
|
|
413
|
+
// Handle contains(@class, 'value')
|
|
414
|
+
const classContainsMatch = condition.match(/^contains\(@class,\s*["']([^"']+)["']\)$/);
|
|
415
|
+
if (classContainsMatch) {
|
|
416
|
+
const className = classContainsMatch[1];
|
|
417
|
+
return element.classList.contains(className);
|
|
418
|
+
}
|
|
419
|
+
// Handle contains(@attribute, 'value')
|
|
420
|
+
const attrContainsMatch = condition.match(/^contains\(@([^,]+),\s*["']([^"']+)["']\)$/);
|
|
421
|
+
if (attrContainsMatch) {
|
|
422
|
+
const [, attr, value] = attrContainsMatch;
|
|
423
|
+
const elementValue = element.getAttribute(attr) || "";
|
|
424
|
+
return elementValue.includes(value);
|
|
425
|
+
}
|
|
426
|
+
// Handle text()="value"
|
|
427
|
+
const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/);
|
|
428
|
+
if (textMatch) {
|
|
429
|
+
const expectedText = textMatch[1];
|
|
430
|
+
const elementText = ((_a = element.textContent) === null || _a === void 0 ? void 0 : _a.trim()) || "";
|
|
431
|
+
return elementText === expectedText;
|
|
432
|
+
}
|
|
433
|
+
// Handle contains(text(), 'value')
|
|
434
|
+
const textContainsMatch = condition.match(/^contains\(text\(\),\s*["']([^"']+)["']\)$/);
|
|
435
|
+
if (textContainsMatch) {
|
|
436
|
+
const expectedText = textContainsMatch[1];
|
|
437
|
+
const elementText = ((_b = element.textContent) === null || _b === void 0 ? void 0 : _b.trim()) || "";
|
|
438
|
+
return elementText.includes(expectedText);
|
|
439
|
+
}
|
|
440
|
+
// Handle count(*)=0 (element has no children)
|
|
441
|
+
if (condition === "count(*)=0") {
|
|
442
|
+
return element.children.length === 0;
|
|
443
|
+
}
|
|
444
|
+
// Handle other count conditions
|
|
445
|
+
const countMatch = condition.match(/^count\(\*\)=(\d+)$/);
|
|
446
|
+
if (countMatch) {
|
|
447
|
+
const expectedCount = parseInt(countMatch[1]);
|
|
448
|
+
return element.children.length === expectedCount;
|
|
449
|
+
}
|
|
450
|
+
return true;
|
|
451
|
+
};
|
|
452
|
+
const evaluateXPath = (document, xpath, isShadow = false) => {
|
|
453
|
+
try {
|
|
454
|
+
const result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
|
455
|
+
if (!isShadow) {
|
|
456
|
+
if (result === null) {
|
|
457
|
+
return null;
|
|
458
|
+
}
|
|
459
|
+
return result;
|
|
460
|
+
}
|
|
461
|
+
let cleanPath = xpath;
|
|
462
|
+
let isIndexed = false;
|
|
463
|
+
const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/);
|
|
464
|
+
if (indexedMatch) {
|
|
465
|
+
cleanPath = indexedMatch[1] + indexedMatch[3];
|
|
466
|
+
isIndexed = true;
|
|
467
|
+
}
|
|
468
|
+
const pathParts = cleanPath
|
|
469
|
+
.replace(/^\/\//, "")
|
|
470
|
+
.split("/")
|
|
471
|
+
.map((p) => p.trim())
|
|
472
|
+
.filter((p) => p.length > 0);
|
|
473
|
+
let currentContexts = [document];
|
|
474
|
+
for (let i = 0; i < pathParts.length; i++) {
|
|
475
|
+
const part = pathParts[i];
|
|
476
|
+
const nextContexts = [];
|
|
477
|
+
for (const ctx of currentContexts) {
|
|
478
|
+
const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/);
|
|
479
|
+
let partWithoutPosition = part;
|
|
480
|
+
let requestedPosition = null;
|
|
481
|
+
if (positionalMatch) {
|
|
482
|
+
partWithoutPosition = positionalMatch[1];
|
|
483
|
+
requestedPosition = parseInt(positionalMatch[2]);
|
|
484
|
+
}
|
|
485
|
+
const matched = queryInsideContext(ctx, partWithoutPosition);
|
|
486
|
+
let elementsToAdd = matched;
|
|
487
|
+
if (requestedPosition !== null) {
|
|
488
|
+
const index = requestedPosition - 1; // XPath is 1-based, arrays are 0-based
|
|
489
|
+
if (index >= 0 && index < matched.length) {
|
|
490
|
+
elementsToAdd = [matched[index]];
|
|
491
|
+
}
|
|
492
|
+
else {
|
|
493
|
+
console.warn(`Position ${requestedPosition} out of range (${matched.length} elements found)`);
|
|
494
|
+
elementsToAdd = [];
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
elementsToAdd.forEach((el) => {
|
|
498
|
+
nextContexts.push(el);
|
|
499
|
+
if (el.shadowRoot) {
|
|
500
|
+
nextContexts.push(el.shadowRoot);
|
|
501
|
+
}
|
|
502
|
+
});
|
|
503
|
+
}
|
|
504
|
+
if (nextContexts.length === 0) {
|
|
505
|
+
return null;
|
|
506
|
+
}
|
|
507
|
+
currentContexts = nextContexts;
|
|
508
|
+
}
|
|
509
|
+
if (currentContexts.length > 0) {
|
|
510
|
+
if (isIndexed && indexedMatch) {
|
|
511
|
+
const requestedIndex = parseInt(indexedMatch[2]) - 1;
|
|
512
|
+
if (requestedIndex >= 0 && requestedIndex < currentContexts.length) {
|
|
513
|
+
return currentContexts[requestedIndex];
|
|
514
|
+
}
|
|
515
|
+
else {
|
|
516
|
+
console.warn(`Requested index ${requestedIndex + 1} out of range (${currentContexts.length} elements found)`);
|
|
517
|
+
return null;
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
return currentContexts[0];
|
|
521
|
+
}
|
|
522
|
+
return null;
|
|
523
|
+
}
|
|
524
|
+
catch (err) {
|
|
525
|
+
console.error("Critical XPath failure:", xpath, err);
|
|
376
526
|
return null;
|
|
377
527
|
}
|
|
378
528
|
};
|
|
@@ -394,33 +544,41 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
394
544
|
return elements;
|
|
395
545
|
}
|
|
396
546
|
catch (error) {
|
|
397
|
-
console.warn(
|
|
547
|
+
console.warn("XPath evaluation failed:", xpath, error);
|
|
398
548
|
return [];
|
|
399
549
|
}
|
|
400
550
|
};
|
|
401
|
-
//
|
|
551
|
+
// Helper function to detect selector type
|
|
552
|
+
const isXPathSelector = (selector) => {
|
|
553
|
+
return (selector.startsWith("//") ||
|
|
554
|
+
selector.startsWith("/") ||
|
|
555
|
+
selector.startsWith("./"));
|
|
556
|
+
};
|
|
557
|
+
// Enhanced query function to handle iframe, frame, shadow DOM, CSS selectors, and XPath
|
|
402
558
|
const queryElement = (rootElement, selector) => {
|
|
403
|
-
if (!selector.includes(
|
|
559
|
+
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
|
404
560
|
// Check if it's an XPath selector
|
|
405
|
-
if (
|
|
561
|
+
if (isXPathSelector(selector)) {
|
|
406
562
|
return evaluateXPath(rootElement, selector);
|
|
407
563
|
}
|
|
408
564
|
else {
|
|
409
565
|
return rootElement.querySelector(selector);
|
|
410
566
|
}
|
|
411
567
|
}
|
|
412
|
-
const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
|
|
568
|
+
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
|
413
569
|
let currentElement = rootElement;
|
|
414
570
|
for (let i = 0; i < parts.length; i++) {
|
|
415
571
|
if (!currentElement)
|
|
416
572
|
return null;
|
|
417
573
|
// Handle iframe and frame traversal
|
|
418
|
-
if (currentElement.tagName ===
|
|
574
|
+
if (currentElement.tagName === "IFRAME" ||
|
|
575
|
+
currentElement.tagName === "FRAME") {
|
|
419
576
|
try {
|
|
420
|
-
const frameDoc = currentElement.contentDocument ||
|
|
577
|
+
const frameDoc = currentElement.contentDocument ||
|
|
578
|
+
currentElement.contentWindow.document;
|
|
421
579
|
if (!frameDoc)
|
|
422
580
|
return null;
|
|
423
|
-
if (
|
|
581
|
+
if (isXPathSelector(parts[i])) {
|
|
424
582
|
currentElement = evaluateXPath(frameDoc, parts[i]);
|
|
425
583
|
}
|
|
426
584
|
else {
|
|
@@ -434,9 +592,9 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
434
592
|
}
|
|
435
593
|
}
|
|
436
594
|
let nextElement = null;
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
if (
|
|
595
|
+
// Try regular DOM first
|
|
596
|
+
if ("querySelector" in currentElement) {
|
|
597
|
+
if (isXPathSelector(parts[i])) {
|
|
440
598
|
nextElement = evaluateXPath(currentElement, parts[i]);
|
|
441
599
|
}
|
|
442
600
|
else {
|
|
@@ -444,8 +602,10 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
444
602
|
}
|
|
445
603
|
}
|
|
446
604
|
// Try shadow DOM if not found
|
|
447
|
-
if (!nextElement &&
|
|
448
|
-
|
|
605
|
+
if (!nextElement &&
|
|
606
|
+
"shadowRoot" in currentElement &&
|
|
607
|
+
currentElement.shadowRoot) {
|
|
608
|
+
if (isXPathSelector(parts[i])) {
|
|
449
609
|
nextElement = evaluateXPath(currentElement.shadowRoot, parts[i]);
|
|
450
610
|
}
|
|
451
611
|
else {
|
|
@@ -453,11 +613,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
453
613
|
}
|
|
454
614
|
}
|
|
455
615
|
// Check children's shadow roots if still not found
|
|
456
|
-
if (!nextElement &&
|
|
616
|
+
if (!nextElement && "children" in currentElement) {
|
|
457
617
|
const children = Array.from(currentElement.children || []);
|
|
458
618
|
for (const child of children) {
|
|
459
619
|
if (child.shadowRoot) {
|
|
460
|
-
if (
|
|
620
|
+
if (isXPathSelector(parts[i])) {
|
|
461
621
|
nextElement = evaluateXPath(child.shadowRoot, parts[i]);
|
|
462
622
|
}
|
|
463
623
|
else {
|
|
@@ -472,28 +632,27 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
472
632
|
}
|
|
473
633
|
return currentElement;
|
|
474
634
|
};
|
|
475
|
-
// Enhanced query all function for
|
|
635
|
+
// Enhanced query all function for both contexts
|
|
476
636
|
const queryElementAll = (rootElement, selector) => {
|
|
477
|
-
if (!selector.includes(
|
|
478
|
-
|
|
479
|
-
if (selector.startsWith('//') || selector.startsWith('/') || selector.startsWith('./')) {
|
|
637
|
+
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
|
638
|
+
if (isXPathSelector(selector)) {
|
|
480
639
|
return evaluateXPathAll(rootElement, selector);
|
|
481
640
|
}
|
|
482
641
|
else {
|
|
483
642
|
return Array.from(rootElement.querySelectorAll(selector));
|
|
484
643
|
}
|
|
485
644
|
}
|
|
486
|
-
const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
|
|
645
|
+
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
|
487
646
|
let currentElements = [rootElement];
|
|
488
647
|
for (const part of parts) {
|
|
489
648
|
const nextElements = [];
|
|
490
649
|
for (const element of currentElements) {
|
|
491
650
|
// Handle iframe and frame traversal
|
|
492
|
-
if (element.tagName ===
|
|
651
|
+
if (element.tagName === "IFRAME" || element.tagName === "FRAME") {
|
|
493
652
|
try {
|
|
494
653
|
const frameDoc = element.contentDocument || element.contentWindow.document;
|
|
495
654
|
if (frameDoc) {
|
|
496
|
-
if (
|
|
655
|
+
if (isXPathSelector(part)) {
|
|
497
656
|
nextElements.push(...evaluateXPathAll(frameDoc, part));
|
|
498
657
|
}
|
|
499
658
|
else {
|
|
@@ -509,7 +668,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
509
668
|
else {
|
|
510
669
|
// Regular DOM elements
|
|
511
670
|
if (element.querySelectorAll) {
|
|
512
|
-
if (
|
|
671
|
+
if (isXPathSelector(part)) {
|
|
513
672
|
nextElements.push(...evaluateXPathAll(element, part));
|
|
514
673
|
}
|
|
515
674
|
else {
|
|
@@ -518,7 +677,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
518
677
|
}
|
|
519
678
|
// Shadow DOM elements
|
|
520
679
|
if (element.shadowRoot) {
|
|
521
|
-
if (
|
|
680
|
+
if (isXPathSelector(part)) {
|
|
522
681
|
nextElements.push(...evaluateXPathAll(element.shadowRoot, part));
|
|
523
682
|
}
|
|
524
683
|
else {
|
|
@@ -529,7 +688,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
529
688
|
const children = Array.from(element.children || []);
|
|
530
689
|
for (const child of children) {
|
|
531
690
|
if (child.shadowRoot) {
|
|
532
|
-
if (
|
|
691
|
+
if (isXPathSelector(part)) {
|
|
533
692
|
nextElements.push(...evaluateXPathAll(child.shadowRoot, part));
|
|
534
693
|
}
|
|
535
694
|
else {
|
|
@@ -545,7 +704,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
545
704
|
};
|
|
546
705
|
// Enhanced value extraction with context awareness
|
|
547
706
|
const extractValue = (element, attribute) => {
|
|
548
|
-
var _a, _b, _c, _d, _e
|
|
707
|
+
var _a, _b, _c, _d, _e;
|
|
549
708
|
if (!element)
|
|
550
709
|
return null;
|
|
551
710
|
// Get context-aware base URL
|
|
@@ -557,17 +716,17 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
557
716
|
return shadowContent.trim();
|
|
558
717
|
}
|
|
559
718
|
}
|
|
560
|
-
if (attribute ===
|
|
719
|
+
if (attribute === "innerText") {
|
|
561
720
|
// First try standard innerText/textContent
|
|
562
721
|
let textContent = ((_c = element.innerText) === null || _c === void 0 ? void 0 : _c.trim()) || ((_d = element.textContent) === null || _d === void 0 ? void 0 : _d.trim());
|
|
563
722
|
// If empty, check for common data attributes that might contain the text
|
|
564
723
|
if (!textContent) {
|
|
565
724
|
const dataAttributes = [
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
725
|
+
"data-600",
|
|
726
|
+
"data-text",
|
|
727
|
+
"data-label",
|
|
728
|
+
"data-value",
|
|
729
|
+
"data-content",
|
|
571
730
|
];
|
|
572
731
|
for (const attr of dataAttributes) {
|
|
573
732
|
const dataValue = element.getAttribute(attr);
|
|
@@ -579,140 +738,437 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
579
738
|
}
|
|
580
739
|
return textContent || null;
|
|
581
740
|
}
|
|
582
|
-
else if (attribute ===
|
|
741
|
+
else if (attribute === "innerHTML") {
|
|
583
742
|
return ((_e = element.innerHTML) === null || _e === void 0 ? void 0 : _e.trim()) || null;
|
|
584
743
|
}
|
|
585
|
-
else if (attribute ===
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
744
|
+
else if (attribute === "src" || attribute === "href") {
|
|
745
|
+
if (attribute === "href" && element.tagName !== "A") {
|
|
746
|
+
const parentElement = element.parentElement;
|
|
747
|
+
if (parentElement && parentElement.tagName === "A") {
|
|
748
|
+
const parentHref = parentElement.getAttribute("href");
|
|
749
|
+
if (parentHref) {
|
|
750
|
+
try {
|
|
751
|
+
return new URL(parentHref, baseURL).href;
|
|
752
|
+
}
|
|
753
|
+
catch (e) {
|
|
754
|
+
return parentHref;
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
}
|
|
591
758
|
}
|
|
592
|
-
const
|
|
593
|
-
|
|
759
|
+
const attrValue = element.getAttribute(attribute);
|
|
760
|
+
const dataAttr = attrValue || element.getAttribute("data-" + attribute);
|
|
761
|
+
if (!dataAttr || dataAttr.trim() === "") {
|
|
762
|
+
if (attribute === "src") {
|
|
763
|
+
const style = window.getComputedStyle(element);
|
|
764
|
+
const bgImage = style.backgroundImage;
|
|
765
|
+
if (bgImage && bgImage !== "none") {
|
|
766
|
+
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
|
767
|
+
return matches ? new URL(matches[1], baseURL).href : null;
|
|
768
|
+
}
|
|
769
|
+
}
|
|
594
770
|
return null;
|
|
595
771
|
}
|
|
596
772
|
try {
|
|
597
|
-
return new URL(
|
|
773
|
+
return new URL(dataAttr, baseURL).href;
|
|
598
774
|
}
|
|
599
775
|
catch (e) {
|
|
600
|
-
console.warn(
|
|
601
|
-
return
|
|
776
|
+
console.warn("Error creating URL from", dataAttr, e);
|
|
777
|
+
return dataAttr;
|
|
602
778
|
}
|
|
603
779
|
}
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
780
|
+
return element.getAttribute(attribute);
|
|
781
|
+
};
|
|
782
|
+
// Enhanced table ancestor finding with context support
|
|
783
|
+
const findTableAncestor = (element) => {
|
|
784
|
+
let currentElement = element;
|
|
785
|
+
const MAX_DEPTH = 5;
|
|
786
|
+
let depth = 0;
|
|
787
|
+
while (currentElement && depth < MAX_DEPTH) {
|
|
788
|
+
// Handle shadow DOM
|
|
789
|
+
if (currentElement.getRootNode() instanceof ShadowRoot) {
|
|
790
|
+
currentElement = currentElement.getRootNode().host;
|
|
791
|
+
continue;
|
|
792
|
+
}
|
|
793
|
+
if (currentElement.tagName === "TD") {
|
|
794
|
+
return { type: "TD", element: currentElement };
|
|
795
|
+
}
|
|
796
|
+
else if (currentElement.tagName === "TR") {
|
|
797
|
+
return { type: "TR", element: currentElement };
|
|
798
|
+
}
|
|
799
|
+
// Handle iframe and frame crossing
|
|
800
|
+
if (currentElement.tagName === "IFRAME" ||
|
|
801
|
+
currentElement.tagName === "FRAME") {
|
|
802
|
+
try {
|
|
803
|
+
currentElement = currentElement.contentDocument.body;
|
|
613
804
|
}
|
|
614
|
-
|
|
805
|
+
catch (e) {
|
|
806
|
+
return null;
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
else {
|
|
810
|
+
currentElement = currentElement.parentElement;
|
|
615
811
|
}
|
|
812
|
+
depth++;
|
|
813
|
+
}
|
|
814
|
+
return null;
|
|
815
|
+
};
|
|
816
|
+
// Helper function to get cell index
|
|
817
|
+
const getCellIndex = (td) => {
|
|
818
|
+
if (td.getRootNode() instanceof ShadowRoot) {
|
|
819
|
+
const shadowRoot = td.getRootNode();
|
|
820
|
+
const allCells = Array.from(shadowRoot.querySelectorAll("td"));
|
|
821
|
+
return allCells.indexOf(td);
|
|
822
|
+
}
|
|
823
|
+
let index = 0;
|
|
824
|
+
let sibling = td;
|
|
825
|
+
while ((sibling = sibling.previousElementSibling)) {
|
|
826
|
+
index++;
|
|
827
|
+
}
|
|
828
|
+
return index;
|
|
829
|
+
};
|
|
830
|
+
// Helper function to check for TH elements
|
|
831
|
+
const hasThElement = (row, tableFields) => {
|
|
832
|
+
for (const [_, { selector }] of Object.entries(tableFields)) {
|
|
833
|
+
const element = queryElement(row, selector);
|
|
834
|
+
if (element) {
|
|
835
|
+
let current = element;
|
|
836
|
+
while (current && current !== row) {
|
|
837
|
+
if (current.getRootNode() instanceof ShadowRoot) {
|
|
838
|
+
current = current.getRootNode().host;
|
|
839
|
+
continue;
|
|
840
|
+
}
|
|
841
|
+
if (current.tagName === "TH")
|
|
842
|
+
return true;
|
|
843
|
+
if (current.tagName === "IFRAME" || current.tagName === "FRAME") {
|
|
844
|
+
try {
|
|
845
|
+
current = current.contentDocument.body;
|
|
846
|
+
}
|
|
847
|
+
catch (e) {
|
|
848
|
+
break;
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
else {
|
|
852
|
+
current = current.parentElement;
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
return false;
|
|
858
|
+
};
|
|
859
|
+
// Helper function to filter rows
|
|
860
|
+
const filterRowsBasedOnTag = (rows, tableFields) => {
|
|
861
|
+
for (const row of rows) {
|
|
862
|
+
if (hasThElement(row, tableFields)) {
|
|
863
|
+
return rows;
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
return rows.filter((row) => {
|
|
867
|
+
const directTH = row.getElementsByTagName("TH").length === 0;
|
|
868
|
+
const shadowTH = row.shadowRoot
|
|
869
|
+
? row.shadowRoot.querySelector("th") === null
|
|
870
|
+
: true;
|
|
871
|
+
return directTH && shadowTH;
|
|
872
|
+
});
|
|
873
|
+
};
|
|
874
|
+
// Class similarity comparison functions
|
|
875
|
+
const calculateClassSimilarity = (classList1, classList2) => {
|
|
876
|
+
const set1 = new Set(classList1);
|
|
877
|
+
const set2 = new Set(classList2);
|
|
878
|
+
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
879
|
+
const union = new Set([...set1, ...set2]);
|
|
880
|
+
return intersection.size / union.size;
|
|
881
|
+
};
|
|
882
|
+
// Enhanced similar elements finding with context support
|
|
883
|
+
const findSimilarElements = (baseElement, similarityThreshold = 0.7) => {
|
|
884
|
+
const baseClasses = Array.from(baseElement.classList);
|
|
885
|
+
if (baseClasses.length === 0)
|
|
886
|
+
return [];
|
|
887
|
+
const allElements = [];
|
|
888
|
+
// Get elements from main document
|
|
889
|
+
allElements.push(...document.getElementsByTagName(baseElement.tagName));
|
|
890
|
+
// Get elements from shadow DOM
|
|
891
|
+
if (baseElement.getRootNode() instanceof ShadowRoot) {
|
|
892
|
+
const shadowHost = baseElement.getRootNode().host;
|
|
893
|
+
allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName));
|
|
894
|
+
}
|
|
895
|
+
// Get elements from iframes and frames
|
|
896
|
+
const frames = [
|
|
897
|
+
...Array.from(document.getElementsByTagName("iframe")),
|
|
898
|
+
...Array.from(document.getElementsByTagName("frame")),
|
|
899
|
+
];
|
|
900
|
+
for (const frame of frames) {
|
|
616
901
|
try {
|
|
617
|
-
|
|
902
|
+
const frameDoc = frame.contentDocument || frame.contentWindow.document;
|
|
903
|
+
allElements.push(...frameDoc.getElementsByTagName(baseElement.tagName));
|
|
618
904
|
}
|
|
619
905
|
catch (e) {
|
|
620
|
-
console.warn(
|
|
621
|
-
return dataAttr;
|
|
906
|
+
console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e);
|
|
622
907
|
}
|
|
623
908
|
}
|
|
624
|
-
return
|
|
909
|
+
return allElements.filter((element) => {
|
|
910
|
+
if (element === baseElement)
|
|
911
|
+
return false;
|
|
912
|
+
const similarity = calculateClassSimilarity(baseClasses, Array.from(element.classList));
|
|
913
|
+
return similarity >= similarityThreshold;
|
|
914
|
+
});
|
|
915
|
+
};
|
|
916
|
+
const tryFallbackSelector = (rootElement, originalSelector) => {
|
|
917
|
+
let element = queryElement(rootElement, originalSelector);
|
|
918
|
+
if (!element && originalSelector.includes("nth-child")) {
|
|
919
|
+
const match = originalSelector.match(/nth-child\((\d+)\)/);
|
|
920
|
+
if (match) {
|
|
921
|
+
const position = parseInt(match[1], 10);
|
|
922
|
+
for (let i = position - 1; i >= 1; i--) {
|
|
923
|
+
const fallbackSelector = originalSelector.replace(/nth-child\(\d+\)/, `nth-child(${i})`);
|
|
924
|
+
element = queryElement(rootElement, fallbackSelector);
|
|
925
|
+
if (element)
|
|
926
|
+
break;
|
|
927
|
+
}
|
|
928
|
+
if (!element) {
|
|
929
|
+
const baseSelector = originalSelector.replace(/\:nth-child\(\d+\)/, "");
|
|
930
|
+
element = queryElement(rootElement, baseSelector);
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
return element;
|
|
625
935
|
};
|
|
626
936
|
// Create indexed XPath for specific container instance
|
|
627
937
|
const createIndexedXPath = (childSelector, listSelector, containerIndex) => {
|
|
628
|
-
console.log(`Creating indexed XPath for container ${containerIndex}`);
|
|
629
|
-
console.log(`Child selector: ${childSelector}`);
|
|
630
|
-
console.log(`List selector: ${listSelector}`);
|
|
631
938
|
// Check if the child selector contains the list selector pattern
|
|
632
|
-
if (childSelector.includes(listSelector.replace(
|
|
939
|
+
if (childSelector.includes(listSelector.replace("//", ""))) {
|
|
633
940
|
// Replace the list selector part with indexed version
|
|
634
|
-
const listPattern = listSelector.replace(
|
|
941
|
+
const listPattern = listSelector.replace("//", "");
|
|
635
942
|
const indexedListSelector = `(${listSelector})[${containerIndex}]`;
|
|
636
943
|
const indexedSelector = childSelector.replace(`//${listPattern}`, indexedListSelector);
|
|
637
|
-
console.log(`Generated indexed selector: ${indexedSelector}`);
|
|
638
944
|
return indexedSelector;
|
|
639
945
|
}
|
|
640
946
|
else {
|
|
641
947
|
// If pattern doesn't match, create a more generic indexed selector
|
|
642
|
-
|
|
643
|
-
return `(${listSelector})[${containerIndex}]${childSelector.replace('//', '/')}`;
|
|
948
|
+
return `(${listSelector})[${containerIndex}]${childSelector.replace("//", "/")}`;
|
|
644
949
|
}
|
|
645
950
|
};
|
|
646
|
-
// Main scraping logic
|
|
647
|
-
console.log(
|
|
648
|
-
console.log(
|
|
649
|
-
console.log(
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
console.log(`📦 Found ${containers.length} list containers`);
|
|
951
|
+
// Main scraping logic with unified support for both CSS and XPath
|
|
952
|
+
console.log("🚀 Starting unified list data extraction");
|
|
953
|
+
console.log("List Selector:", listSelector);
|
|
954
|
+
console.log("Fields:", fields);
|
|
955
|
+
let containers = queryElementAll(document, listSelector);
|
|
956
|
+
containers = Array.from(containers);
|
|
653
957
|
if (containers.length === 0) {
|
|
654
|
-
console.warn(
|
|
958
|
+
console.warn("❌ No containers found for listSelector:", listSelector);
|
|
655
959
|
return [];
|
|
656
960
|
}
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
const
|
|
663
|
-
const
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
961
|
+
console.log(`📦 Found ${containers.length} list containers`);
|
|
962
|
+
// For CSS selectors, try to find similar containers if needed
|
|
963
|
+
if (!isXPathSelector(listSelector) &&
|
|
964
|
+
limit > 1 &&
|
|
965
|
+
containers.length === 1) {
|
|
966
|
+
const baseContainer = containers[0];
|
|
967
|
+
const similarContainers = findSimilarElements(baseContainer);
|
|
968
|
+
if (similarContainers.length > 0) {
|
|
969
|
+
const newContainers = similarContainers.filter((container) => !container.matches(listSelector));
|
|
970
|
+
containers = [...containers, ...newContainers];
|
|
971
|
+
}
|
|
972
|
+
}
|
|
973
|
+
const containerFields = containers.map(() => ({
|
|
974
|
+
tableFields: {},
|
|
975
|
+
nonTableFields: {},
|
|
976
|
+
}));
|
|
977
|
+
// For XPath selectors, use the new approach
|
|
978
|
+
if (isXPathSelector(listSelector)) {
|
|
979
|
+
const extractedData = [];
|
|
980
|
+
const containersToProcess = Math.min(containers.length, limit);
|
|
981
|
+
for (let containerIndex = 0; containerIndex < containersToProcess; containerIndex++) {
|
|
982
|
+
const record = {};
|
|
983
|
+
for (const [label, field] of Object.entries(fields)) {
|
|
984
|
+
let element = null;
|
|
985
|
+
if (isXPathSelector(field.selector)) {
|
|
986
|
+
// Create indexed absolute XPath
|
|
987
|
+
const indexedSelector = createIndexedXPath(field.selector, listSelector, containerIndex + 1);
|
|
988
|
+
element = evaluateXPath(document, indexedSelector, field.isShadow);
|
|
989
|
+
}
|
|
990
|
+
else {
|
|
991
|
+
// Fallback for CSS selectors within XPath containers
|
|
992
|
+
const container = containers[containerIndex];
|
|
993
|
+
element = queryElement(container, field.selector);
|
|
994
|
+
}
|
|
678
995
|
if (element) {
|
|
679
|
-
|
|
996
|
+
const value = extractValue(element, field.attribute);
|
|
997
|
+
if (value !== null && value !== "") {
|
|
998
|
+
record[label] = value;
|
|
999
|
+
}
|
|
1000
|
+
else {
|
|
1001
|
+
record[label] = "";
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
else {
|
|
1005
|
+
record[label] = "";
|
|
680
1006
|
}
|
|
681
1007
|
}
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
element = queryElement(container, field.selector);
|
|
1008
|
+
if (Object.values(record).some((value) => value !== "")) {
|
|
1009
|
+
extractedData.push(record);
|
|
685
1010
|
}
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
1011
|
+
}
|
|
1012
|
+
console.log(`📊 Total records extracted: ${extractedData.length}`);
|
|
1013
|
+
return extractedData;
|
|
1014
|
+
}
|
|
1015
|
+
// For CSS selectors, use the original table-aware approach
|
|
1016
|
+
containers.forEach((container, containerIndex) => {
|
|
1017
|
+
for (const [label, field] of Object.entries(fields)) {
|
|
1018
|
+
const sampleElement = queryElement(container, field.selector);
|
|
1019
|
+
if (sampleElement) {
|
|
1020
|
+
const ancestor = findTableAncestor(sampleElement);
|
|
1021
|
+
if (ancestor) {
|
|
1022
|
+
containerFields[containerIndex].tableFields[label] = Object.assign(Object.assign({}, field), { tableContext: ancestor.type, cellIndex: ancestor.type === "TD" ? getCellIndex(ancestor.element) : -1 });
|
|
692
1023
|
}
|
|
693
1024
|
else {
|
|
694
|
-
|
|
695
|
-
record[label] = '';
|
|
1025
|
+
containerFields[containerIndex].nonTableFields[label] = field;
|
|
696
1026
|
}
|
|
697
1027
|
}
|
|
698
1028
|
else {
|
|
699
|
-
|
|
700
|
-
record[label] = '';
|
|
1029
|
+
containerFields[containerIndex].nonTableFields[label] = field;
|
|
701
1030
|
}
|
|
702
1031
|
}
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
1032
|
+
});
|
|
1033
|
+
const tableData = [];
|
|
1034
|
+
const nonTableData = [];
|
|
1035
|
+
// Process table data with support for iframes, frames, and shadow DOM
|
|
1036
|
+
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
|
|
1037
|
+
const container = containers[containerIndex];
|
|
1038
|
+
const { tableFields } = containerFields[containerIndex];
|
|
1039
|
+
if (Object.keys(tableFields).length > 0) {
|
|
1040
|
+
const firstField = Object.values(tableFields)[0];
|
|
1041
|
+
const firstElement = queryElement(container, firstField.selector);
|
|
1042
|
+
let tableContext = firstElement;
|
|
1043
|
+
// Find table context including iframe, frame and shadow DOM
|
|
1044
|
+
while (tableContext &&
|
|
1045
|
+
tableContext.tagName !== "TABLE" &&
|
|
1046
|
+
tableContext !== container) {
|
|
1047
|
+
if (tableContext.getRootNode() instanceof ShadowRoot) {
|
|
1048
|
+
tableContext = tableContext.getRootNode().host;
|
|
1049
|
+
continue;
|
|
1050
|
+
}
|
|
1051
|
+
if (tableContext.tagName === "IFRAME" ||
|
|
1052
|
+
tableContext.tagName === "FRAME") {
|
|
1053
|
+
try {
|
|
1054
|
+
tableContext = tableContext.contentDocument.body;
|
|
1055
|
+
}
|
|
1056
|
+
catch (e) {
|
|
1057
|
+
break;
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1060
|
+
else {
|
|
1061
|
+
tableContext = tableContext.parentElement;
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1064
|
+
if (tableContext) {
|
|
1065
|
+
// Get rows from all contexts
|
|
1066
|
+
const rows = [];
|
|
1067
|
+
// Get rows from regular DOM
|
|
1068
|
+
rows.push(...tableContext.getElementsByTagName("TR"));
|
|
1069
|
+
// Get rows from shadow DOM
|
|
1070
|
+
if (tableContext.shadowRoot) {
|
|
1071
|
+
rows.push(...tableContext.shadowRoot.getElementsByTagName("TR"));
|
|
1072
|
+
}
|
|
1073
|
+
// Get rows from iframes and frames
|
|
1074
|
+
if (tableContext.tagName === "IFRAME" ||
|
|
1075
|
+
tableContext.tagName === "FRAME") {
|
|
1076
|
+
try {
|
|
1077
|
+
const frameDoc = tableContext.contentDocument ||
|
|
1078
|
+
tableContext.contentWindow.document;
|
|
1079
|
+
rows.push(...frameDoc.getElementsByTagName("TR"));
|
|
1080
|
+
}
|
|
1081
|
+
catch (e) {
|
|
1082
|
+
console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e);
|
|
1083
|
+
}
|
|
1084
|
+
}
|
|
1085
|
+
const processedRows = filterRowsBasedOnTag(rows, tableFields);
|
|
1086
|
+
for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) {
|
|
1087
|
+
const record = {};
|
|
1088
|
+
const currentRow = processedRows[rowIndex];
|
|
1089
|
+
for (const [label, { selector, attribute, cellIndex },] of Object.entries(tableFields)) {
|
|
1090
|
+
let element = null;
|
|
1091
|
+
if (cellIndex >= 0) {
|
|
1092
|
+
// Get TD element considering both contexts
|
|
1093
|
+
let td = currentRow.children[cellIndex];
|
|
1094
|
+
// Check shadow DOM for td
|
|
1095
|
+
if (!td && currentRow.shadowRoot) {
|
|
1096
|
+
const shadowCells = currentRow.shadowRoot.children;
|
|
1097
|
+
if (shadowCells && shadowCells.length > cellIndex) {
|
|
1098
|
+
td = shadowCells[cellIndex];
|
|
1099
|
+
}
|
|
1100
|
+
}
|
|
1101
|
+
if (td) {
|
|
1102
|
+
element = queryElement(td, selector);
|
|
1103
|
+
if (!element &&
|
|
1104
|
+
selector
|
|
1105
|
+
.split(/(?:>>|:>>)/)
|
|
1106
|
+
.pop()
|
|
1107
|
+
.includes("td:nth-child")) {
|
|
1108
|
+
element = td;
|
|
1109
|
+
}
|
|
1110
|
+
if (!element) {
|
|
1111
|
+
const tagOnlySelector = selector.split(".")[0];
|
|
1112
|
+
element = queryElement(td, tagOnlySelector);
|
|
1113
|
+
}
|
|
1114
|
+
if (!element) {
|
|
1115
|
+
let currentElement = td;
|
|
1116
|
+
while (currentElement &&
|
|
1117
|
+
currentElement.children.length > 0) {
|
|
1118
|
+
let foundContentChild = false;
|
|
1119
|
+
for (const child of currentElement.children) {
|
|
1120
|
+
if (extractValue(child, attribute)) {
|
|
1121
|
+
currentElement = child;
|
|
1122
|
+
foundContentChild = true;
|
|
1123
|
+
break;
|
|
1124
|
+
}
|
|
1125
|
+
}
|
|
1126
|
+
if (!foundContentChild)
|
|
1127
|
+
break;
|
|
1128
|
+
}
|
|
1129
|
+
element = currentElement;
|
|
1130
|
+
}
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
else {
|
|
1134
|
+
element = queryElement(currentRow, selector);
|
|
1135
|
+
}
|
|
1136
|
+
if (element) {
|
|
1137
|
+
record[label] = extractValue(element, attribute);
|
|
1138
|
+
}
|
|
1139
|
+
}
|
|
1140
|
+
if (Object.keys(record).length > 0) {
|
|
1141
|
+
tableData.push(record);
|
|
1142
|
+
}
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
707
1145
|
}
|
|
708
|
-
|
|
709
|
-
|
|
1146
|
+
}
|
|
1147
|
+
// Process non-table data with all contexts support
|
|
1148
|
+
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
|
|
1149
|
+
if (nonTableData.length >= limit)
|
|
1150
|
+
break;
|
|
1151
|
+
const container = containers[containerIndex];
|
|
1152
|
+
const { nonTableFields } = containerFields[containerIndex];
|
|
1153
|
+
if (Object.keys(nonTableFields).length > 0) {
|
|
1154
|
+
const record = {};
|
|
1155
|
+
for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
|
|
1156
|
+
// Get the last part of the selector after any context delimiter
|
|
1157
|
+
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
|
|
1158
|
+
const element = tryFallbackSelector(container, relativeSelector);
|
|
1159
|
+
if (element) {
|
|
1160
|
+
record[label] = extractValue(element, attribute);
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
if (Object.keys(record).length > 0) {
|
|
1164
|
+
nonTableData.push(record);
|
|
1165
|
+
}
|
|
710
1166
|
}
|
|
711
1167
|
}
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
console.log(
|
|
715
|
-
return
|
|
1168
|
+
// Merge and limit the results
|
|
1169
|
+
const scrapedData = [...tableData, ...nonTableData];
|
|
1170
|
+
console.log(`📊 Total records extracted: ${scrapedData.length}`);
|
|
1171
|
+
return scrapedData;
|
|
716
1172
|
});
|
|
717
1173
|
};
|
|
718
1174
|
/**
|
package/build/interpret.js
CHANGED
|
@@ -829,9 +829,9 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
829
829
|
if (checkLimit())
|
|
830
830
|
return allResults;
|
|
831
831
|
let loadMoreCounter = 0;
|
|
832
|
-
let previousResultCount = allResults.length;
|
|
833
|
-
let noNewItemsCounter = 0;
|
|
834
|
-
const MAX_NO_NEW_ITEMS = 2;
|
|
832
|
+
// let previousResultCount = allResults.length;
|
|
833
|
+
// let noNewItemsCounter = 0;
|
|
834
|
+
// const MAX_NO_NEW_ITEMS = 2;
|
|
835
835
|
while (true) {
|
|
836
836
|
// Find working button with retry mechanism
|
|
837
837
|
const { button: loadMoreButton, workingSelector, updatedSelectors } = yield findWorkingButton(availableSelectors);
|
|
@@ -888,20 +888,19 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
888
888
|
const heightChanged = currentHeight !== previousHeight;
|
|
889
889
|
previousHeight = currentHeight;
|
|
890
890
|
yield scrapeCurrentPage();
|
|
891
|
-
const currentResultCount = allResults.length;
|
|
892
|
-
const newItemsAdded = currentResultCount > previousResultCount;
|
|
893
|
-
if (!newItemsAdded) {
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
}
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
}
|
|
891
|
+
// const currentResultCount = allResults.length;
|
|
892
|
+
// const newItemsAdded = currentResultCount > previousResultCount;
|
|
893
|
+
// if (!newItemsAdded) {
|
|
894
|
+
// noNewItemsCounter++;
|
|
895
|
+
// debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`);
|
|
896
|
+
// if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) {
|
|
897
|
+
// debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`);
|
|
898
|
+
// return allResults;
|
|
899
|
+
// }
|
|
900
|
+
// } else {
|
|
901
|
+
// noNewItemsCounter = 0;
|
|
902
|
+
// previousResultCount = currentResultCount;
|
|
903
|
+
// }
|
|
905
904
|
if (checkLimit())
|
|
906
905
|
return allResults;
|
|
907
906
|
if (!heightChanged) {
|