mx-cloud 0.0.2 → 0.0.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- package/build/browserSide/scraper.js +14 -0
- package/package.json +1 -1
|
@@ -466,6 +466,20 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
466
466
|
return element.innerHTML.trim();
|
|
467
467
|
}
|
|
468
468
|
else if (attribute === 'src' || attribute === 'href') {
|
|
469
|
+
if (attribute === 'href' && element.tagName !== 'A') {
|
|
470
|
+
const parentElement = element.parentElement;
|
|
471
|
+
if (parentElement && parentElement.tagName === 'A') {
|
|
472
|
+
const parentHref = parentElement.getAttribute('href');
|
|
473
|
+
if (parentHref) {
|
|
474
|
+
try {
|
|
475
|
+
return new URL(parentHref, baseURL).href;
|
|
476
|
+
}
|
|
477
|
+
catch (e) {
|
|
478
|
+
return parentHref;
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
}
|
|
469
483
|
const attrValue = element.getAttribute(attribute);
|
|
470
484
|
const dataAttr = attrValue || element.getAttribute('data-' + attribute);
|
|
471
485
|
if (!dataAttr || dataAttr.trim() === '') {
|