maxun-core 0.0.12 → 0.0.13
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/build/browserSide/scraper.js +74 -44
- package/build/interpret.js +75 -39
- package/package.json +1 -1
|
@@ -174,7 +174,6 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
174
174
|
if (!config.selector.includes('>>') && !config.selector.includes(':>>')) {
|
|
175
175
|
return Array.from(document.querySelectorAll(config.selector));
|
|
176
176
|
}
|
|
177
|
-
// First handle iframe traversal if present
|
|
178
177
|
if (config.selector.includes(':>>')) {
|
|
179
178
|
const parts = config.selector.split(':>>').map(s => s.trim());
|
|
180
179
|
let currentElements = [document];
|
|
@@ -185,24 +184,42 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
185
184
|
const isLast = i === parts.length - 1;
|
|
186
185
|
for (const element of currentElements) {
|
|
187
186
|
try {
|
|
188
|
-
// For document or iframe document
|
|
189
187
|
const doc = element.contentDocument || element || ((_a = element.contentWindow) === null || _a === void 0 ? void 0 : _a.document);
|
|
190
188
|
if (!doc)
|
|
191
189
|
continue;
|
|
192
|
-
|
|
190
|
+
if (part.startsWith('frame[name=') || part.startsWith('iframe[name=')) {
|
|
191
|
+
const nameMatch = part.match(/\[name=['"]([^'"]+)['"]\]/);
|
|
192
|
+
if (nameMatch && nameMatch[1]) {
|
|
193
|
+
const frameName = nameMatch[1];
|
|
194
|
+
let foundFrames = [];
|
|
195
|
+
if (doc.getElementsByName && typeof doc.getElementsByName === 'function') {
|
|
196
|
+
foundFrames = Array.from(doc.getElementsByName(frameName))
|
|
197
|
+
.filter(el => el.tagName === 'FRAME' || el.tagName === 'IFRAME');
|
|
198
|
+
}
|
|
199
|
+
if (foundFrames.length === 0) {
|
|
200
|
+
const framesBySelector = Array.from(doc.querySelectorAll(`frame[name="${frameName}"], iframe[name="${frameName}"]`));
|
|
201
|
+
foundFrames = framesBySelector;
|
|
202
|
+
}
|
|
203
|
+
if (isLast) {
|
|
204
|
+
nextElements.push(...foundFrames);
|
|
205
|
+
}
|
|
206
|
+
else {
|
|
207
|
+
nextElements.push(...foundFrames);
|
|
208
|
+
}
|
|
209
|
+
continue;
|
|
210
|
+
}
|
|
211
|
+
}
|
|
193
212
|
const found = Array.from(doc.querySelectorAll(part));
|
|
194
213
|
if (isLast) {
|
|
195
|
-
// If it's the last part, keep all matching elements
|
|
196
214
|
nextElements.push(...found);
|
|
197
215
|
}
|
|
198
216
|
else {
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
nextElements.push(...iframes);
|
|
217
|
+
const frames = found.filter(el => el.tagName === 'IFRAME' || el.tagName === 'FRAME');
|
|
218
|
+
nextElements.push(...frames);
|
|
202
219
|
}
|
|
203
220
|
}
|
|
204
221
|
catch (error) {
|
|
205
|
-
console.warn('Cannot access iframe content:', error, {
|
|
222
|
+
console.warn('Cannot access iframe/frame content:', error, {
|
|
206
223
|
part,
|
|
207
224
|
element,
|
|
208
225
|
index: i
|
|
@@ -242,13 +259,19 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
242
259
|
}
|
|
243
260
|
return [];
|
|
244
261
|
}
|
|
245
|
-
// Modified to handle iframe context for URL resolution
|
|
246
262
|
function getElementValue(element, attribute) {
|
|
247
|
-
var _a, _b, _c, _d, _e;
|
|
263
|
+
var _a, _b, _c, _d, _e, _f;
|
|
248
264
|
if (!element)
|
|
249
265
|
return null;
|
|
250
|
-
|
|
251
|
-
|
|
266
|
+
let baseURL;
|
|
267
|
+
try {
|
|
268
|
+
baseURL = ((_b = (_a = element.ownerDocument) === null || _a === void 0 ? void 0 : _a.location) === null || _b === void 0 ? void 0 : _b.href) ||
|
|
269
|
+
((_c = element.ownerDocument) === null || _c === void 0 ? void 0 : _c.baseURI) ||
|
|
270
|
+
window.location.origin;
|
|
271
|
+
}
|
|
272
|
+
catch (e) {
|
|
273
|
+
baseURL = window.location.origin;
|
|
274
|
+
}
|
|
252
275
|
switch (attribute) {
|
|
253
276
|
case 'href': {
|
|
254
277
|
const relativeHref = element.getAttribute('href');
|
|
@@ -259,11 +282,15 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
259
282
|
return relativeSrc ? new URL(relativeSrc, baseURL).href : null;
|
|
260
283
|
}
|
|
261
284
|
case 'innerText':
|
|
262
|
-
return (
|
|
285
|
+
return (_d = element.innerText) === null || _d === void 0 ? void 0 : _d.trim();
|
|
263
286
|
case 'textContent':
|
|
264
|
-
return (
|
|
287
|
+
return (_e = element.textContent) === null || _e === void 0 ? void 0 : _e.trim();
|
|
288
|
+
case 'innerHTML':
|
|
289
|
+
return element.innerHTML;
|
|
290
|
+
case 'outerHTML':
|
|
291
|
+
return element.outerHTML;
|
|
265
292
|
default:
|
|
266
|
-
return element.getAttribute(attribute) || ((
|
|
293
|
+
return element.getAttribute(attribute) || ((_f = element.innerText) === null || _f === void 0 ? void 0 : _f.trim());
|
|
267
294
|
}
|
|
268
295
|
}
|
|
269
296
|
// Rest of the functions remain largely the same
|
|
@@ -332,7 +359,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
332
359
|
*/
|
|
333
360
|
window.scrapeList = function ({ listSelector, fields, limit = 10 }) {
|
|
334
361
|
return __awaiter(this, void 0, void 0, function* () {
|
|
335
|
-
// Enhanced query function to handle
|
|
362
|
+
// Enhanced query function to handle iframe, frame and shadow DOM
|
|
336
363
|
const queryElement = (rootElement, selector) => {
|
|
337
364
|
if (!selector.includes('>>') && !selector.includes(':>>')) {
|
|
338
365
|
return rootElement.querySelector(selector);
|
|
@@ -342,15 +369,15 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
342
369
|
for (let i = 0; i < parts.length; i++) {
|
|
343
370
|
if (!currentElement)
|
|
344
371
|
return null;
|
|
345
|
-
// Handle iframe traversal
|
|
346
|
-
if (currentElement.tagName === 'IFRAME') {
|
|
372
|
+
// Handle iframe and frame traversal
|
|
373
|
+
if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') {
|
|
347
374
|
try {
|
|
348
|
-
const
|
|
349
|
-
currentElement =
|
|
375
|
+
const frameDoc = currentElement.contentDocument || currentElement.contentWindow.document;
|
|
376
|
+
currentElement = frameDoc.querySelector(parts[i]);
|
|
350
377
|
continue;
|
|
351
378
|
}
|
|
352
379
|
catch (e) {
|
|
353
|
-
console.warn(
|
|
380
|
+
console.warn(`Cannot access ${currentElement.tagName.toLowerCase()} content:`, e);
|
|
354
381
|
return null;
|
|
355
382
|
}
|
|
356
383
|
}
|
|
@@ -385,14 +412,14 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
385
412
|
for (const part of parts) {
|
|
386
413
|
const nextElements = [];
|
|
387
414
|
for (const element of currentElements) {
|
|
388
|
-
// Handle iframe traversal
|
|
389
|
-
if (element.tagName === 'IFRAME') {
|
|
415
|
+
// Handle iframe and frame traversal
|
|
416
|
+
if (element.tagName === 'IFRAME' || element.tagName === 'FRAME') {
|
|
390
417
|
try {
|
|
391
|
-
const
|
|
392
|
-
nextElements.push(...
|
|
418
|
+
const frameDoc = element.contentDocument || element.contentWindow.document;
|
|
419
|
+
nextElements.push(...frameDoc.querySelectorAll(part));
|
|
393
420
|
}
|
|
394
421
|
catch (e) {
|
|
395
|
-
console.warn(
|
|
422
|
+
console.warn(`Cannot access ${element.tagName.toLowerCase()} content:`, e);
|
|
396
423
|
continue;
|
|
397
424
|
}
|
|
398
425
|
}
|
|
@@ -461,8 +488,8 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
461
488
|
else if (currentElement.tagName === 'TR') {
|
|
462
489
|
return { type: 'TR', element: currentElement };
|
|
463
490
|
}
|
|
464
|
-
// Handle iframe crossing
|
|
465
|
-
if (currentElement.tagName === 'IFRAME') {
|
|
491
|
+
// Handle iframe and frame crossing
|
|
492
|
+
if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') {
|
|
466
493
|
try {
|
|
467
494
|
currentElement = currentElement.contentDocument.body;
|
|
468
495
|
}
|
|
@@ -504,7 +531,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
504
531
|
}
|
|
505
532
|
if (current.tagName === 'TH')
|
|
506
533
|
return true;
|
|
507
|
-
if (current.tagName === 'IFRAME') {
|
|
534
|
+
if (current.tagName === 'IFRAME' || current.tagName === 'FRAME') {
|
|
508
535
|
try {
|
|
509
536
|
current = current.contentDocument.body;
|
|
510
537
|
}
|
|
@@ -556,15 +583,18 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
556
583
|
const shadowHost = baseElement.getRootNode().host;
|
|
557
584
|
allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName));
|
|
558
585
|
}
|
|
559
|
-
// Get elements from iframes
|
|
560
|
-
const
|
|
561
|
-
|
|
586
|
+
// Get elements from iframes and frames
|
|
587
|
+
const frames = [
|
|
588
|
+
...Array.from(document.getElementsByTagName('iframe')),
|
|
589
|
+
...Array.from(document.getElementsByTagName('frame'))
|
|
590
|
+
];
|
|
591
|
+
for (const frame of frames) {
|
|
562
592
|
try {
|
|
563
|
-
const
|
|
564
|
-
allElements.push(...
|
|
593
|
+
const frameDoc = frame.contentDocument || frame.contentWindow.document;
|
|
594
|
+
allElements.push(...frameDoc.getElementsByTagName(baseElement.tagName));
|
|
565
595
|
}
|
|
566
596
|
catch (e) {
|
|
567
|
-
console.warn(
|
|
597
|
+
console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e);
|
|
568
598
|
}
|
|
569
599
|
}
|
|
570
600
|
return allElements.filter(element => {
|
|
@@ -611,7 +641,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
611
641
|
});
|
|
612
642
|
const tableData = [];
|
|
613
643
|
const nonTableData = [];
|
|
614
|
-
// Process table data with
|
|
644
|
+
// Process table data with support for iframes, frames, and shadow DOM
|
|
615
645
|
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
|
|
616
646
|
const container = containers[containerIndex];
|
|
617
647
|
const { tableFields } = containerFields[containerIndex];
|
|
@@ -619,13 +649,13 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
619
649
|
const firstField = Object.values(tableFields)[0];
|
|
620
650
|
const firstElement = queryElement(container, firstField.selector);
|
|
621
651
|
let tableContext = firstElement;
|
|
622
|
-
// Find table context including
|
|
652
|
+
// Find table context including iframe, frame and shadow DOM
|
|
623
653
|
while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) {
|
|
624
654
|
if (tableContext.getRootNode() instanceof ShadowRoot) {
|
|
625
655
|
tableContext = tableContext.getRootNode().host;
|
|
626
656
|
continue;
|
|
627
657
|
}
|
|
628
|
-
if (tableContext.tagName === 'IFRAME') {
|
|
658
|
+
if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') {
|
|
629
659
|
try {
|
|
630
660
|
tableContext = tableContext.contentDocument.body;
|
|
631
661
|
}
|
|
@@ -646,14 +676,14 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
646
676
|
if (tableContext.shadowRoot) {
|
|
647
677
|
rows.push(...tableContext.shadowRoot.getElementsByTagName('TR'));
|
|
648
678
|
}
|
|
649
|
-
// Get rows from iframes
|
|
650
|
-
if (tableContext.tagName === 'IFRAME') {
|
|
679
|
+
// Get rows from iframes and frames
|
|
680
|
+
if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') {
|
|
651
681
|
try {
|
|
652
|
-
const
|
|
653
|
-
rows.push(...
|
|
682
|
+
const frameDoc = tableContext.contentDocument || tableContext.contentWindow.document;
|
|
683
|
+
rows.push(...frameDoc.getElementsByTagName('TR'));
|
|
654
684
|
}
|
|
655
685
|
catch (e) {
|
|
656
|
-
console.warn(
|
|
686
|
+
console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e);
|
|
657
687
|
}
|
|
658
688
|
}
|
|
659
689
|
const processedRows = filterRowsBasedOnTag(rows, tableFields);
|
|
@@ -713,7 +743,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|
|
713
743
|
}
|
|
714
744
|
}
|
|
715
745
|
}
|
|
716
|
-
// Process non-table data with
|
|
746
|
+
// Process non-table data with all contexts support
|
|
717
747
|
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
|
|
718
748
|
if (nonTableData.length >= limit)
|
|
719
749
|
break;
|
package/build/interpret.js
CHANGED
|
@@ -234,6 +234,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
234
234
|
parsedSuperset[key] = Array.isArray(superset[key])
|
|
235
235
|
? (0, utils_1.arrayToObject)(superset[key])
|
|
236
236
|
: superset[key];
|
|
237
|
+
if ((key === 'url' || key === 'selectors') &&
|
|
238
|
+
Array.isArray(value) && Array.isArray(superset[key]) &&
|
|
239
|
+
value.length === 0 && superset[key].length === 0) {
|
|
240
|
+
return true;
|
|
241
|
+
}
|
|
237
242
|
if (key === 'selectors' && Array.isArray(value) && Array.isArray(superset[key])) {
|
|
238
243
|
return value.some(selector => superset[key].includes(selector));
|
|
239
244
|
}
|
|
@@ -495,29 +500,45 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
495
500
|
return false;
|
|
496
501
|
};
|
|
497
502
|
// Enhanced button finder with retry mechanism
|
|
498
|
-
const findWorkingButton = (selectors
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
503
|
+
const findWorkingButton = (selectors) => __awaiter(this, void 0, void 0, function* () {
|
|
504
|
+
let updatedSelectors = [...selectors];
|
|
505
|
+
for (let i = 0; i < selectors.length; i++) {
|
|
506
|
+
const selector = selectors[i];
|
|
507
|
+
let retryCount = 0;
|
|
508
|
+
let selectorSuccess = false;
|
|
509
|
+
while (retryCount < MAX_RETRIES && !selectorSuccess) {
|
|
510
|
+
try {
|
|
511
|
+
const button = yield page.waitForSelector(selector, {
|
|
512
|
+
state: 'attached',
|
|
513
|
+
timeout: 10000
|
|
514
|
+
});
|
|
515
|
+
if (button) {
|
|
516
|
+
debugLog('Found working selector:', selector);
|
|
517
|
+
return {
|
|
518
|
+
button,
|
|
519
|
+
workingSelector: selector,
|
|
520
|
+
updatedSelectors
|
|
521
|
+
};
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
catch (error) {
|
|
525
|
+
retryCount++;
|
|
526
|
+
debugLog(`Selector "${selector}" failed: attempt ${retryCount}/${MAX_RETRIES}`);
|
|
527
|
+
if (retryCount < MAX_RETRIES) {
|
|
528
|
+
yield page.waitForTimeout(RETRY_DELAY);
|
|
529
|
+
}
|
|
530
|
+
else {
|
|
531
|
+
debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
|
|
532
|
+
updatedSelectors = updatedSelectors.filter(s => s !== selector);
|
|
533
|
+
}
|
|
508
534
|
}
|
|
509
535
|
}
|
|
510
|
-
catch (error) {
|
|
511
|
-
debugLog(`Selector failed: ${selector}`);
|
|
512
|
-
}
|
|
513
|
-
}
|
|
514
|
-
// Implement retry mechanism when no selectors work
|
|
515
|
-
if (selectors.length > 0 && retryCount < MAX_RETRIES) {
|
|
516
|
-
debugLog(`Retry attempt ${retryCount + 1} of ${MAX_RETRIES}`);
|
|
517
|
-
yield page.waitForTimeout(RETRY_DELAY);
|
|
518
|
-
return findWorkingButton(selectors, retryCount + 1);
|
|
519
536
|
}
|
|
520
|
-
return {
|
|
537
|
+
return {
|
|
538
|
+
button: null,
|
|
539
|
+
workingSelector: null,
|
|
540
|
+
updatedSelectors
|
|
541
|
+
};
|
|
521
542
|
});
|
|
522
543
|
const retryOperation = (operation, retryCount = 0) => __awaiter(this, void 0, void 0, function* () {
|
|
523
544
|
try {
|
|
@@ -569,7 +590,8 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
569
590
|
yield scrapeCurrentPage();
|
|
570
591
|
if (checkLimit())
|
|
571
592
|
return allResults;
|
|
572
|
-
const { button, workingSelector } = yield findWorkingButton(availableSelectors);
|
|
593
|
+
const { button, workingSelector, updatedSelectors } = yield findWorkingButton(availableSelectors);
|
|
594
|
+
availableSelectors = updatedSelectors;
|
|
573
595
|
if (!button || !workingSelector) {
|
|
574
596
|
// Final retry for navigation when no selectors work
|
|
575
597
|
const success = yield retryOperation(() => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -586,7 +608,6 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
586
608
|
return allResults;
|
|
587
609
|
break;
|
|
588
610
|
}
|
|
589
|
-
availableSelectors = availableSelectors.slice(availableSelectors.indexOf(workingSelector));
|
|
590
611
|
let retryCount = 0;
|
|
591
612
|
let navigationSuccess = false;
|
|
592
613
|
while (retryCount < MAX_RETRIES && !navigationSuccess) {
|
|
@@ -651,17 +672,21 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
651
672
|
break;
|
|
652
673
|
}
|
|
653
674
|
case 'clickLoadMore': {
|
|
675
|
+
yield scrapeCurrentPage();
|
|
676
|
+
if (checkLimit())
|
|
677
|
+
return allResults;
|
|
678
|
+
let loadMoreCounter = 0;
|
|
679
|
+
let previousResultCount = allResults.length;
|
|
680
|
+
let noNewItemsCounter = 0;
|
|
681
|
+
const MAX_NO_NEW_ITEMS = 2;
|
|
654
682
|
while (true) {
|
|
655
|
-
// Find working button with retry mechanism
|
|
656
|
-
const { button: loadMoreButton, workingSelector } = yield findWorkingButton(availableSelectors);
|
|
683
|
+
// Find working button with retry mechanism
|
|
684
|
+
const { button: loadMoreButton, workingSelector, updatedSelectors } = yield findWorkingButton(availableSelectors);
|
|
685
|
+
availableSelectors = updatedSelectors;
|
|
657
686
|
if (!workingSelector || !loadMoreButton) {
|
|
658
687
|
debugLog('No working Load More selector found after retries');
|
|
659
|
-
const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
|
|
660
|
-
allResults = allResults.concat(finalResults);
|
|
661
688
|
return allResults;
|
|
662
689
|
}
|
|
663
|
-
// Update available selectors to start from the working one
|
|
664
|
-
availableSelectors = availableSelectors.slice(availableSelectors.indexOf(workingSelector));
|
|
665
690
|
// Implement retry mechanism for clicking the button
|
|
666
691
|
let retryCount = 0;
|
|
667
692
|
let clickSuccess = false;
|
|
@@ -685,6 +710,8 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
685
710
|
}
|
|
686
711
|
if (clickSuccess) {
|
|
687
712
|
yield page.waitForTimeout(1000);
|
|
713
|
+
loadMoreCounter++;
|
|
714
|
+
debugLog(`Successfully clicked Load More button (${loadMoreCounter} times)`);
|
|
688
715
|
}
|
|
689
716
|
}
|
|
690
717
|
catch (error) {
|
|
@@ -698,8 +725,6 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
698
725
|
}
|
|
699
726
|
if (!clickSuccess) {
|
|
700
727
|
debugLog(`Load More clicking failed after ${MAX_RETRIES} attempts`);
|
|
701
|
-
const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
|
|
702
|
-
allResults = allResults.concat(finalResults);
|
|
703
728
|
return allResults;
|
|
704
729
|
}
|
|
705
730
|
// Wait for content to load and check scroll height
|
|
@@ -707,19 +732,30 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
707
732
|
yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
|
708
733
|
yield page.waitForTimeout(2000);
|
|
709
734
|
const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
|
|
710
|
-
|
|
735
|
+
const heightChanged = currentHeight !== previousHeight;
|
|
736
|
+
previousHeight = currentHeight;
|
|
737
|
+
yield scrapeCurrentPage();
|
|
738
|
+
const currentResultCount = allResults.length;
|
|
739
|
+
const newItemsAdded = currentResultCount > previousResultCount;
|
|
740
|
+
if (!newItemsAdded) {
|
|
741
|
+
noNewItemsCounter++;
|
|
742
|
+
debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`);
|
|
743
|
+
if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) {
|
|
744
|
+
debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`);
|
|
745
|
+
return allResults;
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
else {
|
|
749
|
+
noNewItemsCounter = 0;
|
|
750
|
+
previousResultCount = currentResultCount;
|
|
751
|
+
}
|
|
752
|
+
if (checkLimit())
|
|
753
|
+
return allResults;
|
|
754
|
+
if (!heightChanged) {
|
|
711
755
|
debugLog('No more items loaded after Load More');
|
|
712
|
-
const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
|
|
713
|
-
allResults = allResults.concat(finalResults);
|
|
714
756
|
return allResults;
|
|
715
757
|
}
|
|
716
|
-
previousHeight = currentHeight;
|
|
717
|
-
if (config.limit && allResults.length >= config.limit) {
|
|
718
|
-
allResults = allResults.slice(0, config.limit);
|
|
719
|
-
break;
|
|
720
|
-
}
|
|
721
758
|
}
|
|
722
|
-
break;
|
|
723
759
|
}
|
|
724
760
|
default: {
|
|
725
761
|
yield scrapeCurrentPage();
|