mx-cloud 0.0.16 → 0.0.18
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
- package/build/interpret.d.ts +9 -0
- package/build/interpret.js +246 -26
- package/package.json +1 -1
package/build/interpret.d.ts
CHANGED
|
@@ -54,6 +54,7 @@ export default class Interpreter extends EventEmitter {
|
|
|
54
54
|
private options;
|
|
55
55
|
private concurrency;
|
|
56
56
|
private stopper;
|
|
57
|
+
private isAborted;
|
|
57
58
|
private log;
|
|
58
59
|
private blocker;
|
|
59
60
|
private cumulativeResults;
|
|
@@ -80,6 +81,14 @@ export default class Interpreter extends EventEmitter {
|
|
|
80
81
|
* @returns True if `where` is applicable in the given context, false otherwise
|
|
81
82
|
*/
|
|
82
83
|
private applicable;
|
|
84
|
+
/**
|
|
85
|
+
* Sets the abort flag to immediately stop all operations
|
|
86
|
+
*/
|
|
87
|
+
abort(): void;
|
|
88
|
+
/**
|
|
89
|
+
* Returns the current abort status
|
|
90
|
+
*/
|
|
91
|
+
getIsAborted(): boolean;
|
|
83
92
|
/**
|
|
84
93
|
* Given a Playwright's page object and a "declarative" list of actions, this function
|
|
85
94
|
* calls all mentioned functions on the Page object.\
|
package/build/interpret.js
CHANGED
|
@@ -63,6 +63,7 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
63
63
|
var _a;
|
|
64
64
|
super();
|
|
65
65
|
this.stopper = null;
|
|
66
|
+
this.isAborted = false;
|
|
66
67
|
this.blocker = null;
|
|
67
68
|
this.cumulativeResults = [];
|
|
68
69
|
this.autohealFailures = [];
|
|
@@ -90,7 +91,9 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
90
91
|
adblocker_playwright_1.PlaywrightBlocker.fromLists(cross_fetch_1.default, ['https://easylist.to/easylist/easylist.txt']).then(blocker => {
|
|
91
92
|
this.blocker = blocker;
|
|
92
93
|
}).catch(err => {
|
|
93
|
-
this.log(`Failed to initialize ad-blocker
|
|
94
|
+
this.log(`Failed to initialize ad-blocker: ${err.message}`, logger_1.Level.ERROR);
|
|
95
|
+
// Continue without ad-blocker rather than crashing
|
|
96
|
+
this.blocker = null;
|
|
94
97
|
});
|
|
95
98
|
}
|
|
96
99
|
trackAutohealFailure(error) {
|
|
@@ -307,6 +310,18 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
307
310
|
}
|
|
308
311
|
});
|
|
309
312
|
}
|
|
313
|
+
/**
|
|
314
|
+
* Sets the abort flag to immediately stop all operations
|
|
315
|
+
*/
|
|
316
|
+
abort() {
|
|
317
|
+
this.isAborted = true;
|
|
318
|
+
}
|
|
319
|
+
/**
|
|
320
|
+
* Returns the current abort status
|
|
321
|
+
*/
|
|
322
|
+
getIsAborted() {
|
|
323
|
+
return this.isAborted;
|
|
324
|
+
}
|
|
310
325
|
/**
|
|
311
326
|
* Given a Playwright's page object and a "declarative" list of actions, this function
|
|
312
327
|
* calls all mentioned functions on the Page object.\
|
|
@@ -319,6 +334,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
319
334
|
carryOutSteps(page, steps) {
|
|
320
335
|
return __awaiter(this, void 0, void 0, function* () {
|
|
321
336
|
var _a, _b;
|
|
337
|
+
// Check abort flag at start of execution
|
|
338
|
+
if (this.isAborted) {
|
|
339
|
+
this.log('Workflow aborted, stopping execution', logger_1.Level.WARN);
|
|
340
|
+
return;
|
|
341
|
+
}
|
|
322
342
|
/**
|
|
323
343
|
* Defines overloaded (or added) methods/actions usable in the workflow.
|
|
324
344
|
* If a method overloads any existing method of the Page class, it accepts the same set
|
|
@@ -376,6 +396,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
376
396
|
}),
|
|
377
397
|
scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
|
|
378
398
|
var _a;
|
|
399
|
+
// Check abort flag at start of scraping
|
|
400
|
+
if (this.isAborted) {
|
|
401
|
+
this.log('Workflow aborted, stopping scrapeSchema', logger_1.Level.WARN);
|
|
402
|
+
return;
|
|
403
|
+
}
|
|
379
404
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
|
|
380
405
|
this.options.debugChannel.setActionType('scrapeSchema');
|
|
381
406
|
}
|
|
@@ -429,6 +454,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
429
454
|
}),
|
|
430
455
|
scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
|
|
431
456
|
var _a, _b;
|
|
457
|
+
// Check abort flag at start of scraping
|
|
458
|
+
if (this.isAborted) {
|
|
459
|
+
this.log('Workflow aborted, stopping scrapeList', logger_1.Level.WARN);
|
|
460
|
+
return;
|
|
461
|
+
}
|
|
432
462
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
|
|
433
463
|
this.options.debugChannel.setActionType('scrapeList');
|
|
434
464
|
}
|
|
@@ -497,9 +527,15 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
497
527
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
|
|
498
528
|
this.options.debugChannel.setActionType('script');
|
|
499
529
|
}
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
530
|
+
try {
|
|
531
|
+
const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
|
|
532
|
+
const x = new AsyncFunction('page', 'log', code);
|
|
533
|
+
yield x(page, this.log);
|
|
534
|
+
}
|
|
535
|
+
catch (error) {
|
|
536
|
+
this.log(`Script execution failed: ${error.message}`, logger_1.Level.ERROR);
|
|
537
|
+
throw new Error(`Script execution error: ${error.message}`);
|
|
538
|
+
}
|
|
503
539
|
}),
|
|
504
540
|
flag: () => __awaiter(this, void 0, void 0, function* () {
|
|
505
541
|
return new Promise((res) => {
|
|
@@ -527,6 +563,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
527
563
|
}
|
|
528
564
|
});
|
|
529
565
|
for (const step of steps) {
|
|
566
|
+
// Check abort flag before each step
|
|
567
|
+
if (this.isAborted) {
|
|
568
|
+
this.log('Workflow aborted during step execution', logger_1.Level.WARN);
|
|
569
|
+
return;
|
|
570
|
+
}
|
|
530
571
|
this.log(`Launching ${String(step.action)}`, logger_1.Level.LOG);
|
|
531
572
|
try {
|
|
532
573
|
if (step.action in wawActions) {
|
|
@@ -547,7 +588,15 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
547
588
|
}
|
|
548
589
|
if (methodName === 'waitForLoadState') {
|
|
549
590
|
try {
|
|
550
|
-
|
|
591
|
+
// Add timeout if not already specified
|
|
592
|
+
let args = step.args;
|
|
593
|
+
if (Array.isArray(args) && args.length === 1) {
|
|
594
|
+
args = [args[0], { timeout: 30000 }];
|
|
595
|
+
}
|
|
596
|
+
else if (!Array.isArray(args)) {
|
|
597
|
+
args = [args, { timeout: 30000 }];
|
|
598
|
+
}
|
|
599
|
+
yield executeAction(invokee, methodName, args);
|
|
551
600
|
}
|
|
552
601
|
catch (error) {
|
|
553
602
|
yield executeAction(invokee, methodName, 'domcontentloaded');
|
|
@@ -583,6 +632,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
583
632
|
}
|
|
584
633
|
handlePagination(page, config) {
|
|
585
634
|
return __awaiter(this, void 0, void 0, function* () {
|
|
635
|
+
// Check abort flag at start of pagination
|
|
636
|
+
if (this.isAborted) {
|
|
637
|
+
this.log('Workflow aborted, stopping pagination', logger_1.Level.WARN);
|
|
638
|
+
return [];
|
|
639
|
+
}
|
|
586
640
|
let allResults = [];
|
|
587
641
|
let previousHeight = 0;
|
|
588
642
|
let scrapedItems = new Set();
|
|
@@ -594,7 +648,22 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
594
648
|
console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
|
|
595
649
|
};
|
|
596
650
|
const scrapeCurrentPage = () => __awaiter(this, void 0, void 0, function* () {
|
|
597
|
-
|
|
651
|
+
// Check abort flag before scraping current page
|
|
652
|
+
if (this.isAborted) {
|
|
653
|
+
debugLog("Workflow aborted, stopping scrapeCurrentPage");
|
|
654
|
+
return;
|
|
655
|
+
}
|
|
656
|
+
// Add timeout to prevent hanging on page evaluation
|
|
657
|
+
const evaluationPromise = page.evaluate((cfg) => window.scrapeList(cfg), config);
|
|
658
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('Page evaluation timeout')), 10000));
|
|
659
|
+
let results;
|
|
660
|
+
try {
|
|
661
|
+
results = yield Promise.race([evaluationPromise, timeoutPromise]);
|
|
662
|
+
}
|
|
663
|
+
catch (error) {
|
|
664
|
+
debugLog(`Page evaluation failed: ${error.message}`);
|
|
665
|
+
return;
|
|
666
|
+
}
|
|
598
667
|
const newResults = results.filter(item => {
|
|
599
668
|
const uniqueKey = JSON.stringify(item);
|
|
600
669
|
if (scrapedItems.has(uniqueKey))
|
|
@@ -651,14 +720,22 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
651
720
|
});
|
|
652
721
|
// Enhanced button finder with retry mechanism for both CSS and XPath selectors
|
|
653
722
|
const findWorkingButton = (selectors) => __awaiter(this, void 0, void 0, function* () {
|
|
723
|
+
const startTime = Date.now();
|
|
724
|
+
const MAX_BUTTON_SEARCH_TIME = 15000;
|
|
654
725
|
let updatedSelectors = [...selectors];
|
|
655
726
|
for (let i = 0; i < selectors.length; i++) {
|
|
727
|
+
// Check overall timeout
|
|
728
|
+
if (Date.now() - startTime > MAX_BUTTON_SEARCH_TIME) {
|
|
729
|
+
debugLog(`Button search timeout reached (${MAX_BUTTON_SEARCH_TIME}ms), aborting`);
|
|
730
|
+
break;
|
|
731
|
+
}
|
|
656
732
|
const selector = selectors[i];
|
|
657
733
|
let retryCount = 0;
|
|
658
734
|
let selectorSuccess = false;
|
|
659
735
|
while (retryCount < MAX_RETRIES && !selectorSuccess) {
|
|
660
736
|
try {
|
|
661
|
-
|
|
737
|
+
// Reduce timeout to prevent hanging on slow selectors
|
|
738
|
+
const button = yield waitForSelectorUniversal(selector, { timeout: 2000 });
|
|
662
739
|
if (button) {
|
|
663
740
|
debugLog('Found working selector:', selector);
|
|
664
741
|
return {
|
|
@@ -667,16 +744,30 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
667
744
|
updatedSelectors
|
|
668
745
|
};
|
|
669
746
|
}
|
|
747
|
+
else {
|
|
748
|
+
// Treat null result as failed attempt
|
|
749
|
+
retryCount++;
|
|
750
|
+
debugLog(`Selector "${selector}" not found: attempt ${retryCount}/${MAX_RETRIES}`);
|
|
751
|
+
if (retryCount < MAX_RETRIES) {
|
|
752
|
+
yield page.waitForTimeout(RETRY_DELAY);
|
|
753
|
+
}
|
|
754
|
+
else {
|
|
755
|
+
debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
|
|
756
|
+
updatedSelectors = updatedSelectors.filter(s => s !== selector);
|
|
757
|
+
selectorSuccess = true; // Exit retry loop for this selector
|
|
758
|
+
}
|
|
759
|
+
}
|
|
670
760
|
}
|
|
671
761
|
catch (error) {
|
|
672
762
|
retryCount++;
|
|
673
|
-
debugLog(`Selector "${selector}"
|
|
763
|
+
debugLog(`Selector "${selector}" error: attempt ${retryCount}/${MAX_RETRIES} - ${error.message}`);
|
|
674
764
|
if (retryCount < MAX_RETRIES) {
|
|
675
765
|
yield page.waitForTimeout(RETRY_DELAY);
|
|
676
766
|
}
|
|
677
767
|
else {
|
|
678
768
|
debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
|
|
679
769
|
updatedSelectors = updatedSelectors.filter(s => s !== selector);
|
|
770
|
+
selectorSuccess = true; // Exit retry loop for this selector
|
|
680
771
|
}
|
|
681
772
|
}
|
|
682
773
|
}
|
|
@@ -703,8 +794,30 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
703
794
|
});
|
|
704
795
|
let availableSelectors = config.pagination.selector.split(',');
|
|
705
796
|
let unchangedResultCounter = 0;
|
|
797
|
+
let paginationIterations = 0;
|
|
798
|
+
const MAX_PAGINATION_ITERATIONS = 100; // Prevent infinite pagination
|
|
799
|
+
const paginationStartTime = Date.now();
|
|
800
|
+
const MAX_PAGINATION_TIME = 30 * 60 * 1000; // 30 minutes max for pagination
|
|
706
801
|
try {
|
|
707
802
|
while (true) {
|
|
803
|
+
// Check abort flag at start of each pagination iteration
|
|
804
|
+
if (this.isAborted) {
|
|
805
|
+
this.log('Workflow aborted during pagination loop', logger_1.Level.WARN);
|
|
806
|
+
return allResults;
|
|
807
|
+
}
|
|
808
|
+
// Pagination circuit breakers
|
|
809
|
+
if (++paginationIterations > MAX_PAGINATION_ITERATIONS) {
|
|
810
|
+
debugLog(`Maximum pagination iterations reached (${MAX_PAGINATION_ITERATIONS}), stopping`);
|
|
811
|
+
return allResults;
|
|
812
|
+
}
|
|
813
|
+
if (Date.now() - paginationStartTime > MAX_PAGINATION_TIME) {
|
|
814
|
+
debugLog('Maximum pagination time reached (10 minutes), stopping');
|
|
815
|
+
return allResults;
|
|
816
|
+
}
|
|
817
|
+
// Add async yield every 5 iterations to prevent event loop blocking
|
|
818
|
+
if (paginationIterations % 5 === 0) {
|
|
819
|
+
yield new Promise(resolve => setImmediate(resolve));
|
|
820
|
+
}
|
|
708
821
|
switch (config.pagination.type) {
|
|
709
822
|
case 'scrollDown': {
|
|
710
823
|
let previousResultCount = allResults.length;
|
|
@@ -712,9 +825,14 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
712
825
|
if (checkLimit()) {
|
|
713
826
|
return allResults;
|
|
714
827
|
}
|
|
715
|
-
yield page.evaluate(() =>
|
|
828
|
+
yield page.evaluate(() => {
|
|
829
|
+
const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
830
|
+
window.scrollTo(0, scrollHeight);
|
|
831
|
+
});
|
|
716
832
|
yield page.waitForTimeout(2000);
|
|
717
|
-
const currentHeight = yield page.evaluate(() =>
|
|
833
|
+
const currentHeight = yield page.evaluate(() => {
|
|
834
|
+
return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
835
|
+
});
|
|
718
836
|
const currentResultCount = allResults.length;
|
|
719
837
|
if (currentResultCount === previousResultCount) {
|
|
720
838
|
unchangedResultCounter++;
|
|
@@ -916,10 +1034,23 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
916
1034
|
if (checkLimit())
|
|
917
1035
|
return allResults;
|
|
918
1036
|
let loadMoreCounter = 0;
|
|
919
|
-
//
|
|
920
|
-
|
|
921
|
-
|
|
1037
|
+
const MAX_LOAD_MORE_ITERATIONS = 100; // Prevent infinite load more
|
|
1038
|
+
const loadMoreStartTime = Date.now();
|
|
1039
|
+
const MAX_LOAD_MORE_TIME = 30 * 60 * 1000; // 5 minutes max for load more
|
|
922
1040
|
while (true) {
|
|
1041
|
+
// Load more circuit breakers
|
|
1042
|
+
if (loadMoreCounter >= MAX_LOAD_MORE_ITERATIONS) {
|
|
1043
|
+
debugLog(`Maximum load more iterations reached (${MAX_LOAD_MORE_ITERATIONS}), stopping`);
|
|
1044
|
+
return allResults;
|
|
1045
|
+
}
|
|
1046
|
+
if (Date.now() - loadMoreStartTime > MAX_LOAD_MORE_TIME) {
|
|
1047
|
+
debugLog('Maximum load more time reached (5 minutes), stopping');
|
|
1048
|
+
return allResults;
|
|
1049
|
+
}
|
|
1050
|
+
// Add async yield every 3 iterations
|
|
1051
|
+
if (loadMoreCounter % 3 === 0 && loadMoreCounter > 0) {
|
|
1052
|
+
yield new Promise(resolve => setImmediate(resolve));
|
|
1053
|
+
}
|
|
923
1054
|
// Find working button with retry mechanism
|
|
924
1055
|
const { button: loadMoreButton, workingSelector, updatedSelectors } = yield findWorkingButton(availableSelectors);
|
|
925
1056
|
availableSelectors = updatedSelectors;
|
|
@@ -969,9 +1100,14 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
969
1100
|
}
|
|
970
1101
|
// Wait for content to load and check scroll height
|
|
971
1102
|
yield page.waitForTimeout(2000);
|
|
972
|
-
yield page.evaluate(() =>
|
|
1103
|
+
yield page.evaluate(() => {
|
|
1104
|
+
const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
1105
|
+
window.scrollTo(0, scrollHeight);
|
|
1106
|
+
});
|
|
973
1107
|
yield page.waitForTimeout(2000);
|
|
974
|
-
const currentHeight = yield page.evaluate(() =>
|
|
1108
|
+
const currentHeight = yield page.evaluate(() => {
|
|
1109
|
+
return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
1110
|
+
});
|
|
975
1111
|
const heightChanged = currentHeight !== previousHeight;
|
|
976
1112
|
previousHeight = currentHeight;
|
|
977
1113
|
yield scrapeCurrentPage();
|
|
@@ -1670,12 +1806,35 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1670
1806
|
selector.includes('@id=');
|
|
1671
1807
|
let count = 0;
|
|
1672
1808
|
if (isXPath) {
|
|
1809
|
+
// Add timeout to prevent XPath hanging
|
|
1673
1810
|
const locator = page.locator(`xpath=${selector}`);
|
|
1674
|
-
|
|
1811
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('XPath timeout')), 5000));
|
|
1812
|
+
try {
|
|
1813
|
+
count = yield Promise.race([
|
|
1814
|
+
locator.count(),
|
|
1815
|
+
timeoutPromise
|
|
1816
|
+
]);
|
|
1817
|
+
}
|
|
1818
|
+
catch (error) {
|
|
1819
|
+
// XPath timed out or failed
|
|
1820
|
+
return false;
|
|
1821
|
+
}
|
|
1675
1822
|
}
|
|
1676
1823
|
else {
|
|
1677
|
-
|
|
1678
|
-
|
|
1824
|
+
// Add timeout to CSS selector operations
|
|
1825
|
+
try {
|
|
1826
|
+
const elementsPromise = page.$$(selector);
|
|
1827
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('CSS selector timeout')), 5000));
|
|
1828
|
+
const elements = yield Promise.race([
|
|
1829
|
+
elementsPromise,
|
|
1830
|
+
timeoutPromise
|
|
1831
|
+
]);
|
|
1832
|
+
count = elements ? elements.length : 0;
|
|
1833
|
+
}
|
|
1834
|
+
catch (error) {
|
|
1835
|
+
// CSS selector timed out or failed
|
|
1836
|
+
return false;
|
|
1837
|
+
}
|
|
1679
1838
|
}
|
|
1680
1839
|
// For list selectors, we need multiple elements
|
|
1681
1840
|
if (isListSelector) {
|
|
@@ -1936,7 +2095,33 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1936
2095
|
this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
|
|
1937
2096
|
});
|
|
1938
2097
|
/* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
|
|
2098
|
+
let loopIterations = 0;
|
|
2099
|
+
const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
|
|
2100
|
+
let consecutiveFailures = 0;
|
|
2101
|
+
const MAX_CONSECUTIVE_FAILURES = 10;
|
|
2102
|
+
const startTime = Date.now();
|
|
2103
|
+
const MAX_EXECUTION_TIME = 30 * 60 * 1000; // 30 minutes max
|
|
1939
2104
|
while (true) {
|
|
2105
|
+
// Multiple circuit breakers to prevent infinite loops
|
|
2106
|
+
if (++loopIterations > MAX_LOOP_ITERATIONS) {
|
|
2107
|
+
this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
|
|
2108
|
+
return;
|
|
2109
|
+
}
|
|
2110
|
+
// Time-based circuit breaker
|
|
2111
|
+
if (Date.now() - startTime > MAX_EXECUTION_TIME) {
|
|
2112
|
+
this.log('Maximum execution time reached (30 minutes), terminating workflow', logger_1.Level.ERROR);
|
|
2113
|
+
return;
|
|
2114
|
+
}
|
|
2115
|
+
// Failure-based circuit breaker
|
|
2116
|
+
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
2117
|
+
this.log('Too many consecutive failures, terminating to prevent hang', logger_1.Level.ERROR);
|
|
2118
|
+
return;
|
|
2119
|
+
}
|
|
2120
|
+
// Check abort flag immediately
|
|
2121
|
+
if (this.isAborted) {
|
|
2122
|
+
this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
|
|
2123
|
+
return;
|
|
2124
|
+
}
|
|
1940
2125
|
// Checks whether the page was closed from outside,
|
|
1941
2126
|
// or the workflow execution has been stopped via `interpreter.stop()`
|
|
1942
2127
|
if (p.isClosed() || !this.stopper) {
|
|
@@ -2001,6 +2186,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
2001
2186
|
console.log("MATCHED ACTION ID:", actionId);
|
|
2002
2187
|
this.log(`Matched ${JSON.stringify(action === null || action === void 0 ? void 0 : action.where)}`, logger_1.Level.LOG);
|
|
2003
2188
|
if (action) { // action is matched
|
|
2189
|
+
// Check abort flag before executing action
|
|
2190
|
+
if (this.isAborted) {
|
|
2191
|
+
this.log('Workflow aborted before action execution', logger_1.Level.WARN);
|
|
2192
|
+
return;
|
|
2193
|
+
}
|
|
2004
2194
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.activeId) {
|
|
2005
2195
|
this.options.debugChannel.activeId(actionId);
|
|
2006
2196
|
}
|
|
@@ -2017,14 +2207,21 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
2017
2207
|
usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
|
|
2018
2208
|
workflowCopy.splice(actionId, 1);
|
|
2019
2209
|
console.log(`Action with ID ${action.id} removed from the workflow copy.`);
|
|
2020
|
-
//
|
|
2210
|
+
// Reset counters on successful action (but keep some history to prevent infinite resets)
|
|
2211
|
+
loopIterations = Math.max(0, loopIterations - 10);
|
|
2212
|
+
consecutiveFailures = 0;
|
|
2213
|
+
// Add async yield to prevent event loop blocking
|
|
2214
|
+
if (loopIterations % 10 === 0) {
|
|
2215
|
+
yield new Promise(resolve => setImmediate(resolve));
|
|
2216
|
+
}
|
|
2021
2217
|
}
|
|
2022
2218
|
catch (e) {
|
|
2023
2219
|
this.log(e, logger_1.Level.ERROR);
|
|
2024
|
-
|
|
2025
|
-
//
|
|
2026
|
-
|
|
2027
|
-
|
|
2220
|
+
consecutiveFailures++;
|
|
2221
|
+
// Add delay on failures to prevent tight error loops
|
|
2222
|
+
yield new Promise(resolve => setTimeout(resolve, Math.min(1000, consecutiveFailures * 200)));
|
|
2223
|
+
// Don't crash on individual action failures - continue with next iteration
|
|
2224
|
+
continue;
|
|
2028
2225
|
}
|
|
2029
2226
|
}
|
|
2030
2227
|
else {
|
|
@@ -2036,9 +2233,32 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
2036
2233
|
}
|
|
2037
2234
|
ensureScriptsLoaded(page) {
|
|
2038
2235
|
return __awaiter(this, void 0, void 0, function* () {
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2236
|
+
try {
|
|
2237
|
+
// Add timeout to prevent hanging on script evaluation
|
|
2238
|
+
const evaluationPromise = page.evaluate(() => typeof window.scrape === 'function' &&
|
|
2239
|
+
typeof window.scrapeSchema === 'function' &&
|
|
2240
|
+
typeof window.scrapeList === 'function' &&
|
|
2241
|
+
typeof window.scrapeListAuto === 'function' &&
|
|
2242
|
+
typeof window.scrollDown === 'function' &&
|
|
2243
|
+
typeof window.scrollUp === 'function');
|
|
2244
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('Script check timeout')), 3000));
|
|
2245
|
+
const isScriptLoaded = yield Promise.race([
|
|
2246
|
+
evaluationPromise,
|
|
2247
|
+
timeoutPromise
|
|
2248
|
+
]);
|
|
2249
|
+
if (!isScriptLoaded) {
|
|
2250
|
+
yield page.addInitScript({ path: path_1.default.join(__dirname, 'browserSide', 'scraper.js') });
|
|
2251
|
+
}
|
|
2252
|
+
}
|
|
2253
|
+
catch (error) {
|
|
2254
|
+
// If script check fails, try to add the script anyway
|
|
2255
|
+
this.log(`Script check failed, adding script anyway: ${error.message}`, logger_1.Level.WARN);
|
|
2256
|
+
try {
|
|
2257
|
+
yield page.addInitScript({ path: path_1.default.join(__dirname, 'browserSide', 'scraper.js') });
|
|
2258
|
+
}
|
|
2259
|
+
catch (scriptError) {
|
|
2260
|
+
this.log(`Failed to add script: ${scriptError.message}`, logger_1.Level.ERROR);
|
|
2261
|
+
}
|
|
2042
2262
|
}
|
|
2043
2263
|
});
|
|
2044
2264
|
}
|