mx-cloud 0.0.17 → 0.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/interpret.d.ts +7 -0
- package/build/interpret.js +257 -22
- package/build/preprocessor.js +2 -0
- package/build/types/workflow.d.ts +2 -0
- package/package.json +2 -5
package/build/interpret.d.ts
CHANGED
|
@@ -59,6 +59,9 @@ export default class Interpreter extends EventEmitter {
|
|
|
59
59
|
private blocker;
|
|
60
60
|
private cumulativeResults;
|
|
61
61
|
private autohealFailures;
|
|
62
|
+
private namedResults;
|
|
63
|
+
private screenshotCounter;
|
|
64
|
+
private serializableDataByType;
|
|
62
65
|
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
|
|
63
66
|
trackAutohealFailure(error: string): void;
|
|
64
67
|
private applyAdBlocker;
|
|
@@ -85,6 +88,10 @@ export default class Interpreter extends EventEmitter {
|
|
|
85
88
|
* Sets the abort flag to immediately stop all operations
|
|
86
89
|
*/
|
|
87
90
|
abort(): void;
|
|
91
|
+
/**
|
|
92
|
+
* Returns the current abort status
|
|
93
|
+
*/
|
|
94
|
+
getIsAborted(): boolean;
|
|
88
95
|
/**
|
|
89
96
|
* Given a Playwright's page object and a "declarative" list of actions, this function
|
|
90
97
|
* calls all mentioned functions on the Page object.\
|
package/build/interpret.js
CHANGED
|
@@ -67,6 +67,12 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
67
67
|
this.blocker = null;
|
|
68
68
|
this.cumulativeResults = [];
|
|
69
69
|
this.autohealFailures = [];
|
|
70
|
+
this.namedResults = {};
|
|
71
|
+
this.screenshotCounter = 0;
|
|
72
|
+
this.serializableDataByType = {
|
|
73
|
+
scrapeList: {},
|
|
74
|
+
scrapeSchema: {}
|
|
75
|
+
};
|
|
70
76
|
this.workflow = workflow.workflow;
|
|
71
77
|
this.initializedWorkflow = null;
|
|
72
78
|
this.options = Object.assign({ maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => {
|
|
@@ -316,6 +322,12 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
316
322
|
abort() {
|
|
317
323
|
this.isAborted = true;
|
|
318
324
|
}
|
|
325
|
+
/**
|
|
326
|
+
* Returns the current abort status
|
|
327
|
+
*/
|
|
328
|
+
getIsAborted() {
|
|
329
|
+
return this.isAborted;
|
|
330
|
+
}
|
|
319
331
|
/**
|
|
320
332
|
* Given a Playwright's page object and a "declarative" list of actions, this function
|
|
321
333
|
* calls all mentioned functions on the Page object.\
|
|
@@ -342,13 +354,22 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
342
354
|
* Beware of false linter errors - here, we know better!
|
|
343
355
|
*/
|
|
344
356
|
const wawActions = {
|
|
345
|
-
screenshot: (params) => __awaiter(this, void 0, void 0, function* () {
|
|
357
|
+
screenshot: (params, nameOverride) => __awaiter(this, void 0, void 0, function* () {
|
|
346
358
|
var _a;
|
|
347
359
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
|
|
348
|
-
this.options.debugChannel.setActionType(
|
|
360
|
+
this.options.debugChannel.setActionType("screenshot");
|
|
349
361
|
}
|
|
350
362
|
const screenshotBuffer = yield page.screenshot(Object.assign(Object.assign({}, params), { path: undefined }));
|
|
351
|
-
|
|
363
|
+
const baseName = nameOverride || "Screenshot";
|
|
364
|
+
// ✅ Use the typed class property
|
|
365
|
+
this.screenshotCounter += 1;
|
|
366
|
+
const screenshotName = `${baseName} ${this.screenshotCounter}`;
|
|
367
|
+
// ✅ Pass structured metadata to binaryCallback
|
|
368
|
+
yield this.options.binaryCallback({
|
|
369
|
+
name: screenshotName,
|
|
370
|
+
data: screenshotBuffer,
|
|
371
|
+
mimeType: "image/png",
|
|
372
|
+
}, "image/png");
|
|
352
373
|
}),
|
|
353
374
|
enqueueLinks: (selector) => __awaiter(this, void 0, void 0, function* () {
|
|
354
375
|
var _a;
|
|
@@ -444,7 +465,31 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
444
465
|
}
|
|
445
466
|
console.log("Total accumulated rows:", this.cumulativeResults.length);
|
|
446
467
|
console.log("Current results:", this.cumulativeResults);
|
|
447
|
-
|
|
468
|
+
// ✅ Append schema results under "scrapeSchema" → name
|
|
469
|
+
const actionType = "scrapeSchema";
|
|
470
|
+
const actionName = schema.__name || "Texts";
|
|
471
|
+
if (!this.namedResults[actionType])
|
|
472
|
+
this.namedResults[actionType] = {};
|
|
473
|
+
this.namedResults[actionType][actionName] = this.cumulativeResults;
|
|
474
|
+
if (!this.serializableDataByType[actionType])
|
|
475
|
+
this.serializableDataByType[actionType] = {};
|
|
476
|
+
if (!this.serializableDataByType[actionType])
|
|
477
|
+
this.serializableDataByType[actionType] = {};
|
|
478
|
+
if (!this.serializableDataByType[actionType][actionName]) {
|
|
479
|
+
this.serializableDataByType[actionType][actionName] = {};
|
|
480
|
+
}
|
|
481
|
+
for (const row of this.cumulativeResults) {
|
|
482
|
+
for (const [key, value] of Object.entries(row)) {
|
|
483
|
+
if (value !== undefined) {
|
|
484
|
+
this.serializableDataByType[actionType][actionName][key] = value;
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
// now emit full structured object
|
|
489
|
+
yield this.options.serializableCallback({
|
|
490
|
+
scrapeList: this.serializableDataByType.scrapeList,
|
|
491
|
+
scrapeSchema: this.serializableDataByType.scrapeSchema
|
|
492
|
+
});
|
|
448
493
|
}),
|
|
449
494
|
scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
|
|
450
495
|
var _a, _b;
|
|
@@ -485,12 +530,29 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
485
530
|
scrapeResults = [];
|
|
486
531
|
}
|
|
487
532
|
console.log(`ScrapeList completed with ${scrapeResults.length} results`);
|
|
488
|
-
|
|
533
|
+
// ✅ Append list results under "scrapeList" → name
|
|
534
|
+
const actionType = "scrapeList";
|
|
535
|
+
const actionName = config.__name || "List";
|
|
536
|
+
if (!this.serializableDataByType[actionType])
|
|
537
|
+
this.serializableDataByType[actionType] = {};
|
|
538
|
+
if (!this.serializableDataByType[actionType][actionName]) {
|
|
539
|
+
this.serializableDataByType[actionType][actionName] = [];
|
|
540
|
+
}
|
|
541
|
+
this.serializableDataByType[actionType][actionName].push(...scrapeResults);
|
|
542
|
+
yield this.options.serializableCallback({
|
|
543
|
+
scrapeList: this.serializableDataByType.scrapeList,
|
|
544
|
+
scrapeSchema: this.serializableDataByType.scrapeSchema
|
|
545
|
+
});
|
|
489
546
|
}
|
|
490
547
|
catch (error) {
|
|
491
548
|
console.error('ScrapeList action failed completely:', error.message);
|
|
492
549
|
// Don't throw error, just return empty array
|
|
493
|
-
|
|
550
|
+
const actionType = "scrapeList";
|
|
551
|
+
const actionName = config.__name || "List";
|
|
552
|
+
if (!this.namedResults[actionType])
|
|
553
|
+
this.namedResults[actionType] = {};
|
|
554
|
+
this.namedResults[actionType][actionName] = [];
|
|
555
|
+
yield this.options.serializableCallback(this.namedResults);
|
|
494
556
|
}
|
|
495
557
|
}),
|
|
496
558
|
scrapeListAuto: (config) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -563,6 +625,41 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
563
625
|
return;
|
|
564
626
|
}
|
|
565
627
|
this.log(`Launching ${String(step.action)}`, logger_1.Level.LOG);
|
|
628
|
+
try {
|
|
629
|
+
const debug = this.options.debugChannel;
|
|
630
|
+
if (debug === null || debug === void 0 ? void 0 : debug.setActionType) {
|
|
631
|
+
debug.setActionType(String(step.action));
|
|
632
|
+
}
|
|
633
|
+
// Safely extract name for this step
|
|
634
|
+
let stepName = null;
|
|
635
|
+
// If the workflow step itself has a name
|
|
636
|
+
if (step === null || step === void 0 ? void 0 : step.name) {
|
|
637
|
+
stepName = step.name;
|
|
638
|
+
}
|
|
639
|
+
// If the first arg is an object with a __name property
|
|
640
|
+
else if (Array.isArray(step === null || step === void 0 ? void 0 : step.args) &&
|
|
641
|
+
step.args.length > 0 &&
|
|
642
|
+
typeof step.args[0] === "object" &&
|
|
643
|
+
"__name" in step.args[0]) {
|
|
644
|
+
stepName = step.args[0].__name;
|
|
645
|
+
}
|
|
646
|
+
// If args itself is an object with a __name property
|
|
647
|
+
else if (typeof (step === null || step === void 0 ? void 0 : step.args) === "object" &&
|
|
648
|
+
(step === null || step === void 0 ? void 0 : step.args) !== null &&
|
|
649
|
+
"__name" in step.args) {
|
|
650
|
+
stepName = step.args.__name;
|
|
651
|
+
}
|
|
652
|
+
// Default fallback
|
|
653
|
+
if (!stepName) {
|
|
654
|
+
stepName = String(step.action);
|
|
655
|
+
}
|
|
656
|
+
if (debug && typeof debug.setActionName === "function") {
|
|
657
|
+
debug.setActionName(stepName);
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
catch (err) {
|
|
661
|
+
this.log(`Failed to set action name/type: ${err.message}`, logger_1.Level.WARN);
|
|
662
|
+
}
|
|
566
663
|
try {
|
|
567
664
|
if (step.action in wawActions) {
|
|
568
665
|
// "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
|
|
@@ -582,7 +679,15 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
582
679
|
}
|
|
583
680
|
if (methodName === 'waitForLoadState') {
|
|
584
681
|
try {
|
|
585
|
-
|
|
682
|
+
// Add timeout if not already specified
|
|
683
|
+
let args = step.args;
|
|
684
|
+
if (Array.isArray(args) && args.length === 1) {
|
|
685
|
+
args = [args[0], { timeout: 30000 }];
|
|
686
|
+
}
|
|
687
|
+
else if (!Array.isArray(args)) {
|
|
688
|
+
args = [args, { timeout: 30000 }];
|
|
689
|
+
}
|
|
690
|
+
yield executeAction(invokee, methodName, args);
|
|
586
691
|
}
|
|
587
692
|
catch (error) {
|
|
588
693
|
yield executeAction(invokee, methodName, 'domcontentloaded');
|
|
@@ -639,7 +744,17 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
639
744
|
debugLog("Workflow aborted, stopping scrapeCurrentPage");
|
|
640
745
|
return;
|
|
641
746
|
}
|
|
642
|
-
|
|
747
|
+
// Add timeout to prevent hanging on page evaluation
|
|
748
|
+
const evaluationPromise = page.evaluate((cfg) => window.scrapeList(cfg), config);
|
|
749
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('Page evaluation timeout')), 10000));
|
|
750
|
+
let results;
|
|
751
|
+
try {
|
|
752
|
+
results = yield Promise.race([evaluationPromise, timeoutPromise]);
|
|
753
|
+
}
|
|
754
|
+
catch (error) {
|
|
755
|
+
debugLog(`Page evaluation failed: ${error.message}`);
|
|
756
|
+
return;
|
|
757
|
+
}
|
|
643
758
|
const newResults = results.filter(item => {
|
|
644
759
|
const uniqueKey = JSON.stringify(item);
|
|
645
760
|
if (scrapedItems.has(uniqueKey))
|
|
@@ -696,14 +811,22 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
696
811
|
});
|
|
697
812
|
// Enhanced button finder with retry mechanism for both CSS and XPath selectors
|
|
698
813
|
const findWorkingButton = (selectors) => __awaiter(this, void 0, void 0, function* () {
|
|
814
|
+
const startTime = Date.now();
|
|
815
|
+
const MAX_BUTTON_SEARCH_TIME = 15000;
|
|
699
816
|
let updatedSelectors = [...selectors];
|
|
700
817
|
for (let i = 0; i < selectors.length; i++) {
|
|
818
|
+
// Check overall timeout
|
|
819
|
+
if (Date.now() - startTime > MAX_BUTTON_SEARCH_TIME) {
|
|
820
|
+
debugLog(`Button search timeout reached (${MAX_BUTTON_SEARCH_TIME}ms), aborting`);
|
|
821
|
+
break;
|
|
822
|
+
}
|
|
701
823
|
const selector = selectors[i];
|
|
702
824
|
let retryCount = 0;
|
|
703
825
|
let selectorSuccess = false;
|
|
704
826
|
while (retryCount < MAX_RETRIES && !selectorSuccess) {
|
|
705
827
|
try {
|
|
706
|
-
|
|
828
|
+
// Reduce timeout to prevent hanging on slow selectors
|
|
829
|
+
const button = yield waitForSelectorUniversal(selector, { timeout: 2000 });
|
|
707
830
|
if (button) {
|
|
708
831
|
debugLog('Found working selector:', selector);
|
|
709
832
|
return {
|
|
@@ -712,16 +835,30 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
712
835
|
updatedSelectors
|
|
713
836
|
};
|
|
714
837
|
}
|
|
838
|
+
else {
|
|
839
|
+
// Treat null result as failed attempt
|
|
840
|
+
retryCount++;
|
|
841
|
+
debugLog(`Selector "${selector}" not found: attempt ${retryCount}/${MAX_RETRIES}`);
|
|
842
|
+
if (retryCount < MAX_RETRIES) {
|
|
843
|
+
yield page.waitForTimeout(RETRY_DELAY);
|
|
844
|
+
}
|
|
845
|
+
else {
|
|
846
|
+
debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
|
|
847
|
+
updatedSelectors = updatedSelectors.filter(s => s !== selector);
|
|
848
|
+
selectorSuccess = true; // Exit retry loop for this selector
|
|
849
|
+
}
|
|
850
|
+
}
|
|
715
851
|
}
|
|
716
852
|
catch (error) {
|
|
717
853
|
retryCount++;
|
|
718
|
-
debugLog(`Selector "${selector}"
|
|
854
|
+
debugLog(`Selector "${selector}" error: attempt ${retryCount}/${MAX_RETRIES} - ${error.message}`);
|
|
719
855
|
if (retryCount < MAX_RETRIES) {
|
|
720
856
|
yield page.waitForTimeout(RETRY_DELAY);
|
|
721
857
|
}
|
|
722
858
|
else {
|
|
723
859
|
debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
|
|
724
860
|
updatedSelectors = updatedSelectors.filter(s => s !== selector);
|
|
861
|
+
selectorSuccess = true; // Exit retry loop for this selector
|
|
725
862
|
}
|
|
726
863
|
}
|
|
727
864
|
}
|
|
@@ -748,6 +885,10 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
748
885
|
});
|
|
749
886
|
let availableSelectors = config.pagination.selector.split(',');
|
|
750
887
|
let unchangedResultCounter = 0;
|
|
888
|
+
let paginationIterations = 0;
|
|
889
|
+
const MAX_PAGINATION_ITERATIONS = 100; // Prevent infinite pagination
|
|
890
|
+
const paginationStartTime = Date.now();
|
|
891
|
+
const MAX_PAGINATION_TIME = 30 * 60 * 1000; // 30 minutes max for pagination
|
|
751
892
|
try {
|
|
752
893
|
while (true) {
|
|
753
894
|
// Check abort flag at start of each pagination iteration
|
|
@@ -755,6 +896,19 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
755
896
|
this.log('Workflow aborted during pagination loop', logger_1.Level.WARN);
|
|
756
897
|
return allResults;
|
|
757
898
|
}
|
|
899
|
+
// Pagination circuit breakers
|
|
900
|
+
if (++paginationIterations > MAX_PAGINATION_ITERATIONS) {
|
|
901
|
+
debugLog(`Maximum pagination iterations reached (${MAX_PAGINATION_ITERATIONS}), stopping`);
|
|
902
|
+
return allResults;
|
|
903
|
+
}
|
|
904
|
+
if (Date.now() - paginationStartTime > MAX_PAGINATION_TIME) {
|
|
905
|
+
debugLog('Maximum pagination time reached (10 minutes), stopping');
|
|
906
|
+
return allResults;
|
|
907
|
+
}
|
|
908
|
+
// Add async yield every 5 iterations to prevent event loop blocking
|
|
909
|
+
if (paginationIterations % 5 === 0) {
|
|
910
|
+
yield new Promise(resolve => setImmediate(resolve));
|
|
911
|
+
}
|
|
758
912
|
switch (config.pagination.type) {
|
|
759
913
|
case 'scrollDown': {
|
|
760
914
|
let previousResultCount = allResults.length;
|
|
@@ -971,10 +1125,23 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
971
1125
|
if (checkLimit())
|
|
972
1126
|
return allResults;
|
|
973
1127
|
let loadMoreCounter = 0;
|
|
974
|
-
//
|
|
975
|
-
|
|
976
|
-
|
|
1128
|
+
const MAX_LOAD_MORE_ITERATIONS = 100; // Prevent infinite load more
|
|
1129
|
+
const loadMoreStartTime = Date.now();
|
|
1130
|
+
const MAX_LOAD_MORE_TIME = 30 * 60 * 1000; // 5 minutes max for load more
|
|
977
1131
|
while (true) {
|
|
1132
|
+
// Load more circuit breakers
|
|
1133
|
+
if (loadMoreCounter >= MAX_LOAD_MORE_ITERATIONS) {
|
|
1134
|
+
debugLog(`Maximum load more iterations reached (${MAX_LOAD_MORE_ITERATIONS}), stopping`);
|
|
1135
|
+
return allResults;
|
|
1136
|
+
}
|
|
1137
|
+
if (Date.now() - loadMoreStartTime > MAX_LOAD_MORE_TIME) {
|
|
1138
|
+
debugLog('Maximum load more time reached (5 minutes), stopping');
|
|
1139
|
+
return allResults;
|
|
1140
|
+
}
|
|
1141
|
+
// Add async yield every 3 iterations
|
|
1142
|
+
if (loadMoreCounter % 3 === 0 && loadMoreCounter > 0) {
|
|
1143
|
+
yield new Promise(resolve => setImmediate(resolve));
|
|
1144
|
+
}
|
|
978
1145
|
// Find working button with retry mechanism
|
|
979
1146
|
const { button: loadMoreButton, workingSelector, updatedSelectors } = yield findWorkingButton(availableSelectors);
|
|
980
1147
|
availableSelectors = updatedSelectors;
|
|
@@ -1730,12 +1897,35 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1730
1897
|
selector.includes('@id=');
|
|
1731
1898
|
let count = 0;
|
|
1732
1899
|
if (isXPath) {
|
|
1900
|
+
// Add timeout to prevent XPath hanging
|
|
1733
1901
|
const locator = page.locator(`xpath=${selector}`);
|
|
1734
|
-
|
|
1902
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('XPath timeout')), 5000));
|
|
1903
|
+
try {
|
|
1904
|
+
count = yield Promise.race([
|
|
1905
|
+
locator.count(),
|
|
1906
|
+
timeoutPromise
|
|
1907
|
+
]);
|
|
1908
|
+
}
|
|
1909
|
+
catch (error) {
|
|
1910
|
+
// XPath timed out or failed
|
|
1911
|
+
return false;
|
|
1912
|
+
}
|
|
1735
1913
|
}
|
|
1736
1914
|
else {
|
|
1737
|
-
|
|
1738
|
-
|
|
1915
|
+
// Add timeout to CSS selector operations
|
|
1916
|
+
try {
|
|
1917
|
+
const elementsPromise = page.$$(selector);
|
|
1918
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('CSS selector timeout')), 5000));
|
|
1919
|
+
const elements = yield Promise.race([
|
|
1920
|
+
elementsPromise,
|
|
1921
|
+
timeoutPromise
|
|
1922
|
+
]);
|
|
1923
|
+
count = elements ? elements.length : 0;
|
|
1924
|
+
}
|
|
1925
|
+
catch (error) {
|
|
1926
|
+
// CSS selector timed out or failed
|
|
1927
|
+
return false;
|
|
1928
|
+
}
|
|
1739
1929
|
}
|
|
1740
1930
|
// For list selectors, we need multiple elements
|
|
1741
1931
|
if (isListSelector) {
|
|
@@ -1998,12 +2188,26 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1998
2188
|
/* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
|
|
1999
2189
|
let loopIterations = 0;
|
|
2000
2190
|
const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
|
|
2191
|
+
let consecutiveFailures = 0;
|
|
2192
|
+
const MAX_CONSECUTIVE_FAILURES = 10;
|
|
2193
|
+
const startTime = Date.now();
|
|
2194
|
+
const MAX_EXECUTION_TIME = 30 * 60 * 1000; // 30 minutes max
|
|
2001
2195
|
while (true) {
|
|
2002
|
-
//
|
|
2196
|
+
// Multiple circuit breakers to prevent infinite loops
|
|
2003
2197
|
if (++loopIterations > MAX_LOOP_ITERATIONS) {
|
|
2004
2198
|
this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
|
|
2005
2199
|
return;
|
|
2006
2200
|
}
|
|
2201
|
+
// Time-based circuit breaker
|
|
2202
|
+
if (Date.now() - startTime > MAX_EXECUTION_TIME) {
|
|
2203
|
+
this.log('Maximum execution time reached (30 minutes), terminating workflow', logger_1.Level.ERROR);
|
|
2204
|
+
return;
|
|
2205
|
+
}
|
|
2206
|
+
// Failure-based circuit breaker
|
|
2207
|
+
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
2208
|
+
this.log('Too many consecutive failures, terminating to prevent hang', logger_1.Level.ERROR);
|
|
2209
|
+
return;
|
|
2210
|
+
}
|
|
2007
2211
|
// Check abort flag immediately
|
|
2008
2212
|
if (this.isAborted) {
|
|
2009
2213
|
this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
|
|
@@ -2094,11 +2298,19 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
2094
2298
|
usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
|
|
2095
2299
|
workflowCopy.splice(actionId, 1);
|
|
2096
2300
|
console.log(`Action with ID ${action.id} removed from the workflow copy.`);
|
|
2097
|
-
// Reset
|
|
2098
|
-
loopIterations = 0;
|
|
2301
|
+
// Reset counters on successful action (but keep some history to prevent infinite resets)
|
|
2302
|
+
loopIterations = Math.max(0, loopIterations - 10);
|
|
2303
|
+
consecutiveFailures = 0;
|
|
2304
|
+
// Add async yield to prevent event loop blocking
|
|
2305
|
+
if (loopIterations % 10 === 0) {
|
|
2306
|
+
yield new Promise(resolve => setImmediate(resolve));
|
|
2307
|
+
}
|
|
2099
2308
|
}
|
|
2100
2309
|
catch (e) {
|
|
2101
2310
|
this.log(e, logger_1.Level.ERROR);
|
|
2311
|
+
consecutiveFailures++;
|
|
2312
|
+
// Add delay on failures to prevent tight error loops
|
|
2313
|
+
yield new Promise(resolve => setTimeout(resolve, Math.min(1000, consecutiveFailures * 200)));
|
|
2102
2314
|
// Don't crash on individual action failures - continue with next iteration
|
|
2103
2315
|
continue;
|
|
2104
2316
|
}
|
|
@@ -2112,9 +2324,32 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
2112
2324
|
}
|
|
2113
2325
|
ensureScriptsLoaded(page) {
|
|
2114
2326
|
return __awaiter(this, void 0, void 0, function* () {
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2327
|
+
try {
|
|
2328
|
+
// Add timeout to prevent hanging on script evaluation
|
|
2329
|
+
const evaluationPromise = page.evaluate(() => typeof window.scrape === 'function' &&
|
|
2330
|
+
typeof window.scrapeSchema === 'function' &&
|
|
2331
|
+
typeof window.scrapeList === 'function' &&
|
|
2332
|
+
typeof window.scrapeListAuto === 'function' &&
|
|
2333
|
+
typeof window.scrollDown === 'function' &&
|
|
2334
|
+
typeof window.scrollUp === 'function');
|
|
2335
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('Script check timeout')), 3000));
|
|
2336
|
+
const isScriptLoaded = yield Promise.race([
|
|
2337
|
+
evaluationPromise,
|
|
2338
|
+
timeoutPromise
|
|
2339
|
+
]);
|
|
2340
|
+
if (!isScriptLoaded) {
|
|
2341
|
+
yield page.addInitScript({ path: path_1.default.join(__dirname, 'browserSide', 'scraper.js') });
|
|
2342
|
+
}
|
|
2343
|
+
}
|
|
2344
|
+
catch (error) {
|
|
2345
|
+
// If script check fails, try to add the script anyway
|
|
2346
|
+
this.log(`Script check failed, adding script anyway: ${error.message}`, logger_1.Level.WARN);
|
|
2347
|
+
try {
|
|
2348
|
+
yield page.addInitScript({ path: path_1.default.join(__dirname, 'browserSide', 'scraper.js') });
|
|
2349
|
+
}
|
|
2350
|
+
catch (scriptError) {
|
|
2351
|
+
this.log(`Failed to add script: ${scriptError.message}`, logger_1.Level.ERROR);
|
|
2352
|
+
}
|
|
2118
2353
|
}
|
|
2119
2354
|
});
|
|
2120
2355
|
}
|
package/build/preprocessor.js
CHANGED
|
@@ -34,6 +34,8 @@ class Preprocessor {
|
|
|
34
34
|
what: joi_1.default.array().items({
|
|
35
35
|
action: joi_1.default.string().required(),
|
|
36
36
|
args: joi_1.default.array().items(joi_1.default.any()),
|
|
37
|
+
name: joi_1.default.string(),
|
|
38
|
+
actionId: joi_1.default.string()
|
|
37
39
|
}).required(),
|
|
38
40
|
})).required(),
|
|
39
41
|
});
|
|
package/build/types/workflow.d.ts
CHANGED
|
@@ -27,6 +27,8 @@ export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot
|
|
|
27
27
|
export type What = {
|
|
28
28
|
action: MethodNames<Page> | CustomFunctions;
|
|
29
29
|
args?: any[];
|
|
30
|
+
name?: string;
|
|
31
|
+
actionId?: string;
|
|
30
32
|
};
|
|
31
33
|
export type PageState = Partial<BaseConditions>;
|
|
32
34
|
export type ParamType = Record<string, any>;
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mx-cloud",
|
|
3
|
-
"version": "0.0.17",
|
|
3
|
+
"version": "0.0.19",
|
|
4
4
|
"description": "mx cloud",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"typings": "build/index.d.ts",
|
|
7
7
|
"scripts": {
|
|
8
8
|
"test": "jest",
|
|
9
|
-
"build": "tsc",
|
|
9
|
+
"build": "npm run clean && tsc",
|
|
10
10
|
"lint": "eslint .",
|
|
11
11
|
"clean": "rimraf ./build"
|
|
12
12
|
},
|
|
@@ -23,8 +23,5 @@
|
|
|
23
23
|
"playwright": "^1.50.0",
|
|
24
24
|
"playwright-extra": "^4.3.6",
|
|
25
25
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
|
26
|
-
},
|
|
27
|
-
"devDependencies": {
|
|
28
|
-
"@types/node": "^24.3.1"
|
|
29
26
|
}
|
|
30
27
|
}
|