mx-cloud 0.0.16 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/interpret.d.ts +5 -0
- package/build/interpret.js +89 -13
- package/package.json +5 -2
package/build/interpret.d.ts
CHANGED
|
@@ -54,6 +54,7 @@ export default class Interpreter extends EventEmitter {
|
|
|
54
54
|
private options;
|
|
55
55
|
private concurrency;
|
|
56
56
|
private stopper;
|
|
57
|
+
private isAborted;
|
|
57
58
|
private log;
|
|
58
59
|
private blocker;
|
|
59
60
|
private cumulativeResults;
|
|
@@ -80,6 +81,10 @@ export default class Interpreter extends EventEmitter {
|
|
|
80
81
|
* @returns True if `where` is applicable in the given context, false otherwise
|
|
81
82
|
*/
|
|
82
83
|
private applicable;
|
|
84
|
+
/**
|
|
85
|
+
* Sets the abort flag to immediately stop all operations
|
|
86
|
+
*/
|
|
87
|
+
abort(): void;
|
|
83
88
|
/**
|
|
84
89
|
* Given a Playwright's page object and a "declarative" list of actions, this function
|
|
85
90
|
* calls all mentioned functions on the Page object.\
|
package/build/interpret.js
CHANGED
|
@@ -63,6 +63,7 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
63
63
|
var _a;
|
|
64
64
|
super();
|
|
65
65
|
this.stopper = null;
|
|
66
|
+
this.isAborted = false;
|
|
66
67
|
this.blocker = null;
|
|
67
68
|
this.cumulativeResults = [];
|
|
68
69
|
this.autohealFailures = [];
|
|
@@ -90,7 +91,9 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
90
91
|
adblocker_playwright_1.PlaywrightBlocker.fromLists(cross_fetch_1.default, ['https://easylist.to/easylist/easylist.txt']).then(blocker => {
|
|
91
92
|
this.blocker = blocker;
|
|
92
93
|
}).catch(err => {
|
|
93
|
-
this.log(`Failed to initialize ad-blocker
|
|
94
|
+
this.log(`Failed to initialize ad-blocker: ${err.message}`, logger_1.Level.ERROR);
|
|
95
|
+
// Continue without ad-blocker rather than crashing
|
|
96
|
+
this.blocker = null;
|
|
94
97
|
});
|
|
95
98
|
}
|
|
96
99
|
trackAutohealFailure(error) {
|
|
@@ -307,6 +310,12 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
307
310
|
}
|
|
308
311
|
});
|
|
309
312
|
}
|
|
313
|
+
/**
|
|
314
|
+
* Sets the abort flag to immediately stop all operations
|
|
315
|
+
*/
|
|
316
|
+
abort() {
|
|
317
|
+
this.isAborted = true;
|
|
318
|
+
}
|
|
310
319
|
/**
|
|
311
320
|
* Given a Playwright's page object and a "declarative" list of actions, this function
|
|
312
321
|
* calls all mentioned functions on the Page object.\
|
|
@@ -319,6 +328,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
319
328
|
carryOutSteps(page, steps) {
|
|
320
329
|
return __awaiter(this, void 0, void 0, function* () {
|
|
321
330
|
var _a, _b;
|
|
331
|
+
// Check abort flag at start of execution
|
|
332
|
+
if (this.isAborted) {
|
|
333
|
+
this.log('Workflow aborted, stopping execution', logger_1.Level.WARN);
|
|
334
|
+
return;
|
|
335
|
+
}
|
|
322
336
|
/**
|
|
323
337
|
* Defines overloaded (or added) methods/actions usable in the workflow.
|
|
324
338
|
* If a method overloads any existing method of the Page class, it accepts the same set
|
|
@@ -376,6 +390,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
376
390
|
}),
|
|
377
391
|
scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
|
|
378
392
|
var _a;
|
|
393
|
+
// Check abort flag at start of scraping
|
|
394
|
+
if (this.isAborted) {
|
|
395
|
+
this.log('Workflow aborted, stopping scrapeSchema', logger_1.Level.WARN);
|
|
396
|
+
return;
|
|
397
|
+
}
|
|
379
398
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
|
|
380
399
|
this.options.debugChannel.setActionType('scrapeSchema');
|
|
381
400
|
}
|
|
@@ -429,6 +448,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
429
448
|
}),
|
|
430
449
|
scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
|
|
431
450
|
var _a, _b;
|
|
451
|
+
// Check abort flag at start of scraping
|
|
452
|
+
if (this.isAborted) {
|
|
453
|
+
this.log('Workflow aborted, stopping scrapeList', logger_1.Level.WARN);
|
|
454
|
+
return;
|
|
455
|
+
}
|
|
432
456
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
|
|
433
457
|
this.options.debugChannel.setActionType('scrapeList');
|
|
434
458
|
}
|
|
@@ -497,9 +521,15 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
497
521
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
|
|
498
522
|
this.options.debugChannel.setActionType('script');
|
|
499
523
|
}
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
524
|
+
try {
|
|
525
|
+
const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
|
|
526
|
+
const x = new AsyncFunction('page', 'log', code);
|
|
527
|
+
yield x(page, this.log);
|
|
528
|
+
}
|
|
529
|
+
catch (error) {
|
|
530
|
+
this.log(`Script execution failed: ${error.message}`, logger_1.Level.ERROR);
|
|
531
|
+
throw new Error(`Script execution error: ${error.message}`);
|
|
532
|
+
}
|
|
503
533
|
}),
|
|
504
534
|
flag: () => __awaiter(this, void 0, void 0, function* () {
|
|
505
535
|
return new Promise((res) => {
|
|
@@ -527,6 +557,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
527
557
|
}
|
|
528
558
|
});
|
|
529
559
|
for (const step of steps) {
|
|
560
|
+
// Check abort flag before each step
|
|
561
|
+
if (this.isAborted) {
|
|
562
|
+
this.log('Workflow aborted during step execution', logger_1.Level.WARN);
|
|
563
|
+
return;
|
|
564
|
+
}
|
|
530
565
|
this.log(`Launching ${String(step.action)}`, logger_1.Level.LOG);
|
|
531
566
|
try {
|
|
532
567
|
if (step.action in wawActions) {
|
|
@@ -583,6 +618,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
583
618
|
}
|
|
584
619
|
handlePagination(page, config) {
|
|
585
620
|
return __awaiter(this, void 0, void 0, function* () {
|
|
621
|
+
// Check abort flag at start of pagination
|
|
622
|
+
if (this.isAborted) {
|
|
623
|
+
this.log('Workflow aborted, stopping pagination', logger_1.Level.WARN);
|
|
624
|
+
return [];
|
|
625
|
+
}
|
|
586
626
|
let allResults = [];
|
|
587
627
|
let previousHeight = 0;
|
|
588
628
|
let scrapedItems = new Set();
|
|
@@ -594,6 +634,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
594
634
|
console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
|
|
595
635
|
};
|
|
596
636
|
const scrapeCurrentPage = () => __awaiter(this, void 0, void 0, function* () {
|
|
637
|
+
// Check abort flag before scraping current page
|
|
638
|
+
if (this.isAborted) {
|
|
639
|
+
debugLog("Workflow aborted, stopping scrapeCurrentPage");
|
|
640
|
+
return;
|
|
641
|
+
}
|
|
597
642
|
const results = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
|
|
598
643
|
const newResults = results.filter(item => {
|
|
599
644
|
const uniqueKey = JSON.stringify(item);
|
|
@@ -705,6 +750,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
705
750
|
let unchangedResultCounter = 0;
|
|
706
751
|
try {
|
|
707
752
|
while (true) {
|
|
753
|
+
// Check abort flag at start of each pagination iteration
|
|
754
|
+
if (this.isAborted) {
|
|
755
|
+
this.log('Workflow aborted during pagination loop', logger_1.Level.WARN);
|
|
756
|
+
return allResults;
|
|
757
|
+
}
|
|
708
758
|
switch (config.pagination.type) {
|
|
709
759
|
case 'scrollDown': {
|
|
710
760
|
let previousResultCount = allResults.length;
|
|
@@ -712,9 +762,14 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
712
762
|
if (checkLimit()) {
|
|
713
763
|
return allResults;
|
|
714
764
|
}
|
|
715
|
-
yield page.evaluate(() =>
|
|
765
|
+
yield page.evaluate(() => {
|
|
766
|
+
const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
767
|
+
window.scrollTo(0, scrollHeight);
|
|
768
|
+
});
|
|
716
769
|
yield page.waitForTimeout(2000);
|
|
717
|
-
const currentHeight = yield page.evaluate(() =>
|
|
770
|
+
const currentHeight = yield page.evaluate(() => {
|
|
771
|
+
return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
772
|
+
});
|
|
718
773
|
const currentResultCount = allResults.length;
|
|
719
774
|
if (currentResultCount === previousResultCount) {
|
|
720
775
|
unchangedResultCounter++;
|
|
@@ -969,9 +1024,14 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
969
1024
|
}
|
|
970
1025
|
// Wait for content to load and check scroll height
|
|
971
1026
|
yield page.waitForTimeout(2000);
|
|
972
|
-
yield page.evaluate(() =>
|
|
1027
|
+
yield page.evaluate(() => {
|
|
1028
|
+
const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
1029
|
+
window.scrollTo(0, scrollHeight);
|
|
1030
|
+
});
|
|
973
1031
|
yield page.waitForTimeout(2000);
|
|
974
|
-
const currentHeight = yield page.evaluate(() =>
|
|
1032
|
+
const currentHeight = yield page.evaluate(() => {
|
|
1033
|
+
return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
|
|
1034
|
+
});
|
|
975
1035
|
const heightChanged = currentHeight !== previousHeight;
|
|
976
1036
|
previousHeight = currentHeight;
|
|
977
1037
|
yield scrapeCurrentPage();
|
|
@@ -1936,7 +1996,19 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
1936
1996
|
this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
|
|
1937
1997
|
});
|
|
1938
1998
|
/* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
|
|
1999
|
+
let loopIterations = 0;
|
|
2000
|
+
const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
|
|
1939
2001
|
while (true) {
|
|
2002
|
+
// Circuit breaker to prevent infinite loops
|
|
2003
|
+
if (++loopIterations > MAX_LOOP_ITERATIONS) {
|
|
2004
|
+
this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
|
|
2005
|
+
return;
|
|
2006
|
+
}
|
|
2007
|
+
// Check abort flag immediately
|
|
2008
|
+
if (this.isAborted) {
|
|
2009
|
+
this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
|
|
2010
|
+
return;
|
|
2011
|
+
}
|
|
1940
2012
|
// Checks whether the page was closed from outside,
|
|
1941
2013
|
// or the workflow execution has been stopped via `interpreter.stop()`
|
|
1942
2014
|
if (p.isClosed() || !this.stopper) {
|
|
@@ -2001,6 +2073,11 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
2001
2073
|
console.log("MATCHED ACTION ID:", actionId);
|
|
2002
2074
|
this.log(`Matched ${JSON.stringify(action === null || action === void 0 ? void 0 : action.where)}`, logger_1.Level.LOG);
|
|
2003
2075
|
if (action) { // action is matched
|
|
2076
|
+
// Check abort flag before executing action
|
|
2077
|
+
if (this.isAborted) {
|
|
2078
|
+
this.log('Workflow aborted before action execution', logger_1.Level.WARN);
|
|
2079
|
+
return;
|
|
2080
|
+
}
|
|
2004
2081
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.activeId) {
|
|
2005
2082
|
this.options.debugChannel.activeId(actionId);
|
|
2006
2083
|
}
|
|
@@ -2017,14 +2094,13 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
2017
2094
|
usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
|
|
2018
2095
|
workflowCopy.splice(actionId, 1);
|
|
2019
2096
|
console.log(`Action with ID ${action.id} removed from the workflow copy.`);
|
|
2020
|
-
//
|
|
2097
|
+
// Reset loop iteration counter on successful action
|
|
2098
|
+
loopIterations = 0;
|
|
2021
2099
|
}
|
|
2022
2100
|
catch (e) {
|
|
2023
2101
|
this.log(e, logger_1.Level.ERROR);
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
workflowCopy.splice(actionId, 1);
|
|
2027
|
-
console.log(`Failed action removed. Remaining actions: ${workflowCopy.length}`);
|
|
2102
|
+
// Don't crash on individual action failures - continue with next iteration
|
|
2103
|
+
continue;
|
|
2028
2104
|
}
|
|
2029
2105
|
}
|
|
2030
2106
|
else {
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mx-cloud",
|
|
3
|
-
"version": "0.0.16",
|
|
3
|
+
"version": "0.0.17",
|
|
4
4
|
"description": "mx cloud",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"typings": "build/index.d.ts",
|
|
7
7
|
"scripts": {
|
|
8
8
|
"test": "jest",
|
|
9
|
-
"build": "
|
|
9
|
+
"build": "tsc",
|
|
10
10
|
"lint": "eslint .",
|
|
11
11
|
"clean": "rimraf ./build"
|
|
12
12
|
},
|
|
@@ -23,5 +23,8 @@
|
|
|
23
23
|
"playwright": "^1.50.0",
|
|
24
24
|
"playwright-extra": "^4.3.6",
|
|
25
25
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@types/node": "^24.3.1"
|
|
26
29
|
}
|
|
27
30
|
}
|