mx-cloud 0.0.16 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,6 +54,7 @@ export default class Interpreter extends EventEmitter {
54
54
  private options;
55
55
  private concurrency;
56
56
  private stopper;
57
+ private isAborted;
57
58
  private log;
58
59
  private blocker;
59
60
  private cumulativeResults;
@@ -80,6 +81,10 @@ export default class Interpreter extends EventEmitter {
80
81
  * @returns True if `where` is applicable in the given context, false otherwise
81
82
  */
82
83
  private applicable;
84
+ /**
85
+ * Sets the abort flag to immediately stop all operations
86
+ */
87
+ abort(): void;
83
88
  /**
84
89
  * Given a Playwright's page object and a "declarative" list of actions, this function
85
90
  * calls all mentioned functions on the Page object.\
@@ -63,6 +63,7 @@ class Interpreter extends events_1.EventEmitter {
63
63
  var _a;
64
64
  super();
65
65
  this.stopper = null;
66
+ this.isAborted = false;
66
67
  this.blocker = null;
67
68
  this.cumulativeResults = [];
68
69
  this.autohealFailures = [];
@@ -90,7 +91,9 @@ class Interpreter extends events_1.EventEmitter {
90
91
  adblocker_playwright_1.PlaywrightBlocker.fromLists(cross_fetch_1.default, ['https://easylist.to/easylist/easylist.txt']).then(blocker => {
91
92
  this.blocker = blocker;
92
93
  }).catch(err => {
93
- this.log(`Failed to initialize ad-blocker:`, logger_1.Level.ERROR);
94
+ this.log(`Failed to initialize ad-blocker: ${err.message}`, logger_1.Level.ERROR);
95
+ // Continue without ad-blocker rather than crashing
96
+ this.blocker = null;
94
97
  });
95
98
  }
96
99
  trackAutohealFailure(error) {
@@ -307,6 +310,12 @@ class Interpreter extends events_1.EventEmitter {
307
310
  }
308
311
  });
309
312
  }
313
+ /**
314
+ * Sets the abort flag to immediately stop all operations
315
+ */
316
+ abort() {
317
+ this.isAborted = true;
318
+ }
310
319
  /**
311
320
  * Given a Playwright's page object and a "declarative" list of actions, this function
312
321
  * calls all mentioned functions on the Page object.\
@@ -319,6 +328,11 @@ class Interpreter extends events_1.EventEmitter {
319
328
  carryOutSteps(page, steps) {
320
329
  return __awaiter(this, void 0, void 0, function* () {
321
330
  var _a, _b;
331
+ // Check abort flag at start of execution
332
+ if (this.isAborted) {
333
+ this.log('Workflow aborted, stopping execution', logger_1.Level.WARN);
334
+ return;
335
+ }
322
336
  /**
323
337
  * Defines overloaded (or added) methods/actions usable in the workflow.
324
338
  * If a method overloads any existing method of the Page class, it accepts the same set
@@ -376,6 +390,11 @@ class Interpreter extends events_1.EventEmitter {
376
390
  }),
377
391
  scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
378
392
  var _a;
393
+ // Check abort flag at start of scraping
394
+ if (this.isAborted) {
395
+ this.log('Workflow aborted, stopping scrapeSchema', logger_1.Level.WARN);
396
+ return;
397
+ }
379
398
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
380
399
  this.options.debugChannel.setActionType('scrapeSchema');
381
400
  }
@@ -429,6 +448,11 @@ class Interpreter extends events_1.EventEmitter {
429
448
  }),
430
449
  scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
431
450
  var _a, _b;
451
+ // Check abort flag at start of scraping
452
+ if (this.isAborted) {
453
+ this.log('Workflow aborted, stopping scrapeList', logger_1.Level.WARN);
454
+ return;
455
+ }
432
456
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
433
457
  this.options.debugChannel.setActionType('scrapeList');
434
458
  }
@@ -497,9 +521,15 @@ class Interpreter extends events_1.EventEmitter {
497
521
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
498
522
  this.options.debugChannel.setActionType('script');
499
523
  }
500
- const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
501
- const x = new AsyncFunction('page', 'log', code);
502
- yield x(page, this.log);
524
+ try {
525
+ const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
526
+ const x = new AsyncFunction('page', 'log', code);
527
+ yield x(page, this.log);
528
+ }
529
+ catch (error) {
530
+ this.log(`Script execution failed: ${error.message}`, logger_1.Level.ERROR);
531
+ throw new Error(`Script execution error: ${error.message}`);
532
+ }
503
533
  }),
504
534
  flag: () => __awaiter(this, void 0, void 0, function* () {
505
535
  return new Promise((res) => {
@@ -527,6 +557,11 @@ class Interpreter extends events_1.EventEmitter {
527
557
  }
528
558
  });
529
559
  for (const step of steps) {
560
+ // Check abort flag before each step
561
+ if (this.isAborted) {
562
+ this.log('Workflow aborted during step execution', logger_1.Level.WARN);
563
+ return;
564
+ }
530
565
  this.log(`Launching ${String(step.action)}`, logger_1.Level.LOG);
531
566
  try {
532
567
  if (step.action in wawActions) {
@@ -583,6 +618,11 @@ class Interpreter extends events_1.EventEmitter {
583
618
  }
584
619
  handlePagination(page, config) {
585
620
  return __awaiter(this, void 0, void 0, function* () {
621
+ // Check abort flag at start of pagination
622
+ if (this.isAborted) {
623
+ this.log('Workflow aborted, stopping pagination', logger_1.Level.WARN);
624
+ return [];
625
+ }
586
626
  let allResults = [];
587
627
  let previousHeight = 0;
588
628
  let scrapedItems = new Set();
@@ -594,6 +634,11 @@ class Interpreter extends events_1.EventEmitter {
594
634
  console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
595
635
  };
596
636
  const scrapeCurrentPage = () => __awaiter(this, void 0, void 0, function* () {
637
+ // Check abort flag before scraping current page
638
+ if (this.isAborted) {
639
+ debugLog("Workflow aborted, stopping scrapeCurrentPage");
640
+ return;
641
+ }
597
642
  const results = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
598
643
  const newResults = results.filter(item => {
599
644
  const uniqueKey = JSON.stringify(item);
@@ -705,6 +750,11 @@ class Interpreter extends events_1.EventEmitter {
705
750
  let unchangedResultCounter = 0;
706
751
  try {
707
752
  while (true) {
753
+ // Check abort flag at start of each pagination iteration
754
+ if (this.isAborted) {
755
+ this.log('Workflow aborted during pagination loop', logger_1.Level.WARN);
756
+ return allResults;
757
+ }
708
758
  switch (config.pagination.type) {
709
759
  case 'scrollDown': {
710
760
  let previousResultCount = allResults.length;
@@ -712,9 +762,14 @@ class Interpreter extends events_1.EventEmitter {
712
762
  if (checkLimit()) {
713
763
  return allResults;
714
764
  }
715
- yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
765
+ yield page.evaluate(() => {
766
+ const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
767
+ window.scrollTo(0, scrollHeight);
768
+ });
716
769
  yield page.waitForTimeout(2000);
717
- const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
770
+ const currentHeight = yield page.evaluate(() => {
771
+ return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
772
+ });
718
773
  const currentResultCount = allResults.length;
719
774
  if (currentResultCount === previousResultCount) {
720
775
  unchangedResultCounter++;
@@ -969,9 +1024,14 @@ class Interpreter extends events_1.EventEmitter {
969
1024
  }
970
1025
  // Wait for content to load and check scroll height
971
1026
  yield page.waitForTimeout(2000);
972
- yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
1027
+ yield page.evaluate(() => {
1028
+ const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
1029
+ window.scrollTo(0, scrollHeight);
1030
+ });
973
1031
  yield page.waitForTimeout(2000);
974
- const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
1032
+ const currentHeight = yield page.evaluate(() => {
1033
+ return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
1034
+ });
975
1035
  const heightChanged = currentHeight !== previousHeight;
976
1036
  previousHeight = currentHeight;
977
1037
  yield scrapeCurrentPage();
@@ -1936,7 +1996,19 @@ class Interpreter extends events_1.EventEmitter {
1936
1996
  this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
1937
1997
  });
1938
1998
  /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
1999
+ let loopIterations = 0;
2000
+ const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
1939
2001
  while (true) {
2002
+ // Circuit breaker to prevent infinite loops
2003
+ if (++loopIterations > MAX_LOOP_ITERATIONS) {
2004
+ this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
2005
+ return;
2006
+ }
2007
+ // Check abort flag immediately
2008
+ if (this.isAborted) {
2009
+ this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
2010
+ return;
2011
+ }
1940
2012
  // Checks whether the page was closed from outside,
1941
2013
  // or the workflow execution has been stopped via `interpreter.stop()`
1942
2014
  if (p.isClosed() || !this.stopper) {
@@ -2001,6 +2073,11 @@ class Interpreter extends events_1.EventEmitter {
2001
2073
  console.log("MATCHED ACTION ID:", actionId);
2002
2074
  this.log(`Matched ${JSON.stringify(action === null || action === void 0 ? void 0 : action.where)}`, logger_1.Level.LOG);
2003
2075
  if (action) { // action is matched
2076
+ // Check abort flag before executing action
2077
+ if (this.isAborted) {
2078
+ this.log('Workflow aborted before action execution', logger_1.Level.WARN);
2079
+ return;
2080
+ }
2004
2081
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.activeId) {
2005
2082
  this.options.debugChannel.activeId(actionId);
2006
2083
  }
@@ -2017,14 +2094,13 @@ class Interpreter extends events_1.EventEmitter {
2017
2094
  usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
2018
2095
  workflowCopy.splice(actionId, 1);
2019
2096
  console.log(`Action with ID ${action.id} removed from the workflow copy.`);
2020
- // const newSelectors = this.getPreviousSelectors(workflow, actionId);
2097
+ // Reset loop iteration counter on successful action
2098
+ loopIterations = 0;
2021
2099
  }
2022
2100
  catch (e) {
2023
2101
  this.log(e, logger_1.Level.ERROR);
2024
- console.log(`Action with ID ${action.id} failed: ${e.message}`);
2025
- // Still remove the failed action to prevent infinite loops
2026
- workflowCopy.splice(actionId, 1);
2027
- console.log(`Failed action removed. Remaining actions: ${workflowCopy.length}`);
2102
+ // Don't crash on individual action failures - continue with next iteration
2103
+ continue;
2028
2104
  }
2029
2105
  }
2030
2106
  else {
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "mx-cloud",
3
- "version": "0.0.16",
3
+ "version": "0.0.17",
4
4
  "description": "mx cloud",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",
7
7
  "scripts": {
8
8
  "test": "jest",
9
- "build": "npm run clean && tsc",
9
+ "build": "tsc",
10
10
  "lint": "eslint .",
11
11
  "clean": "rimraf ./build"
12
12
  },
@@ -23,5 +23,8 @@
23
23
  "playwright": "^1.50.0",
24
24
  "playwright-extra": "^4.3.6",
25
25
  "puppeteer-extra-plugin-stealth": "^2.11.2"
26
+ },
27
+ "devDependencies": {
28
+ "@types/node": "^24.3.1"
26
29
  }
27
30
  }