mx-cloud 0.0.15 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,6 +54,7 @@ export default class Interpreter extends EventEmitter {
54
54
  private options;
55
55
  private concurrency;
56
56
  private stopper;
57
+ private isAborted;
57
58
  private log;
58
59
  private blocker;
59
60
  private cumulativeResults;
@@ -80,6 +81,10 @@ export default class Interpreter extends EventEmitter {
80
81
  * @returns True if `where` is applicable in the given context, false otherwise
81
82
  */
82
83
  private applicable;
84
+ /**
85
+ * Sets the abort flag to immediately stop all operations
86
+ */
87
+ abort(): void;
83
88
  /**
84
89
  * Given a Playwright's page object and a "declarative" list of actions, this function
85
90
  * calls all mentioned functions on the Page object.\
@@ -63,6 +63,7 @@ class Interpreter extends events_1.EventEmitter {
63
63
  var _a;
64
64
  super();
65
65
  this.stopper = null;
66
+ this.isAborted = false;
66
67
  this.blocker = null;
67
68
  this.cumulativeResults = [];
68
69
  this.autohealFailures = [];
@@ -90,7 +91,9 @@ class Interpreter extends events_1.EventEmitter {
90
91
  adblocker_playwright_1.PlaywrightBlocker.fromLists(cross_fetch_1.default, ['https://easylist.to/easylist/easylist.txt']).then(blocker => {
91
92
  this.blocker = blocker;
92
93
  }).catch(err => {
93
- this.log(`Failed to initialize ad-blocker:`, logger_1.Level.ERROR);
94
+ this.log(`Failed to initialize ad-blocker: ${err.message}`, logger_1.Level.ERROR);
95
+ // Continue without ad-blocker rather than crashing
96
+ this.blocker = null;
94
97
  });
95
98
  }
96
99
  trackAutohealFailure(error) {
@@ -307,6 +310,12 @@ class Interpreter extends events_1.EventEmitter {
307
310
  }
308
311
  });
309
312
  }
313
+ /**
314
+ * Sets the abort flag to immediately stop all operations
315
+ */
316
+ abort() {
317
+ this.isAborted = true;
318
+ }
310
319
  /**
311
320
  * Given a Playwright's page object and a "declarative" list of actions, this function
312
321
  * calls all mentioned functions on the Page object.\
@@ -318,7 +327,12 @@ class Interpreter extends events_1.EventEmitter {
318
327
  */
319
328
  carryOutSteps(page, steps) {
320
329
  return __awaiter(this, void 0, void 0, function* () {
321
- var _a;
330
+ var _a, _b;
331
+ // Check abort flag at start of execution
332
+ if (this.isAborted) {
333
+ this.log('Workflow aborted, stopping execution', logger_1.Level.WARN);
334
+ return;
335
+ }
322
336
  /**
323
337
  * Defines overloaded (or added) methods/actions usable in the workflow.
324
338
  * If a method overloads any existing method of the Page class, it accepts the same set
@@ -376,6 +390,11 @@ class Interpreter extends events_1.EventEmitter {
376
390
  }),
377
391
  scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
378
392
  var _a;
393
+ // Check abort flag at start of scraping
394
+ if (this.isAborted) {
395
+ this.log('Workflow aborted, stopping scrapeSchema', logger_1.Level.WARN);
396
+ return;
397
+ }
379
398
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
380
399
  this.options.debugChannel.setActionType('scrapeSchema');
381
400
  }
@@ -388,21 +407,52 @@ class Interpreter extends events_1.EventEmitter {
388
407
  if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
389
408
  this.cumulativeResults = [];
390
409
  }
410
+ const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
391
411
  if (this.cumulativeResults.length === 0) {
392
- this.cumulativeResults.push({});
412
+ // First execution - create initial row
413
+ const newRow = {};
414
+ Object.entries(resultToProcess).forEach(([key, value]) => {
415
+ if (value !== undefined) {
416
+ newRow[key] = value;
417
+ }
418
+ });
419
+ this.cumulativeResults.push(newRow);
393
420
  }
394
- const mergedResult = this.cumulativeResults[0];
395
- const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
396
- Object.entries(resultToProcess).forEach(([key, value]) => {
397
- if (value !== undefined) {
398
- mergedResult[key] = value;
421
+ else {
422
+ // Check if any keys from new result already exist in the last row
423
+ const lastRow = this.cumulativeResults[this.cumulativeResults.length - 1];
424
+ const newResultKeys = Object.keys(resultToProcess).filter(key => resultToProcess[key] !== undefined);
425
+ const hasRepeatedKeys = newResultKeys.some(key => lastRow.hasOwnProperty(key));
426
+ if (hasRepeatedKeys) {
427
+ // Keys are repeated - create a new row
428
+ const newRow = {};
429
+ Object.entries(resultToProcess).forEach(([key, value]) => {
430
+ if (value !== undefined) {
431
+ newRow[key] = value;
432
+ }
433
+ });
434
+ this.cumulativeResults.push(newRow);
399
435
  }
400
- });
401
- console.log("Updated merged result:", mergedResult);
402
- yield this.options.serializableCallback([mergedResult]);
436
+ else {
437
+ // No repeated keys - merge with the last row
438
+ Object.entries(resultToProcess).forEach(([key, value]) => {
439
+ if (value !== undefined) {
440
+ lastRow[key] = value;
441
+ }
442
+ });
443
+ }
444
+ }
445
+ console.log("Total accumulated rows:", this.cumulativeResults.length);
446
+ console.log("Current results:", this.cumulativeResults);
447
+ yield this.options.serializableCallback(this.cumulativeResults);
403
448
  }),
404
449
  scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
405
450
  var _a, _b;
451
+ // Check abort flag at start of scraping
452
+ if (this.isAborted) {
453
+ this.log('Workflow aborted, stopping scrapeList', logger_1.Level.WARN);
454
+ return;
455
+ }
406
456
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
407
457
  this.options.debugChannel.setActionType('scrapeList');
408
458
  }
@@ -410,17 +460,37 @@ class Interpreter extends events_1.EventEmitter {
410
460
  yield this.options.serializableCallback({});
411
461
  return;
412
462
  }
413
- yield this.ensureScriptsLoaded(page);
414
- if ((_b = this.options.debugChannel) === null || _b === void 0 ? void 0 : _b.incrementScrapeListIndex) {
415
- this.options.debugChannel.incrementScrapeListIndex();
416
- }
417
- if (!config.pagination) {
418
- const scrapeResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
463
+ try {
464
+ yield this.ensureScriptsLoaded(page);
465
+ if ((_b = this.options.debugChannel) === null || _b === void 0 ? void 0 : _b.incrementScrapeListIndex) {
466
+ this.options.debugChannel.incrementScrapeListIndex();
467
+ }
468
+ let scrapeResults = [];
469
+ if (!config.pagination) {
470
+ scrapeResults = yield page.evaluate((cfg) => {
471
+ try {
472
+ return window.scrapeList(cfg);
473
+ }
474
+ catch (error) {
475
+ console.warn('ScrapeList evaluation failed:', error.message);
476
+ return []; // Return empty array instead of failing
477
+ }
478
+ }, config);
479
+ }
480
+ else {
481
+ scrapeResults = yield this.handlePagination(page, config);
482
+ }
483
+ // Ensure we always have an array
484
+ if (!Array.isArray(scrapeResults)) {
485
+ scrapeResults = [];
486
+ }
487
+ console.log(`ScrapeList completed with ${scrapeResults.length} results`);
419
488
  yield this.options.serializableCallback(scrapeResults);
420
489
  }
421
- else {
422
- const scrapeResults = yield this.handlePagination(page, config);
423
- yield this.options.serializableCallback(scrapeResults);
490
+ catch (error) {
491
+ console.error('ScrapeList action failed completely:', error.message);
492
+ // Don't throw error, just return empty array
493
+ yield this.options.serializableCallback([]);
424
494
  }
425
495
  }),
426
496
  scrapeListAuto: (config) => __awaiter(this, void 0, void 0, function* () {
@@ -451,9 +521,15 @@ class Interpreter extends events_1.EventEmitter {
451
521
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
452
522
  this.options.debugChannel.setActionType('script');
453
523
  }
454
- const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
455
- const x = new AsyncFunction('page', 'log', code);
456
- yield x(page, this.log);
524
+ try {
525
+ const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
526
+ const x = new AsyncFunction('page', 'log', code);
527
+ yield x(page, this.log);
528
+ }
529
+ catch (error) {
530
+ this.log(`Script execution failed: ${error.message}`, logger_1.Level.ERROR);
531
+ throw new Error(`Script execution error: ${error.message}`);
532
+ }
457
533
  }),
458
534
  flag: () => __awaiter(this, void 0, void 0, function* () {
459
535
  return new Promise((res) => {
@@ -481,54 +557,72 @@ class Interpreter extends events_1.EventEmitter {
481
557
  }
482
558
  });
483
559
  for (const step of steps) {
484
- this.log(`Launching ${String(step.action)}`, logger_1.Level.LOG);
485
- if (step.action in wawActions) {
486
- // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
487
- const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
488
- yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
560
+ // Check abort flag before each step
561
+ if (this.isAborted) {
562
+ this.log('Workflow aborted during step execution', logger_1.Level.WARN);
563
+ return;
489
564
  }
490
- else {
491
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
492
- this.options.debugChannel.setActionType(String(step.action));
493
- }
494
- // Implements the dot notation for the "method name" in the workflow
495
- const levels = String(step.action).split('.');
496
- const methodName = levels[levels.length - 1];
497
- let invokee = page;
498
- for (const level of levels.splice(0, levels.length - 1)) {
499
- invokee = invokee[level];
565
+ this.log(`Launching ${String(step.action)}`, logger_1.Level.LOG);
566
+ try {
567
+ if (step.action in wawActions) {
568
+ // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
569
+ const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
570
+ yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
500
571
  }
501
- if (methodName === 'waitForLoadState') {
502
- try {
503
- yield executeAction(invokee, methodName, step.args);
572
+ else {
573
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
574
+ this.options.debugChannel.setActionType(String(step.action));
504
575
  }
505
- catch (error) {
506
- yield executeAction(invokee, methodName, 'domcontentloaded');
576
+ // Implements the dot notation for the "method name" in the workflow
577
+ const levels = String(step.action).split('.');
578
+ const methodName = levels[levels.length - 1];
579
+ let invokee = page;
580
+ for (const level of levels.splice(0, levels.length - 1)) {
581
+ invokee = invokee[level];
507
582
  }
508
- }
509
- else if (methodName === 'click') {
510
- try {
511
- yield executeAction(invokee, methodName, step.args);
583
+ if (methodName === 'waitForLoadState') {
584
+ try {
585
+ yield executeAction(invokee, methodName, step.args);
586
+ }
587
+ catch (error) {
588
+ yield executeAction(invokee, methodName, 'domcontentloaded');
589
+ }
512
590
  }
513
- catch (error) {
591
+ else if (methodName === 'click') {
514
592
  try {
515
- yield executeAction(invokee, methodName, [step.args[0], { force: true }]);
593
+ yield executeAction(invokee, methodName, step.args);
516
594
  }
517
595
  catch (error) {
518
- continue;
596
+ try {
597
+ yield executeAction(invokee, methodName, [step.args[0], { force: true }]);
598
+ }
599
+ catch (error) {
600
+ this.log(`Click action failed for selector ${(_b = step.args) === null || _b === void 0 ? void 0 : _b[0]}: ${error.message}`, logger_1.Level.WARN);
601
+ continue; // Skip to next action
602
+ }
519
603
  }
520
604
  }
521
- }
522
- else {
523
- yield executeAction(invokee, methodName, step.args);
605
+ else {
606
+ yield executeAction(invokee, methodName, step.args);
607
+ }
524
608
  }
525
609
  }
610
+ catch (error) {
611
+ this.log(`Action ${String(step.action)} failed: ${error.message}`, logger_1.Level.WARN);
612
+ // Continue to next action instead of breaking
613
+ continue;
614
+ }
526
615
  yield new Promise((res) => { setTimeout(res, 500); });
527
616
  }
528
617
  });
529
618
  }
530
619
  handlePagination(page, config) {
531
620
  return __awaiter(this, void 0, void 0, function* () {
621
+ // Check abort flag at start of pagination
622
+ if (this.isAborted) {
623
+ this.log('Workflow aborted, stopping pagination', logger_1.Level.WARN);
624
+ return [];
625
+ }
532
626
  let allResults = [];
533
627
  let previousHeight = 0;
534
628
  let scrapedItems = new Set();
@@ -540,6 +634,11 @@ class Interpreter extends events_1.EventEmitter {
540
634
  console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
541
635
  };
542
636
  const scrapeCurrentPage = () => __awaiter(this, void 0, void 0, function* () {
637
+ // Check abort flag before scraping current page
638
+ if (this.isAborted) {
639
+ debugLog("Workflow aborted, stopping scrapeCurrentPage");
640
+ return;
641
+ }
543
642
  const results = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
544
643
  const newResults = results.filter(item => {
545
644
  const uniqueKey = JSON.stringify(item);
@@ -651,6 +750,11 @@ class Interpreter extends events_1.EventEmitter {
651
750
  let unchangedResultCounter = 0;
652
751
  try {
653
752
  while (true) {
753
+ // Check abort flag at start of each pagination iteration
754
+ if (this.isAborted) {
755
+ this.log('Workflow aborted during pagination loop', logger_1.Level.WARN);
756
+ return allResults;
757
+ }
654
758
  switch (config.pagination.type) {
655
759
  case 'scrollDown': {
656
760
  let previousResultCount = allResults.length;
@@ -658,9 +762,14 @@ class Interpreter extends events_1.EventEmitter {
658
762
  if (checkLimit()) {
659
763
  return allResults;
660
764
  }
661
- yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
765
+ yield page.evaluate(() => {
766
+ const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
767
+ window.scrollTo(0, scrollHeight);
768
+ });
662
769
  yield page.waitForTimeout(2000);
663
- const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
770
+ const currentHeight = yield page.evaluate(() => {
771
+ return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
772
+ });
664
773
  const currentResultCount = allResults.length;
665
774
  if (currentResultCount === previousResultCount) {
666
775
  unchangedResultCounter++;
@@ -915,9 +1024,14 @@ class Interpreter extends events_1.EventEmitter {
915
1024
  }
916
1025
  // Wait for content to load and check scroll height
917
1026
  yield page.waitForTimeout(2000);
918
- yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
1027
+ yield page.evaluate(() => {
1028
+ const scrollHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
1029
+ window.scrollTo(0, scrollHeight);
1030
+ });
919
1031
  yield page.waitForTimeout(2000);
920
- const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
1032
+ const currentHeight = yield page.evaluate(() => {
1033
+ return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
1034
+ });
921
1035
  const heightChanged = currentHeight !== previousHeight;
922
1036
  previousHeight = currentHeight;
923
1037
  yield scrapeCurrentPage();
@@ -1882,7 +1996,19 @@ class Interpreter extends events_1.EventEmitter {
1882
1996
  this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
1883
1997
  });
1884
1998
  /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
1999
+ let loopIterations = 0;
2000
+ const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
1885
2001
  while (true) {
2002
+ // Circuit breaker to prevent infinite loops
2003
+ if (++loopIterations > MAX_LOOP_ITERATIONS) {
2004
+ this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
2005
+ return;
2006
+ }
2007
+ // Check abort flag immediately
2008
+ if (this.isAborted) {
2009
+ this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
2010
+ return;
2011
+ }
1886
2012
  // Checks whether the page was closed from outside,
1887
2013
  // or the workflow execution has been stopped via `interpreter.stop()`
1888
2014
  if (p.isClosed() || !this.stopper) {
@@ -1895,26 +2021,29 @@ class Interpreter extends events_1.EventEmitter {
1895
2021
  yield p.close();
1896
2022
  return;
1897
2023
  }
1898
- const newSelectors = this.getSelectors(workflowCopy);
1899
- newSelectors.forEach(selector => {
1900
- if (!selectors.includes(selector)) {
1901
- selectors.push(selector);
1902
- }
1903
- });
1904
- let pageState = {};
1905
- let getStateTest = "Hello";
1906
- try {
1907
- pageState = yield this.getState(p, workflowCopy, selectors);
1908
- selectors = [];
1909
- console.log("Empty selectors:", selectors);
1910
- }
1911
- catch (e) {
1912
- this.log('The browser has been closed.');
2024
+ if (workflowCopy.length === 0) {
2025
+ this.log('All actions completed. Workflow finished.', logger_1.Level.LOG);
1913
2026
  return;
1914
2027
  }
1915
- if (this.options.debug) {
1916
- this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, logger_1.Level.WARN);
1917
- }
2028
+ // const newSelectors = this.getSelectors(workflowCopy);
2029
+ // newSelectors.forEach(selector => {
2030
+ // if (!selectors.includes(selector)) {
2031
+ // selectors.push(selector);
2032
+ // }
2033
+ // });
2034
+ // let pageState = {};
2035
+ // let getStateTest = "Hello";
2036
+ // try {
2037
+ // pageState = await this.getState(p, workflowCopy, selectors);
2038
+ // selectors = [];
2039
+ // console.log("Empty selectors:", selectors)
2040
+ // } catch (e: any) {
2041
+ // this.log('The browser has been closed.');
2042
+ // return;
2043
+ // }
2044
+ // if (this.options.debug) {
2045
+ // this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, Level.WARN);
2046
+ // }
1918
2047
  // const actionId = workflow.findIndex((step) => {
1919
2048
  // const isApplicable = this.applicable(step.where, pageState, usedActions);
1920
2049
  // console.log("-------------------------------------------------------------");
@@ -1934,15 +2063,21 @@ class Interpreter extends events_1.EventEmitter {
1934
2063
  // console.log("SCHEMA CHANGES:", changes);
1935
2064
  // }
1936
2065
  // }
1937
- actionId = this.getMatchingActionId(workflowCopy, pageState, usedActions);
1938
- if (actionId !== -1 && workflowCopy[actionId]) {
1939
- workflowCopy[actionId] = yield this.validateWorkflowAction(p, workflowCopy[actionId]);
1940
- }
2066
+ // actionId = this.getMatchingActionId(workflowCopy, pageState, usedActions);
2067
+ // if (actionId !== -1 && workflowCopy[actionId]) {
2068
+ // workflowCopy[actionId] = await this.validateWorkflowAction(p, workflowCopy[actionId]);
2069
+ // }
2070
+ const actionId = workflowCopy.length - 1;
1941
2071
  const action = workflowCopy[actionId];
1942
2072
  console.log("MATCHED ACTION:", action);
1943
2073
  console.log("MATCHED ACTION ID:", actionId);
1944
2074
  this.log(`Matched ${JSON.stringify(action === null || action === void 0 ? void 0 : action.where)}`, logger_1.Level.LOG);
1945
2075
  if (action) { // action is matched
2076
+ // Check abort flag before executing action
2077
+ if (this.isAborted) {
2078
+ this.log('Workflow aborted before action execution', logger_1.Level.WARN);
2079
+ return;
2080
+ }
1946
2081
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.activeId) {
1947
2082
  this.options.debugChannel.activeId(actionId);
1948
2083
  }
@@ -1959,10 +2094,13 @@ class Interpreter extends events_1.EventEmitter {
1959
2094
  usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
1960
2095
  workflowCopy.splice(actionId, 1);
1961
2096
  console.log(`Action with ID ${action.id} removed from the workflow copy.`);
1962
- // const newSelectors = this.getPreviousSelectors(workflow, actionId);
2097
+ // Reset loop iteration counter on successful action
2098
+ loopIterations = 0;
1963
2099
  }
1964
2100
  catch (e) {
1965
2101
  this.log(e, logger_1.Level.ERROR);
2102
+ // Don't crash on individual action failures - continue with next iteration
2103
+ continue;
1966
2104
  }
1967
2105
  }
1968
2106
  else {
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "mx-cloud",
3
- "version": "0.0.15",
3
+ "version": "0.0.17",
4
4
  "description": "mx cloud",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",
7
7
  "scripts": {
8
8
  "test": "jest",
9
- "build": "npm run clean && tsc",
9
+ "build": "tsc",
10
10
  "lint": "eslint .",
11
11
  "clean": "rimraf ./build"
12
12
  },
@@ -23,5 +23,8 @@
23
23
  "playwright": "^1.50.0",
24
24
  "playwright-extra": "^4.3.6",
25
25
  "puppeteer-extra-plugin-stealth": "^2.11.2"
26
+ },
27
+ "devDependencies": {
28
+ "@types/node": "^24.3.1"
26
29
  }
27
30
  }