mx-cloud 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,6 +85,10 @@ export default class Interpreter extends EventEmitter {
85
85
  * Sets the abort flag to immediately stop all operations
86
86
  */
87
87
  abort(): void;
88
+ /**
89
+ * Returns the current abort status
90
+ */
91
+ getIsAborted(): boolean;
88
92
  /**
89
93
  * Given a Playwright page object and a "declarative" list of actions, this function
90
94
  * calls all mentioned functions on the Page object.\
@@ -316,6 +316,12 @@ class Interpreter extends events_1.EventEmitter {
316
316
  abort() {
317
317
  this.isAborted = true;
318
318
  }
319
+ /**
320
+ * Returns the current abort status
321
+ */
322
+ getIsAborted() {
323
+ return this.isAborted;
324
+ }
319
325
  /**
320
326
  * Given a Playwright page object and a "declarative" list of actions, this function
321
327
  * calls all mentioned functions on the Page object.\
@@ -582,7 +588,15 @@ class Interpreter extends events_1.EventEmitter {
582
588
  }
583
589
  if (methodName === 'waitForLoadState') {
584
590
  try {
585
- yield executeAction(invokee, methodName, step.args);
591
+ // Add timeout if not already specified
592
+ let args = step.args;
593
+ if (Array.isArray(args) && args.length === 1) {
594
+ args = [args[0], { timeout: 30000 }];
595
+ }
596
+ else if (!Array.isArray(args)) {
597
+ args = [args, { timeout: 30000 }];
598
+ }
599
+ yield executeAction(invokee, methodName, args);
586
600
  }
587
601
  catch (error) {
588
602
  yield executeAction(invokee, methodName, 'domcontentloaded');
@@ -639,7 +653,17 @@ class Interpreter extends events_1.EventEmitter {
639
653
  debugLog("Workflow aborted, stopping scrapeCurrentPage");
640
654
  return;
641
655
  }
642
- const results = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
656
+ // Add timeout to prevent hanging on page evaluation
657
+ const evaluationPromise = page.evaluate((cfg) => window.scrapeList(cfg), config);
658
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('Page evaluation timeout')), 10000));
659
+ let results;
660
+ try {
661
+ results = yield Promise.race([evaluationPromise, timeoutPromise]);
662
+ }
663
+ catch (error) {
664
+ debugLog(`Page evaluation failed: ${error.message}`);
665
+ return;
666
+ }
643
667
  const newResults = results.filter(item => {
644
668
  const uniqueKey = JSON.stringify(item);
645
669
  if (scrapedItems.has(uniqueKey))
@@ -696,14 +720,22 @@ class Interpreter extends events_1.EventEmitter {
696
720
  });
697
721
  // Enhanced button finder with retry mechanism for both CSS and XPath selectors
698
722
  const findWorkingButton = (selectors) => __awaiter(this, void 0, void 0, function* () {
723
+ const startTime = Date.now();
724
+ const MAX_BUTTON_SEARCH_TIME = 15000;
699
725
  let updatedSelectors = [...selectors];
700
726
  for (let i = 0; i < selectors.length; i++) {
727
+ // Check overall timeout
728
+ if (Date.now() - startTime > MAX_BUTTON_SEARCH_TIME) {
729
+ debugLog(`Button search timeout reached (${MAX_BUTTON_SEARCH_TIME}ms), aborting`);
730
+ break;
731
+ }
701
732
  const selector = selectors[i];
702
733
  let retryCount = 0;
703
734
  let selectorSuccess = false;
704
735
  while (retryCount < MAX_RETRIES && !selectorSuccess) {
705
736
  try {
706
- const button = yield waitForSelectorUniversal(selector, { timeout: 10000 });
737
+ // Reduce timeout to prevent hanging on slow selectors
738
+ const button = yield waitForSelectorUniversal(selector, { timeout: 2000 });
707
739
  if (button) {
708
740
  debugLog('Found working selector:', selector);
709
741
  return {
@@ -712,16 +744,30 @@ class Interpreter extends events_1.EventEmitter {
712
744
  updatedSelectors
713
745
  };
714
746
  }
747
+ else {
748
+ // Treat null result as failed attempt
749
+ retryCount++;
750
+ debugLog(`Selector "${selector}" not found: attempt ${retryCount}/${MAX_RETRIES}`);
751
+ if (retryCount < MAX_RETRIES) {
752
+ yield page.waitForTimeout(RETRY_DELAY);
753
+ }
754
+ else {
755
+ debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
756
+ updatedSelectors = updatedSelectors.filter(s => s !== selector);
757
+ selectorSuccess = true; // Exit retry loop for this selector
758
+ }
759
+ }
715
760
  }
716
761
  catch (error) {
717
762
  retryCount++;
718
- debugLog(`Selector "${selector}" failed: attempt ${retryCount}/${MAX_RETRIES}`);
763
+ debugLog(`Selector "${selector}" error: attempt ${retryCount}/${MAX_RETRIES} - ${error.message}`);
719
764
  if (retryCount < MAX_RETRIES) {
720
765
  yield page.waitForTimeout(RETRY_DELAY);
721
766
  }
722
767
  else {
723
768
  debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
724
769
  updatedSelectors = updatedSelectors.filter(s => s !== selector);
770
+ selectorSuccess = true; // Exit retry loop for this selector
725
771
  }
726
772
  }
727
773
  }
@@ -748,6 +794,10 @@ class Interpreter extends events_1.EventEmitter {
748
794
  });
749
795
  let availableSelectors = config.pagination.selector.split(',');
750
796
  let unchangedResultCounter = 0;
797
+ let paginationIterations = 0;
798
+ const MAX_PAGINATION_ITERATIONS = 100; // Prevent infinite pagination
799
+ const paginationStartTime = Date.now();
800
+ const MAX_PAGINATION_TIME = 30 * 60 * 1000; // 30 minutes max for pagination
751
801
  try {
752
802
  while (true) {
753
803
  // Check abort flag at start of each pagination iteration
@@ -755,6 +805,19 @@ class Interpreter extends events_1.EventEmitter {
755
805
  this.log('Workflow aborted during pagination loop', logger_1.Level.WARN);
756
806
  return allResults;
757
807
  }
808
+ // Pagination circuit breakers
809
+ if (++paginationIterations > MAX_PAGINATION_ITERATIONS) {
810
+ debugLog(`Maximum pagination iterations reached (${MAX_PAGINATION_ITERATIONS}), stopping`);
811
+ return allResults;
812
+ }
813
+ if (Date.now() - paginationStartTime > MAX_PAGINATION_TIME) {
814
+ debugLog('Maximum pagination time reached (10 minutes), stopping');
815
+ return allResults;
816
+ }
817
+ // Add async yield every 5 iterations to prevent event loop blocking
818
+ if (paginationIterations % 5 === 0) {
819
+ yield new Promise(resolve => setImmediate(resolve));
820
+ }
758
821
  switch (config.pagination.type) {
759
822
  case 'scrollDown': {
760
823
  let previousResultCount = allResults.length;
@@ -971,10 +1034,23 @@ class Interpreter extends events_1.EventEmitter {
971
1034
  if (checkLimit())
972
1035
  return allResults;
973
1036
  let loadMoreCounter = 0;
974
- // let previousResultCount = allResults.length;
975
- // let noNewItemsCounter = 0;
976
- // const MAX_NO_NEW_ITEMS = 2;
1037
+ const MAX_LOAD_MORE_ITERATIONS = 100; // Prevent infinite load more
1038
+ const loadMoreStartTime = Date.now();
1039
+ const MAX_LOAD_MORE_TIME = 30 * 60 * 1000; // 30 minutes max for load more
977
1040
  while (true) {
1041
+ // Load more circuit breakers
1042
+ if (loadMoreCounter >= MAX_LOAD_MORE_ITERATIONS) {
1043
+ debugLog(`Maximum load more iterations reached (${MAX_LOAD_MORE_ITERATIONS}), stopping`);
1044
+ return allResults;
1045
+ }
1046
+ if (Date.now() - loadMoreStartTime > MAX_LOAD_MORE_TIME) {
1047
+ debugLog('Maximum load more time reached (5 minutes), stopping');
1048
+ return allResults;
1049
+ }
1050
+ // Add async yield every 3 iterations
1051
+ if (loadMoreCounter % 3 === 0 && loadMoreCounter > 0) {
1052
+ yield new Promise(resolve => setImmediate(resolve));
1053
+ }
978
1054
  // Find working button with retry mechanism
979
1055
  const { button: loadMoreButton, workingSelector, updatedSelectors } = yield findWorkingButton(availableSelectors);
980
1056
  availableSelectors = updatedSelectors;
@@ -1730,12 +1806,35 @@ class Interpreter extends events_1.EventEmitter {
1730
1806
  selector.includes('@id=');
1731
1807
  let count = 0;
1732
1808
  if (isXPath) {
1809
+ // Add timeout to prevent XPath hanging
1733
1810
  const locator = page.locator(`xpath=${selector}`);
1734
- count = yield locator.count();
1811
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('XPath timeout')), 5000));
1812
+ try {
1813
+ count = yield Promise.race([
1814
+ locator.count(),
1815
+ timeoutPromise
1816
+ ]);
1817
+ }
1818
+ catch (error) {
1819
+ // XPath timed out or failed
1820
+ return false;
1821
+ }
1735
1822
  }
1736
1823
  else {
1737
- const elements = yield page.$$(selector);
1738
- count = elements ? elements.length : 0;
1824
+ // Add timeout to CSS selector operations
1825
+ try {
1826
+ const elementsPromise = page.$$(selector);
1827
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('CSS selector timeout')), 5000));
1828
+ const elements = yield Promise.race([
1829
+ elementsPromise,
1830
+ timeoutPromise
1831
+ ]);
1832
+ count = elements ? elements.length : 0;
1833
+ }
1834
+ catch (error) {
1835
+ // CSS selector timed out or failed
1836
+ return false;
1837
+ }
1739
1838
  }
1740
1839
  // For list selectors, we need multiple elements
1741
1840
  if (isListSelector) {
@@ -1998,12 +2097,26 @@ class Interpreter extends events_1.EventEmitter {
1998
2097
  /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
1999
2098
  let loopIterations = 0;
2000
2099
  const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
2100
+ let consecutiveFailures = 0;
2101
+ const MAX_CONSECUTIVE_FAILURES = 10;
2102
+ const startTime = Date.now();
2103
+ const MAX_EXECUTION_TIME = 30 * 60 * 1000; // 30 minutes max
2001
2104
  while (true) {
2002
- // Circuit breaker to prevent infinite loops
2105
+ // Multiple circuit breakers to prevent infinite loops
2003
2106
  if (++loopIterations > MAX_LOOP_ITERATIONS) {
2004
2107
  this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
2005
2108
  return;
2006
2109
  }
2110
+ // Time-based circuit breaker
2111
+ if (Date.now() - startTime > MAX_EXECUTION_TIME) {
2112
+ this.log('Maximum execution time reached (30 minutes), terminating workflow', logger_1.Level.ERROR);
2113
+ return;
2114
+ }
2115
+ // Failure-based circuit breaker
2116
+ if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
2117
+ this.log('Too many consecutive failures, terminating to prevent hang', logger_1.Level.ERROR);
2118
+ return;
2119
+ }
2007
2120
  // Check abort flag immediately
2008
2121
  if (this.isAborted) {
2009
2122
  this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
@@ -2094,11 +2207,19 @@ class Interpreter extends events_1.EventEmitter {
2094
2207
  usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
2095
2208
  workflowCopy.splice(actionId, 1);
2096
2209
  console.log(`Action with ID ${action.id} removed from the workflow copy.`);
2097
- // Reset loop iteration counter on successful action
2098
- loopIterations = 0;
2210
+ // Reset counters on successful action (but keep some history to prevent infinite resets)
2211
+ loopIterations = Math.max(0, loopIterations - 10);
2212
+ consecutiveFailures = 0;
2213
+ // Add async yield to prevent event loop blocking
2214
+ if (loopIterations % 10 === 0) {
2215
+ yield new Promise(resolve => setImmediate(resolve));
2216
+ }
2099
2217
  }
2100
2218
  catch (e) {
2101
2219
  this.log(e, logger_1.Level.ERROR);
2220
+ consecutiveFailures++;
2221
+ // Add delay on failures to prevent tight error loops
2222
+ yield new Promise(resolve => setTimeout(resolve, Math.min(1000, consecutiveFailures * 200)));
2102
2223
  // Don't crash on individual action failures - continue with next iteration
2103
2224
  continue;
2104
2225
  }
@@ -2112,9 +2233,32 @@ class Interpreter extends events_1.EventEmitter {
2112
2233
  }
2113
2234
  ensureScriptsLoaded(page) {
2114
2235
  return __awaiter(this, void 0, void 0, function* () {
2115
- const isScriptLoaded = yield page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function' && typeof window.scrapeListAuto === 'function' && typeof window.scrollDown === 'function' && typeof window.scrollUp === 'function');
2116
- if (!isScriptLoaded) {
2117
- yield page.addInitScript({ path: path_1.default.join(__dirname, 'browserSide', 'scraper.js') });
2236
+ try {
2237
+ // Add timeout to prevent hanging on script evaluation
2238
+ const evaluationPromise = page.evaluate(() => typeof window.scrape === 'function' &&
2239
+ typeof window.scrapeSchema === 'function' &&
2240
+ typeof window.scrapeList === 'function' &&
2241
+ typeof window.scrapeListAuto === 'function' &&
2242
+ typeof window.scrollDown === 'function' &&
2243
+ typeof window.scrollUp === 'function');
2244
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error('Script check timeout')), 3000));
2245
+ const isScriptLoaded = yield Promise.race([
2246
+ evaluationPromise,
2247
+ timeoutPromise
2248
+ ]);
2249
+ if (!isScriptLoaded) {
2250
+ yield page.addInitScript({ path: path_1.default.join(__dirname, 'browserSide', 'scraper.js') });
2251
+ }
2252
+ }
2253
+ catch (error) {
2254
+ // If script check fails, try to add the script anyway
2255
+ this.log(`Script check failed, adding script anyway: ${error.message}`, logger_1.Level.WARN);
2256
+ try {
2257
+ yield page.addInitScript({ path: path_1.default.join(__dirname, 'browserSide', 'scraper.js') });
2258
+ }
2259
+ catch (scriptError) {
2260
+ this.log(`Failed to add script: ${scriptError.message}`, logger_1.Level.ERROR);
2261
+ }
2118
2262
  }
2119
2263
  });
2120
2264
  }
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "mx-cloud",
3
- "version": "0.0.17",
3
+ "version": "0.0.18",
4
4
  "description": "mx cloud",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",
7
7
  "scripts": {
8
8
  "test": "jest",
9
- "build": "tsc",
9
+ "build": "npm run clean && tsc",
10
10
  "lint": "eslint .",
11
11
  "clean": "rimraf ./build"
12
12
  },
@@ -23,8 +23,5 @@
23
23
  "playwright": "^1.50.0",
24
24
  "playwright-extra": "^4.3.6",
25
25
  "puppeteer-extra-plugin-stealth": "^2.11.2"
26
- },
27
- "devDependencies": {
28
- "@types/node": "^24.3.1"
29
26
  }
30
27
  }