mx-cloud 0.0.22 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -160,5 +160,10 @@ export default class Interpreter extends EventEmitter {
160
160
  */
161
161
  run(page: Page, params?: ParamType): Promise<void>;
162
162
  stop(): Promise<void>;
163
+ /**
164
+ * Cleanup method to release resources and prevent memory leaks
165
+ * Call this when the interpreter is no longer needed
166
+ */
167
+ cleanup(): Promise<void>;
163
168
  }
164
169
  export {};
@@ -394,8 +394,9 @@ class Interpreter extends events_1.EventEmitter {
394
394
  for (const link of links) {
395
395
  // eslint-disable-next-line
396
396
  this.concurrency.addJob(() => __awaiter(this, void 0, void 0, function* () {
397
+ let newPage = null;
397
398
  try {
398
- const newPage = yield context.newPage();
399
+ newPage = yield context.newPage();
399
400
  yield newPage.goto(link);
400
401
  yield newPage.waitForLoadState('networkidle');
401
402
  yield this.runLoop(newPage, this.initializedWorkflow);
@@ -406,6 +407,16 @@ class Interpreter extends events_1.EventEmitter {
406
407
  // the interpreter by throwing).
407
408
  this.log(e, logger_1.Level.ERROR);
408
409
  }
410
+ finally {
411
+ if (newPage && !newPage.isClosed()) {
412
+ try {
413
+ yield newPage.close();
414
+ }
415
+ catch (closeError) {
416
+ this.log('Failed to close enqueued page', logger_1.Level.WARN);
417
+ }
418
+ }
419
+ }
409
420
  }));
410
421
  }
411
422
  yield page.close();
@@ -419,9 +430,8 @@ class Interpreter extends events_1.EventEmitter {
419
430
  const scrapeResults = yield page.evaluate((s) => window.scrape(s !== null && s !== void 0 ? s : null), selector);
420
431
  yield this.options.serializableCallback(scrapeResults);
421
432
  }),
422
- scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
433
+ scrapeSchema: (schema_1, ...args_1) => __awaiter(this, [schema_1, ...args_1], void 0, function* (schema, actionName = "") {
423
434
  var _a;
424
- // Check abort flag at start of scraping
425
435
  if (this.isAborted) {
426
436
  this.log('Workflow aborted, stopping scrapeSchema', logger_1.Level.WARN);
427
437
  return;
@@ -440,7 +450,6 @@ class Interpreter extends events_1.EventEmitter {
440
450
  }
441
451
  const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
442
452
  if (this.cumulativeResults.length === 0) {
443
- // First execution - create initial row
444
453
  const newRow = {};
445
454
  Object.entries(resultToProcess).forEach(([key, value]) => {
446
455
  if (value !== undefined) {
@@ -450,12 +459,10 @@ class Interpreter extends events_1.EventEmitter {
450
459
  this.cumulativeResults.push(newRow);
451
460
  }
452
461
  else {
453
- // Check if any keys from new result already exist in the last row
454
462
  const lastRow = this.cumulativeResults[this.cumulativeResults.length - 1];
455
463
  const newResultKeys = Object.keys(resultToProcess).filter(key => resultToProcess[key] !== undefined);
456
464
  const hasRepeatedKeys = newResultKeys.some(key => lastRow.hasOwnProperty(key));
457
465
  if (hasRepeatedKeys) {
458
- // Keys are repeated - create a new row
459
466
  const newRow = {};
460
467
  Object.entries(resultToProcess).forEach(([key, value]) => {
461
468
  if (value !== undefined) {
@@ -465,7 +472,6 @@ class Interpreter extends events_1.EventEmitter {
465
472
  this.cumulativeResults.push(newRow);
466
473
  }
467
474
  else {
468
- // No repeated keys - merge with the last row
469
475
  Object.entries(resultToProcess).forEach(([key, value]) => {
470
476
  if (value !== undefined) {
471
477
  lastRow[key] = value;
@@ -473,30 +479,24 @@ class Interpreter extends events_1.EventEmitter {
473
479
  });
474
480
  }
475
481
  }
476
- console.log("Total accumulated rows:", this.cumulativeResults.length);
477
- console.log("Current results:", this.cumulativeResults);
478
- // ✅ Append schema results under "scrapeSchema" → name
479
482
  const actionType = "scrapeSchema";
480
- const actionName = schema.__name || "Texts";
483
+ const name = actionName || "Texts";
481
484
  if (!this.namedResults[actionType])
482
485
  this.namedResults[actionType] = {};
483
- this.namedResults[actionType][actionName] = this.cumulativeResults;
486
+ this.namedResults[actionType][name] = this.cumulativeResults;
484
487
  if (!this.serializableDataByType[actionType])
485
488
  this.serializableDataByType[actionType] = {};
486
- if (!this.serializableDataByType[actionType][actionName]) {
487
- this.serializableDataByType[actionType][actionName] = [];
489
+ if (!this.serializableDataByType[actionType][name]) {
490
+ this.serializableDataByType[actionType][name] = [];
488
491
  }
489
- // Store as array (matching cumulativeResults structure)
490
- this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
491
- // now emit full structured object
492
+ this.serializableDataByType[actionType][name] = [...this.cumulativeResults];
492
493
  yield this.options.serializableCallback({
493
494
  scrapeList: this.serializableDataByType.scrapeList,
494
495
  scrapeSchema: this.serializableDataByType.scrapeSchema
495
496
  });
496
497
  }),
497
- scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
498
+ scrapeList: (config_1, ...args_1) => __awaiter(this, [config_1, ...args_1], void 0, function* (config, actionName = "") {
498
499
  var _a, _b;
499
- // Check abort flag at start of scraping
500
500
  if (this.isAborted) {
501
501
  this.log('Workflow aborted, stopping scrapeList', logger_1.Level.WARN);
502
502
  return;
@@ -522,36 +522,31 @@ class Interpreter extends events_1.EventEmitter {
522
522
  }
523
523
  catch (error) {
524
524
  console.warn('ScrapeList evaluation failed:', error.message);
525
- return []; // Return empty array instead of failing
525
+ return [];
526
526
  }
527
527
  }, config);
528
528
  }
529
529
  else {
530
530
  paginationUsed = true;
531
- scrapeResults = yield this.handlePagination(page, config);
531
+ scrapeResults = yield this.handlePagination(page, config, actionName);
532
532
  }
533
- // Ensure we always have an array
534
533
  if (!Array.isArray(scrapeResults)) {
535
534
  scrapeResults = [];
536
535
  }
537
536
  console.log(`ScrapeList completed with ${scrapeResults.length} results`);
538
- // Only process and callback if pagination wasn't used
539
- // (handlePagination already handles storage and callbacks internally)
540
537
  if (!paginationUsed) {
541
- // ✅ Append list results under "scrapeList" → name
542
538
  const actionType = "scrapeList";
543
- let actionName = config.__name || "";
544
- // If no name provided, generate a unique one
545
- if (!actionName || actionName.trim() === "") {
539
+ let name = actionName || "";
540
+ if (!name || name.trim() === "") {
546
541
  this.scrapeListCounter++;
547
- actionName = `List ${this.scrapeListCounter}`;
542
+ name = `List ${this.scrapeListCounter}`;
548
543
  }
549
544
  if (!this.serializableDataByType[actionType])
550
545
  this.serializableDataByType[actionType] = {};
551
- if (!this.serializableDataByType[actionType][actionName]) {
552
- this.serializableDataByType[actionType][actionName] = [];
546
+ if (!this.serializableDataByType[actionType][name]) {
547
+ this.serializableDataByType[actionType][name] = [];
553
548
  }
554
- this.serializableDataByType[actionType][actionName].push(...scrapeResults);
549
+ this.serializableDataByType[actionType][name].push(...scrapeResults);
555
550
  yield this.options.serializableCallback({
556
551
  scrapeList: this.serializableDataByType.scrapeList,
557
552
  scrapeSchema: this.serializableDataByType.scrapeSchema
@@ -560,15 +555,18 @@ class Interpreter extends events_1.EventEmitter {
560
555
  }
561
556
  catch (error) {
562
557
  console.error('ScrapeList action failed completely:', error.message);
563
- // Don't throw error, just return empty array
564
558
  const actionType = "scrapeList";
565
- const actionName = config.__name || "List";
559
+ let name = actionName || "";
560
+ if (!name || name.trim() === "") {
561
+ this.scrapeListCounter++;
562
+ name = `List ${this.scrapeListCounter}`;
563
+ }
566
564
  if (!this.namedResults[actionType])
567
565
  this.namedResults[actionType] = {};
568
- this.namedResults[actionType][actionName] = [];
566
+ this.namedResults[actionType][name] = [];
569
567
  if (!this.serializableDataByType[actionType])
570
568
  this.serializableDataByType[actionType] = {};
571
- this.serializableDataByType[actionType][actionName] = [];
569
+ this.serializableDataByType[actionType][name] = [];
572
570
  yield this.options.serializableCallback({
573
571
  scrapeList: this.serializableDataByType.scrapeList,
574
572
  scrapeSchema: this.serializableDataByType.scrapeSchema
@@ -651,25 +649,7 @@ class Interpreter extends events_1.EventEmitter {
651
649
  if (debug === null || debug === void 0 ? void 0 : debug.setActionType) {
652
650
  debug.setActionType(String(step.action));
653
651
  }
654
- // Safely extract name for this step
655
- if (step === null || step === void 0 ? void 0 : step.name) {
656
- stepName = step.name;
657
- }
658
- else if (Array.isArray(step === null || step === void 0 ? void 0 : step.args) &&
659
- step.args.length > 0 &&
660
- typeof step.args[0] === "object" &&
661
- "__name" in step.args[0]) {
662
- stepName = step.args[0].__name;
663
- }
664
- else if (typeof (step === null || step === void 0 ? void 0 : step.args) === "object" &&
665
- (step === null || step === void 0 ? void 0 : step.args) !== null &&
666
- "__name" in step.args) {
667
- stepName = step.args.__name;
668
- }
669
- // Default fallback
670
- if (!stepName) {
671
- stepName = String(step.action);
672
- }
652
+ stepName = (step === null || step === void 0 ? void 0 : step.name) || String(step.action);
673
653
  if (debug && typeof debug.setActionName === "function") {
674
654
  debug.setActionName(stepName);
675
655
  }
@@ -682,9 +662,12 @@ class Interpreter extends events_1.EventEmitter {
682
662
  // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
683
663
  const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
684
664
  if (step.action === 'screenshot') {
685
- // call the screenshot handler directly to allow the extra name parameter
686
665
  yield wawActions.screenshot(...(params !== null && params !== void 0 ? params : []), stepName !== null && stepName !== void 0 ? stepName : undefined);
687
666
  }
667
+ else if (step.action === 'scrapeList' || step.action === 'scrapeSchema') {
668
+ const actionName = step.name || "";
669
+ yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []), actionName);
670
+ }
688
671
  else {
689
672
  yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
690
673
  }
@@ -744,16 +727,14 @@ class Interpreter extends events_1.EventEmitter {
744
727
  }
745
728
  });
746
729
  }
747
- handlePagination(page, config) {
748
- return __awaiter(this, void 0, void 0, function* () {
749
- // Check abort flag at start of pagination
730
+ handlePagination(page_1, config_1) {
731
+ return __awaiter(this, arguments, void 0, function* (page, config, providedActionName = "") {
750
732
  if (this.isAborted) {
751
733
  this.log('Workflow aborted, stopping pagination', logger_1.Level.WARN);
752
734
  return [];
753
735
  }
754
- // Generate action name for this scrapeList
755
736
  const actionType = "scrapeList";
756
- let actionName = config.__name || "";
737
+ let actionName = providedActionName || "";
757
738
  if (!actionName || actionName.trim() === "") {
758
739
  this.scrapeListCounter++;
759
740
  actionName = `List ${this.scrapeListCounter}`;
@@ -2224,9 +2205,10 @@ class Interpreter extends events_1.EventEmitter {
2224
2205
  * User-requested concurrency should be entirely managed by the concurrency manager,
2225
2206
  * e.g. via `enqueueLinks`.
2226
2207
  */
2227
- p.on('popup', (popup) => {
2208
+ const popupHandler = (popup) => {
2228
2209
  this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
2229
- });
2210
+ };
2211
+ p.on('popup', popupHandler);
2230
2212
  /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
2231
2213
  let loopIterations = 0;
2232
2214
  const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
@@ -2234,41 +2216,58 @@ class Interpreter extends events_1.EventEmitter {
2234
2216
  const MAX_CONSECUTIVE_FAILURES = 10;
2235
2217
  const startTime = Date.now();
2236
2218
  const MAX_EXECUTION_TIME = 30 * 60 * 1000; // 30 minutes max
2219
+ // Cleanup function to remove popup listener
2220
+ const cleanup = () => {
2221
+ try {
2222
+ if (!p.isClosed()) {
2223
+ p.removeListener('popup', popupHandler);
2224
+ }
2225
+ }
2226
+ catch (cleanupError) {
2227
+ }
2228
+ };
2237
2229
  while (true) {
2238
2230
  // Multiple circuit breakers to prevent infinite loops
2239
2231
  if (++loopIterations > MAX_LOOP_ITERATIONS) {
2240
2232
  this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
2233
+ cleanup();
2241
2234
  return;
2242
2235
  }
2243
2236
  // Time-based circuit breaker
2244
2237
  if (Date.now() - startTime > MAX_EXECUTION_TIME) {
2245
2238
  this.log('Maximum execution time reached (30 minutes), terminating workflow', logger_1.Level.ERROR);
2239
+ cleanup();
2246
2240
  return;
2247
2241
  }
2248
2242
  // Failure-based circuit breaker
2249
2243
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
2250
2244
  this.log('Too many consecutive failures, terminating to prevent hang', logger_1.Level.ERROR);
2245
+ cleanup();
2251
2246
  return;
2252
2247
  }
2253
2248
  // Check abort flag immediately
2254
2249
  if (this.isAborted) {
2255
2250
  this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
2251
+ cleanup();
2256
2252
  return;
2257
2253
  }
2258
2254
  // Checks whether the page was closed from outside,
2259
2255
  // or the workflow execution has been stopped via `interpreter.stop()`
2260
2256
  if (p.isClosed() || !this.stopper) {
2257
+ cleanup();
2261
2258
  return;
2262
2259
  }
2263
2260
  try {
2264
2261
  yield p.waitForLoadState();
2265
2262
  }
2266
2263
  catch (e) {
2264
+ cleanup();
2267
2265
  yield p.close();
2268
2266
  return;
2269
2267
  }
2270
2268
  if (workflowCopy.length === 0) {
2271
2269
  this.log('All actions completed. Workflow finished.', logger_1.Level.LOG);
2270
+ cleanup();
2272
2271
  return;
2273
2272
  }
2274
2273
  // const newSelectors = this.getSelectors(workflowCopy);
@@ -2359,6 +2358,7 @@ class Interpreter extends events_1.EventEmitter {
2359
2358
  }
2360
2359
  else {
2361
2360
  //await this.disableAdBlocker(p);
2361
+ cleanup();
2362
2362
  return;
2363
2363
  }
2364
2364
  }
@@ -2444,5 +2444,47 @@ class Interpreter extends events_1.EventEmitter {
2444
2444
  }
2445
2445
  });
2446
2446
  }
2447
+ /**
2448
+ * Cleanup method to release resources and prevent memory leaks
2449
+ * Call this when the interpreter is no longer needed
2450
+ */
2451
+ cleanup() {
2452
+ return __awaiter(this, void 0, void 0, function* () {
2453
+ try {
2454
+ // Stop any running workflows first
2455
+ if (this.stopper) {
2456
+ try {
2457
+ yield this.stop();
2458
+ }
2459
+ catch (error) {
2460
+ this.log(`Error stopping workflow during cleanup: ${error.message}`, logger_1.Level.WARN);
2461
+ }
2462
+ }
2463
+ // Clear ad-blocker resources
2464
+ if (this.blocker) {
2465
+ try {
2466
+ this.blocker = null;
2467
+ this.log('Ad-blocker resources cleared', logger_1.Level.DEBUG);
2468
+ }
2469
+ catch (error) {
2470
+ this.log(`Error cleaning up ad-blocker: ${error.message}`, logger_1.Level.WARN);
2471
+ }
2472
+ }
2473
+ // Clear accumulated data to free memory
2474
+ this.cumulativeResults = [];
2475
+ this.autohealFailures = [];
2476
+ this.namedResults = {};
2477
+ this.serializableDataByType = { scrapeList: {}, scrapeSchema: {} };
2478
+ // Reset state
2479
+ this.isAborted = false;
2480
+ this.initializedWorkflow = null;
2481
+ this.log('Interpreter cleanup completed', logger_1.Level.DEBUG);
2482
+ }
2483
+ catch (error) {
2484
+ this.log(`Error during interpreter cleanup: ${error.message}`, logger_1.Level.ERROR);
2485
+ throw error;
2486
+ }
2487
+ });
2488
+ }
2447
2489
  }
2448
2490
  exports.default = Interpreter;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mx-cloud",
3
- "version": "0.0.22",
3
+ "version": "0.0.24",
4
4
  "description": "mx cloud",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",