maxun-core 0.0.26 → 0.0.28

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -357,8 +357,8 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
357
357
  * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors
358
358
  * @returns {Array.<Array.<Object>>} Array of arrays of scraped items, one sub-array per list
359
359
  */
360
- window.scrapeList = function (_a) {
361
- return __awaiter(this, arguments, void 0, function* ({ listSelector, fields, limit = 10 }) {
360
+ window.scrapeList = function ({ listSelector, fields, limit = 10 }) {
361
+ return __awaiter(this, void 0, void 0, function* () {
362
362
  // XPath evaluation functions
363
363
  const queryInsideContext = (context, part) => {
364
364
  try {
@@ -1,4 +1,5 @@
1
- import { Page } from 'playwright';
1
+ /// <reference types="node" />
2
+ import { Page } from 'playwright-core';
2
3
  import { EventEmitter } from 'events';
3
4
  import { WorkflowFile, ParamType } from './types/workflow';
4
5
  /**
@@ -59,6 +60,7 @@ export default class Interpreter extends EventEmitter {
59
60
  private namedResults;
60
61
  private screenshotCounter;
61
62
  private serializableDataByType;
63
+ private scrapeListCounter;
62
64
  constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
63
65
  /**
64
66
  * Sets the abort flag to immediately stop all operations
@@ -114,5 +116,10 @@ export default class Interpreter extends EventEmitter {
114
116
  */
115
117
  run(page: Page, params?: ParamType): Promise<void>;
116
118
  stop(): Promise<void>;
119
+ /**
120
+ * Cleanup method to release resources and prevent memory leaks
121
+ * Call this when the interpreter is no longer needed
122
+ */
123
+ cleanup(): Promise<void>;
117
124
  }
118
125
  export {};
@@ -15,23 +15,13 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
15
15
  }) : function(o, v) {
16
16
  o["default"] = v;
17
17
  });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
18
+ var __importStar = (this && this.__importStar) || function (mod) {
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
35
25
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
26
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
27
  return new (P || (P = Promise))(function (resolve, reject) {
@@ -71,6 +61,7 @@ class Interpreter extends events_1.EventEmitter {
71
61
  scrapeList: {},
72
62
  scrapeSchema: {}
73
63
  };
64
+ this.scrapeListCounter = 0;
74
65
  this.workflow = workflow.workflow;
75
66
  this.initializedWorkflow = null;
76
67
  this.options = Object.assign({ maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => {
@@ -331,8 +322,8 @@ class Interpreter extends events_1.EventEmitter {
331
322
  * @param steps Array of actions.
332
323
  */
333
324
  carryOutSteps(page, steps) {
325
+ var _a;
334
326
  return __awaiter(this, void 0, void 0, function* () {
335
- var _a;
336
327
  if (this.isAborted) {
337
328
  this.log('Workflow aborted, stopping execution', logger_1.Level.WARN);
338
329
  return;
@@ -347,8 +338,8 @@ class Interpreter extends events_1.EventEmitter {
347
338
  */
348
339
  const wawActions = {
349
340
  screenshot: (params, nameOverride) => __awaiter(this, void 0, void 0, function* () {
350
- var _a;
351
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
341
+ var _b;
342
+ if ((_b = this.options.debugChannel) === null || _b === void 0 ? void 0 : _b.setActionType) {
352
343
  this.options.debugChannel.setActionType("screenshot");
353
344
  }
354
345
  const screenshotBuffer = yield page.screenshot(Object.assign(Object.assign({}, params), { path: undefined }));
@@ -368,8 +359,8 @@ class Interpreter extends events_1.EventEmitter {
368
359
  }, "image/png");
369
360
  }),
370
361
  enqueueLinks: (selector) => __awaiter(this, void 0, void 0, function* () {
371
- var _a;
372
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
362
+ var _c;
363
+ if ((_c = this.options.debugChannel) === null || _c === void 0 ? void 0 : _c.setActionType) {
373
364
  this.options.debugChannel.setActionType('enqueueLinks');
374
365
  }
375
366
  const links = yield page.locator(selector)
@@ -380,8 +371,9 @@ class Interpreter extends events_1.EventEmitter {
380
371
  for (const link of links) {
381
372
  // eslint-disable-next-line
382
373
  this.concurrency.addJob(() => __awaiter(this, void 0, void 0, function* () {
374
+ let newPage = null;
383
375
  try {
384
- const newPage = yield context.newPage();
376
+ newPage = yield context.newPage();
385
377
  yield newPage.goto(link);
386
378
  yield newPage.waitForLoadState('networkidle');
387
379
  yield this.runLoop(newPage, this.initializedWorkflow);
@@ -392,26 +384,36 @@ class Interpreter extends events_1.EventEmitter {
392
384
  // the interpreter by throwing).
393
385
  this.log(e, logger_1.Level.ERROR);
394
386
  }
387
+ finally {
388
+ if (newPage && !newPage.isClosed()) {
389
+ try {
390
+ yield newPage.close();
391
+ }
392
+ catch (closeError) {
393
+ this.log('Failed to close enqueued page', logger_1.Level.WARN);
394
+ }
395
+ }
396
+ }
395
397
  }));
396
398
  }
397
399
  yield page.close();
398
400
  }),
399
401
  scrape: (selector) => __awaiter(this, void 0, void 0, function* () {
400
- var _a;
401
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
402
+ var _d;
403
+ if ((_d = this.options.debugChannel) === null || _d === void 0 ? void 0 : _d.setActionType) {
402
404
  this.options.debugChannel.setActionType('scrape');
403
405
  }
404
406
  yield this.ensureScriptsLoaded(page);
405
407
  const scrapeResults = yield page.evaluate((s) => window.scrape(s !== null && s !== void 0 ? s : null), selector);
406
408
  yield this.options.serializableCallback(scrapeResults);
407
409
  }),
408
- scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
409
- var _a;
410
+ scrapeSchema: (schema, actionName = "") => __awaiter(this, void 0, void 0, function* () {
411
+ var _e;
410
412
  if (this.isAborted) {
411
413
  this.log('Workflow aborted, stopping scrapeSchema', logger_1.Level.WARN);
412
414
  return;
413
415
  }
414
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
416
+ if ((_e = this.options.debugChannel) === null || _e === void 0 ? void 0 : _e.setActionType) {
415
417
  this.options.debugChannel.setActionType('scrapeSchema');
416
418
  }
417
419
  if (this.options.mode && this.options.mode === 'editor') {
@@ -455,28 +457,28 @@ class Interpreter extends events_1.EventEmitter {
455
457
  }
456
458
  }
457
459
  const actionType = "scrapeSchema";
458
- const actionName = schema.__name || "Texts";
460
+ const name = actionName || "Texts";
459
461
  if (!this.namedResults[actionType])
460
462
  this.namedResults[actionType] = {};
461
- this.namedResults[actionType][actionName] = this.cumulativeResults;
463
+ this.namedResults[actionType][name] = this.cumulativeResults;
462
464
  if (!this.serializableDataByType[actionType])
463
465
  this.serializableDataByType[actionType] = {};
464
- if (!this.serializableDataByType[actionType][actionName]) {
465
- this.serializableDataByType[actionType][actionName] = [];
466
+ if (!this.serializableDataByType[actionType][name]) {
467
+ this.serializableDataByType[actionType][name] = [];
466
468
  }
467
- this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
469
+ this.serializableDataByType[actionType][name] = [...this.cumulativeResults];
468
470
  yield this.options.serializableCallback({
469
471
  scrapeList: this.serializableDataByType.scrapeList,
470
472
  scrapeSchema: this.serializableDataByType.scrapeSchema
471
473
  });
472
474
  }),
473
- scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
474
- var _a, _b;
475
+ scrapeList: (config, actionName = "") => __awaiter(this, void 0, void 0, function* () {
476
+ var _f, _g;
475
477
  if (this.isAborted) {
476
478
  this.log('Workflow aborted, stopping scrapeList', logger_1.Level.WARN);
477
479
  return;
478
480
  }
479
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
481
+ if ((_f = this.options.debugChannel) === null || _f === void 0 ? void 0 : _f.setActionType) {
480
482
  this.options.debugChannel.setActionType('scrapeList');
481
483
  }
482
484
  if (this.options.mode && this.options.mode === 'editor') {
@@ -485,10 +487,11 @@ class Interpreter extends events_1.EventEmitter {
485
487
  }
486
488
  try {
487
489
  yield this.ensureScriptsLoaded(page);
488
- if ((_b = this.options.debugChannel) === null || _b === void 0 ? void 0 : _b.incrementScrapeListIndex) {
490
+ if ((_g = this.options.debugChannel) === null || _g === void 0 ? void 0 : _g.incrementScrapeListIndex) {
489
491
  this.options.debugChannel.incrementScrapeListIndex();
490
492
  }
491
493
  let scrapeResults = [];
494
+ let paginationUsed = false;
492
495
  if (!config.pagination) {
493
496
  scrapeResults = yield page.evaluate((cfg) => {
494
497
  try {
@@ -501,34 +504,46 @@ class Interpreter extends events_1.EventEmitter {
501
504
  }, config);
502
505
  }
503
506
  else {
504
- scrapeResults = yield this.handlePagination(page, config);
507
+ paginationUsed = true;
508
+ scrapeResults = yield this.handlePagination(page, config, actionName);
505
509
  }
506
510
  if (!Array.isArray(scrapeResults)) {
507
511
  scrapeResults = [];
508
512
  }
509
- const actionType = "scrapeList";
510
- const actionName = config.__name || "List";
511
- if (!this.serializableDataByType[actionType])
512
- this.serializableDataByType[actionType] = {};
513
- if (!this.serializableDataByType[actionType][actionName]) {
514
- this.serializableDataByType[actionType][actionName] = [];
513
+ console.log(`ScrapeList completed with ${scrapeResults.length} results`);
514
+ if (!paginationUsed) {
515
+ const actionType = "scrapeList";
516
+ let name = actionName || "";
517
+ if (!name || name.trim() === "") {
518
+ this.scrapeListCounter++;
519
+ name = `List ${this.scrapeListCounter}`;
520
+ }
521
+ if (!this.serializableDataByType[actionType])
522
+ this.serializableDataByType[actionType] = {};
523
+ if (!this.serializableDataByType[actionType][name]) {
524
+ this.serializableDataByType[actionType][name] = [];
525
+ }
526
+ this.serializableDataByType[actionType][name].push(...scrapeResults);
527
+ yield this.options.serializableCallback({
528
+ scrapeList: this.serializableDataByType.scrapeList,
529
+ scrapeSchema: this.serializableDataByType.scrapeSchema
530
+ });
515
531
  }
516
- this.serializableDataByType[actionType][actionName].push(...scrapeResults);
517
- yield this.options.serializableCallback({
518
- scrapeList: this.serializableDataByType.scrapeList,
519
- scrapeSchema: this.serializableDataByType.scrapeSchema
520
- });
521
532
  }
522
533
  catch (error) {
523
534
  console.error('ScrapeList action failed completely:', error.message);
524
535
  const actionType = "scrapeList";
525
- const actionName = config.__name || "List";
536
+ let name = actionName || "";
537
+ if (!name || name.trim() === "") {
538
+ this.scrapeListCounter++;
539
+ name = `List ${this.scrapeListCounter}`;
540
+ }
526
541
  if (!this.namedResults[actionType])
527
542
  this.namedResults[actionType] = {};
528
- this.namedResults[actionType][actionName] = [];
543
+ this.namedResults[actionType][name] = [];
529
544
  if (!this.serializableDataByType[actionType])
530
545
  this.serializableDataByType[actionType] = {};
531
- this.serializableDataByType[actionType][actionName] = [];
546
+ this.serializableDataByType[actionType][name] = [];
532
547
  yield this.options.serializableCallback({
533
548
  scrapeList: this.serializableDataByType.scrapeList,
534
549
  scrapeSchema: this.serializableDataByType.scrapeSchema
@@ -536,8 +551,8 @@ class Interpreter extends events_1.EventEmitter {
536
551
  }
537
552
  }),
538
553
  scrapeListAuto: (config) => __awaiter(this, void 0, void 0, function* () {
539
- var _a;
540
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
554
+ var _h;
555
+ if ((_h = this.options.debugChannel) === null || _h === void 0 ? void 0 : _h.setActionType) {
541
556
  this.options.debugChannel.setActionType('scrapeListAuto');
542
557
  }
543
558
  yield this.ensureScriptsLoaded(page);
@@ -547,8 +562,8 @@ class Interpreter extends events_1.EventEmitter {
547
562
  yield this.options.serializableCallback(scrapeResults);
548
563
  }),
549
564
  scroll: (pages) => __awaiter(this, void 0, void 0, function* () {
550
- var _a;
551
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
565
+ var _j;
566
+ if ((_j = this.options.debugChannel) === null || _j === void 0 ? void 0 : _j.setActionType) {
552
567
  this.options.debugChannel.setActionType('scroll');
553
568
  }
554
569
  yield page.evaluate((pagesInternal) => __awaiter(this, void 0, void 0, function* () {
@@ -559,8 +574,8 @@ class Interpreter extends events_1.EventEmitter {
559
574
  }), pages !== null && pages !== void 0 ? pages : 1);
560
575
  }),
561
576
  script: (code) => __awaiter(this, void 0, void 0, function* () {
562
- var _a;
563
- if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
577
+ var _k;
578
+ if ((_k = this.options.debugChannel) === null || _k === void 0 ? void 0 : _k.setActionType) {
564
579
  this.options.debugChannel.setActionType('script');
565
580
  }
566
581
  try {
@@ -610,23 +625,7 @@ class Interpreter extends events_1.EventEmitter {
610
625
  if (debug === null || debug === void 0 ? void 0 : debug.setActionType) {
611
626
  debug.setActionType(String(step.action));
612
627
  }
613
- if (step === null || step === void 0 ? void 0 : step.name) {
614
- stepName = step.name;
615
- }
616
- else if (Array.isArray(step === null || step === void 0 ? void 0 : step.args) &&
617
- step.args.length > 0 &&
618
- typeof step.args[0] === "object" &&
619
- "__name" in step.args[0]) {
620
- stepName = step.args[0].__name;
621
- }
622
- else if (typeof (step === null || step === void 0 ? void 0 : step.args) === "object" &&
623
- (step === null || step === void 0 ? void 0 : step.args) !== null &&
624
- "__name" in step.args) {
625
- stepName = step.args.__name;
626
- }
627
- if (!stepName) {
628
- stepName = String(step.action);
629
- }
628
+ stepName = (step === null || step === void 0 ? void 0 : step.name) || String(step.action);
630
629
  if (debug && typeof debug.setActionName === "function") {
631
630
  debug.setActionName(stepName);
632
631
  }
@@ -640,6 +639,10 @@ class Interpreter extends events_1.EventEmitter {
640
639
  if (step.action === 'screenshot') {
641
640
  yield wawActions.screenshot(...(params !== null && params !== void 0 ? params : []), stepName !== null && stepName !== void 0 ? stepName : undefined);
642
641
  }
642
+ else if (step.action === 'scrapeList' || step.action === 'scrapeSchema') {
643
+ const actionName = step.name || "";
644
+ yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []), actionName);
645
+ }
643
646
  else {
644
647
  yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
645
648
  }
@@ -699,24 +702,35 @@ class Interpreter extends events_1.EventEmitter {
699
702
  }
700
703
  });
701
704
  }
702
- handlePagination(page, config) {
705
+ handlePagination(page, config, providedActionName = "") {
703
706
  return __awaiter(this, void 0, void 0, function* () {
704
707
  if (this.isAborted) {
705
708
  this.log('Workflow aborted, stopping pagination', logger_1.Level.WARN);
706
709
  return [];
707
710
  }
711
+ const actionType = "scrapeList";
712
+ let actionName = providedActionName || "";
713
+ if (!actionName || actionName.trim() === "") {
714
+ this.scrapeListCounter++;
715
+ actionName = `List ${this.scrapeListCounter}`;
716
+ }
717
+ if (!this.serializableDataByType[actionType]) {
718
+ this.serializableDataByType[actionType] = {};
719
+ }
720
+ if (!this.serializableDataByType[actionType][actionName]) {
721
+ this.serializableDataByType[actionType][actionName] = [];
722
+ }
708
723
  let allResults = [];
709
724
  let previousHeight = 0;
710
725
  let scrapedItems = new Set();
711
726
  let visitedUrls = new Set();
712
727
  const MAX_RETRIES = 3;
713
- const RETRY_DELAY = 1000; // 1 second delay between retries
728
+ const RETRY_DELAY = 1000;
714
729
  const MAX_UNCHANGED_RESULTS = 5;
715
730
  const debugLog = (message, ...args) => {
716
731
  console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
717
732
  };
718
733
  const scrapeCurrentPage = () => __awaiter(this, void 0, void 0, function* () {
719
- // Check abort flag before scraping current page
720
734
  if (this.isAborted) {
721
735
  debugLog("Workflow aborted, stopping scrapeCurrentPage");
722
736
  return;
@@ -740,7 +754,11 @@ class Interpreter extends events_1.EventEmitter {
740
754
  });
741
755
  allResults = allResults.concat(newResults);
742
756
  debugLog("Results collected:", allResults.length);
743
- yield this.options.serializableCallback(allResults);
757
+ this.serializableDataByType[actionType][actionName] = [...allResults];
758
+ yield this.options.serializableCallback({
759
+ scrapeList: this.serializableDataByType.scrapeList,
760
+ scrapeSchema: this.serializableDataByType.scrapeSchema
761
+ });
744
762
  });
745
763
  const checkLimit = () => {
746
764
  if (config.limit && allResults.length >= config.limit) {
@@ -762,7 +780,7 @@ class Interpreter extends events_1.EventEmitter {
762
780
  selector.includes(' or ');
763
781
  };
764
782
  // Helper function to wait for selector (CSS or XPath)
765
- const waitForSelectorUniversal = (selector_1, ...args_1) => __awaiter(this, [selector_1, ...args_1], void 0, function* (selector, options = {}) {
783
+ const waitForSelectorUniversal = (selector, options = {}) => __awaiter(this, void 0, void 0, function* () {
766
784
  try {
767
785
  if (isXPathSelector(selector)) {
768
786
  // Use XPath locator
@@ -842,7 +860,7 @@ class Interpreter extends events_1.EventEmitter {
842
860
  updatedSelectors
843
861
  };
844
862
  });
845
- const retryOperation = (operation_1, ...args_1) => __awaiter(this, [operation_1, ...args_1], void 0, function* (operation, retryCount = 0) {
863
+ const retryOperation = (operation, retryCount = 0) => __awaiter(this, void 0, void 0, function* () {
846
864
  try {
847
865
  return yield operation();
848
866
  }
@@ -1006,7 +1024,7 @@ class Interpreter extends events_1.EventEmitter {
1006
1024
  }).catch(e => {
1007
1025
  throw e;
1008
1026
  }),
1009
- button.click()
1027
+ page.locator(workingSelector).first().click()
1010
1028
  ]);
1011
1029
  debugLog("Navigation successful after regular click");
1012
1030
  yield page.waitForTimeout(2000);
@@ -1022,7 +1040,7 @@ class Interpreter extends events_1.EventEmitter {
1022
1040
  }).catch(e => {
1023
1041
  throw e;
1024
1042
  }),
1025
- button.dispatchEvent('click')
1043
+ page.locator(workingSelector).first().dispatchEvent('click')
1026
1044
  ]);
1027
1045
  debugLog("Navigation successful after dispatch event");
1028
1046
  yield page.waitForTimeout(2000);
@@ -1030,11 +1048,11 @@ class Interpreter extends events_1.EventEmitter {
1030
1048
  }
1031
1049
  catch (dispatchNavError) {
1032
1050
  try {
1033
- yield button.click();
1051
+ yield page.locator(workingSelector).first().click();
1034
1052
  yield page.waitForTimeout(2000);
1035
1053
  }
1036
1054
  catch (clickError) {
1037
- yield button.dispatchEvent('click');
1055
+ yield page.locator(workingSelector).first().dispatchEvent('click');
1038
1056
  yield page.waitForTimeout(2000);
1039
1057
  }
1040
1058
  }
@@ -1222,8 +1240,8 @@ class Interpreter extends events_1.EventEmitter {
1222
1240
  return workflow;
1223
1241
  }
1224
1242
  runLoop(p, workflow) {
1243
+ var _a, _b;
1225
1244
  return __awaiter(this, void 0, void 0, function* () {
1226
- var _a, _b;
1227
1245
  if (this.isAborted) {
1228
1246
  this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
1229
1247
  return;
@@ -1247,36 +1265,52 @@ class Interpreter extends events_1.EventEmitter {
1247
1265
  * User-requested concurrency should be entirely managed by the concurrency manager,
1248
1266
  * e.g. via `enqueueLinks`.
1249
1267
  */
1250
- p.on('popup', (popup) => {
1268
+ const popupHandler = (popup) => {
1251
1269
  this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
1252
- });
1270
+ };
1271
+ p.on('popup', popupHandler);
1253
1272
  /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
1254
1273
  let loopIterations = 0;
1255
1274
  const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
1275
+ // Cleanup function to remove popup listener
1276
+ const cleanup = () => {
1277
+ try {
1278
+ if (!p.isClosed()) {
1279
+ p.removeListener('popup', popupHandler);
1280
+ }
1281
+ }
1282
+ catch (cleanupError) {
1283
+ }
1284
+ };
1256
1285
  while (true) {
1257
1286
  if (this.isAborted) {
1258
1287
  this.log('Workflow aborted during step execution', logger_1.Level.WARN);
1288
+ cleanup();
1259
1289
  return;
1260
1290
  }
1261
1291
  // Circuit breaker to prevent infinite loops
1262
1292
  if (++loopIterations > MAX_LOOP_ITERATIONS) {
1263
1293
  this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
1294
+ cleanup();
1264
1295
  return;
1265
1296
  }
1266
1297
  // Checks whether the page was closed from outside,
1267
1298
  // or the workflow execution has been stopped via `interpreter.stop()`
1268
1299
  if (p.isClosed() || !this.stopper) {
1300
+ cleanup();
1269
1301
  return;
1270
1302
  }
1271
1303
  try {
1272
1304
  yield p.waitForLoadState();
1273
1305
  }
1274
1306
  catch (e) {
1307
+ cleanup();
1275
1308
  yield p.close();
1276
1309
  return;
1277
1310
  }
1278
1311
  if (workflowCopy.length === 0) {
1279
1312
  this.log('All actions completed. Workflow finished.', logger_1.Level.LOG);
1313
+ cleanup();
1280
1314
  return;
1281
1315
  }
1282
1316
  // let pageState = {};
@@ -1355,6 +1389,7 @@ class Interpreter extends events_1.EventEmitter {
1355
1389
  }
1356
1390
  else {
1357
1391
  //await this.disableAdBlocker(p);
1392
+ cleanup();
1358
1393
  return;
1359
1394
  }
1360
1395
  }
@@ -1438,5 +1473,46 @@ class Interpreter extends events_1.EventEmitter {
1438
1473
  }
1439
1474
  });
1440
1475
  }
1476
+ /**
1477
+ * Cleanup method to release resources and prevent memory leaks
1478
+ * Call this when the interpreter is no longer needed
1479
+ */
1480
+ cleanup() {
1481
+ return __awaiter(this, void 0, void 0, function* () {
1482
+ try {
1483
+ // Stop any running workflows first
1484
+ if (this.stopper) {
1485
+ try {
1486
+ yield this.stop();
1487
+ }
1488
+ catch (error) {
1489
+ this.log(`Error stopping workflow during cleanup: ${error.message}`, logger_1.Level.WARN);
1490
+ }
1491
+ }
1492
+ // Clear ad-blocker resources
1493
+ if (this.blocker) {
1494
+ try {
1495
+ this.blocker = null;
1496
+ this.log('Ad-blocker resources cleared', logger_1.Level.DEBUG);
1497
+ }
1498
+ catch (error) {
1499
+ this.log(`Error cleaning up ad-blocker: ${error.message}`, logger_1.Level.WARN);
1500
+ }
1501
+ }
1502
+ // Clear accumulated data to free memory
1503
+ this.cumulativeResults = [];
1504
+ this.namedResults = {};
1505
+ this.serializableDataByType = { scrapeList: {}, scrapeSchema: {} };
1506
+ // Reset state
1507
+ this.isAborted = false;
1508
+ this.initializedWorkflow = null;
1509
+ this.log('Interpreter cleanup completed', logger_1.Level.DEBUG);
1510
+ }
1511
+ catch (error) {
1512
+ this.log(`Error during interpreter cleanup: ${error.message}`, logger_1.Level.ERROR);
1513
+ throw error;
1514
+ }
1515
+ });
1516
+ }
1441
1517
  }
1442
1518
  exports.default = Interpreter;
@@ -1,4 +1,4 @@
1
- import { Page } from 'playwright';
1
+ import { Page } from 'playwright-core';
2
2
  import { naryOperators, unaryOperators, operators, meta } from './logic';
3
3
  export type Operator = typeof operators[number];
4
4
  export type UnaryOperator = typeof unaryOperators[number];
@@ -4,7 +4,6 @@
4
4
  */
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.Level = void 0;
7
- exports.default = logger;
8
7
  var Level;
9
8
  (function (Level) {
10
9
  Level[Level["DATE"] = 36] = "DATE";
@@ -13,7 +12,7 @@ var Level;
13
12
  Level[Level["ERROR"] = 31] = "ERROR";
14
13
  Level[Level["DEBUG"] = 95] = "DEBUG";
15
14
  Level[Level["RESET"] = 0] = "RESET";
16
- })(Level || (exports.Level = Level = {}));
15
+ })(Level = exports.Level || (exports.Level = {}));
17
16
  function logger(message, level = Level.LOG) {
18
17
  let m = message;
19
18
  if (message.constructor.name.includes('Error') && typeof message !== 'string') {
@@ -29,3 +28,4 @@ function logger(message, level = Level.LOG) {
29
28
  }
30
29
  process.stdout.write(`\x1b[${Level.RESET}m\n`);
31
30
  }
31
+ exports.default = logger;
@@ -4,7 +4,7 @@
4
4
  * (it still does not represent the "utils" file)
5
5
  */
6
6
  Object.defineProperty(exports, "__esModule", { value: true });
7
- exports.arrayToObject = arrayToObject;
7
+ exports.arrayToObject = void 0;
8
8
  /* eslint-disable import/prefer-default-export */
9
9
  /**
10
10
  * Converts an array of scalars to an object with **items** of the array **for keys**.
@@ -12,3 +12,4 @@ exports.arrayToObject = arrayToObject;
12
12
  function arrayToObject(array) {
13
13
  return array.reduce((p, x) => (Object.assign(Object.assign({}, p), { [x]: [] })), {});
14
14
  }
15
+ exports.arrayToObject = arrayToObject;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "maxun-core",
3
- "version": "0.0.26",
3
+ "version": "0.0.28",
4
4
  "description": "Core package for Maxun, responsible for data extraction",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",
@@ -31,10 +31,10 @@
31
31
  "license": "AGPL-3.0-or-later",
32
32
  "dependencies": {
33
33
  "@cliqz/adblocker-playwright": "^1.31.3",
34
+ "@types/node": "22.7.9",
34
35
  "cross-fetch": "^4.0.0",
35
36
  "joi": "^17.6.0",
36
- "playwright": "^1.20.1",
37
- "playwright-extra": "^4.3.6",
38
- "puppeteer-extra-plugin-stealth": "^2.11.2"
37
+ "playwright-core": "1.57.0",
38
+ "turndown": "^7.2.2"
39
39
  }
40
- }
40
+ }