maxun-core 0.0.26 → 0.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -59,6 +59,7 @@ export default class Interpreter extends EventEmitter {
59
59
  private namedResults;
60
60
  private screenshotCounter;
61
61
  private serializableDataByType;
62
+ private scrapeListCounter;
62
63
  constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
63
64
  /**
64
65
  * Sets the abort flag to immediately stop all operations
@@ -71,6 +71,7 @@ class Interpreter extends events_1.EventEmitter {
71
71
  scrapeList: {},
72
72
  scrapeSchema: {}
73
73
  };
74
+ this.scrapeListCounter = 0;
74
75
  this.workflow = workflow.workflow;
75
76
  this.initializedWorkflow = null;
76
77
  this.options = Object.assign({ maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => {
@@ -405,7 +406,7 @@ class Interpreter extends events_1.EventEmitter {
405
406
  const scrapeResults = yield page.evaluate((s) => window.scrape(s !== null && s !== void 0 ? s : null), selector);
406
407
  yield this.options.serializableCallback(scrapeResults);
407
408
  }),
408
- scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
409
+ scrapeSchema: (schema_1, ...args_1) => __awaiter(this, [schema_1, ...args_1], void 0, function* (schema, actionName = "") {
409
410
  var _a;
410
411
  if (this.isAborted) {
411
412
  this.log('Workflow aborted, stopping scrapeSchema', logger_1.Level.WARN);
@@ -455,22 +456,22 @@ class Interpreter extends events_1.EventEmitter {
455
456
  }
456
457
  }
457
458
  const actionType = "scrapeSchema";
458
- const actionName = schema.__name || "Texts";
459
+ const name = actionName || "Texts";
459
460
  if (!this.namedResults[actionType])
460
461
  this.namedResults[actionType] = {};
461
- this.namedResults[actionType][actionName] = this.cumulativeResults;
462
+ this.namedResults[actionType][name] = this.cumulativeResults;
462
463
  if (!this.serializableDataByType[actionType])
463
464
  this.serializableDataByType[actionType] = {};
464
- if (!this.serializableDataByType[actionType][actionName]) {
465
- this.serializableDataByType[actionType][actionName] = [];
465
+ if (!this.serializableDataByType[actionType][name]) {
466
+ this.serializableDataByType[actionType][name] = [];
466
467
  }
467
- this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
468
+ this.serializableDataByType[actionType][name] = [...this.cumulativeResults];
468
469
  yield this.options.serializableCallback({
469
470
  scrapeList: this.serializableDataByType.scrapeList,
470
471
  scrapeSchema: this.serializableDataByType.scrapeSchema
471
472
  });
472
473
  }),
473
- scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
474
+ scrapeList: (config_1, ...args_1) => __awaiter(this, [config_1, ...args_1], void 0, function* (config, actionName = "") {
474
475
  var _a, _b;
475
476
  if (this.isAborted) {
476
477
  this.log('Workflow aborted, stopping scrapeList', logger_1.Level.WARN);
@@ -489,6 +490,7 @@ class Interpreter extends events_1.EventEmitter {
489
490
  this.options.debugChannel.incrementScrapeListIndex();
490
491
  }
491
492
  let scrapeResults = [];
493
+ let paginationUsed = false;
492
494
  if (!config.pagination) {
493
495
  scrapeResults = yield page.evaluate((cfg) => {
494
496
  try {
@@ -501,34 +503,46 @@ class Interpreter extends events_1.EventEmitter {
501
503
  }, config);
502
504
  }
503
505
  else {
504
- scrapeResults = yield this.handlePagination(page, config);
506
+ paginationUsed = true;
507
+ scrapeResults = yield this.handlePagination(page, config, actionName);
505
508
  }
506
509
  if (!Array.isArray(scrapeResults)) {
507
510
  scrapeResults = [];
508
511
  }
509
- const actionType = "scrapeList";
510
- const actionName = config.__name || "List";
511
- if (!this.serializableDataByType[actionType])
512
- this.serializableDataByType[actionType] = {};
513
- if (!this.serializableDataByType[actionType][actionName]) {
514
- this.serializableDataByType[actionType][actionName] = [];
512
+ console.log(`ScrapeList completed with ${scrapeResults.length} results`);
513
+ if (!paginationUsed) {
514
+ const actionType = "scrapeList";
515
+ let name = actionName || "";
516
+ if (!name || name.trim() === "") {
517
+ this.scrapeListCounter++;
518
+ name = `List ${this.scrapeListCounter}`;
519
+ }
520
+ if (!this.serializableDataByType[actionType])
521
+ this.serializableDataByType[actionType] = {};
522
+ if (!this.serializableDataByType[actionType][name]) {
523
+ this.serializableDataByType[actionType][name] = [];
524
+ }
525
+ this.serializableDataByType[actionType][name].push(...scrapeResults);
526
+ yield this.options.serializableCallback({
527
+ scrapeList: this.serializableDataByType.scrapeList,
528
+ scrapeSchema: this.serializableDataByType.scrapeSchema
529
+ });
515
530
  }
516
- this.serializableDataByType[actionType][actionName].push(...scrapeResults);
517
- yield this.options.serializableCallback({
518
- scrapeList: this.serializableDataByType.scrapeList,
519
- scrapeSchema: this.serializableDataByType.scrapeSchema
520
- });
521
531
  }
522
532
  catch (error) {
523
533
  console.error('ScrapeList action failed completely:', error.message);
524
534
  const actionType = "scrapeList";
525
- const actionName = config.__name || "List";
535
+ let name = actionName || "";
536
+ if (!name || name.trim() === "") {
537
+ this.scrapeListCounter++;
538
+ name = `List ${this.scrapeListCounter}`;
539
+ }
526
540
  if (!this.namedResults[actionType])
527
541
  this.namedResults[actionType] = {};
528
- this.namedResults[actionType][actionName] = [];
542
+ this.namedResults[actionType][name] = [];
529
543
  if (!this.serializableDataByType[actionType])
530
544
  this.serializableDataByType[actionType] = {};
531
- this.serializableDataByType[actionType][actionName] = [];
545
+ this.serializableDataByType[actionType][name] = [];
532
546
  yield this.options.serializableCallback({
533
547
  scrapeList: this.serializableDataByType.scrapeList,
534
548
  scrapeSchema: this.serializableDataByType.scrapeSchema
@@ -610,23 +624,7 @@ class Interpreter extends events_1.EventEmitter {
610
624
  if (debug === null || debug === void 0 ? void 0 : debug.setActionType) {
611
625
  debug.setActionType(String(step.action));
612
626
  }
613
- if (step === null || step === void 0 ? void 0 : step.name) {
614
- stepName = step.name;
615
- }
616
- else if (Array.isArray(step === null || step === void 0 ? void 0 : step.args) &&
617
- step.args.length > 0 &&
618
- typeof step.args[0] === "object" &&
619
- "__name" in step.args[0]) {
620
- stepName = step.args[0].__name;
621
- }
622
- else if (typeof (step === null || step === void 0 ? void 0 : step.args) === "object" &&
623
- (step === null || step === void 0 ? void 0 : step.args) !== null &&
624
- "__name" in step.args) {
625
- stepName = step.args.__name;
626
- }
627
- if (!stepName) {
628
- stepName = String(step.action);
629
- }
627
+ stepName = (step === null || step === void 0 ? void 0 : step.name) || String(step.action);
630
628
  if (debug && typeof debug.setActionName === "function") {
631
629
  debug.setActionName(stepName);
632
630
  }
@@ -640,6 +638,10 @@ class Interpreter extends events_1.EventEmitter {
640
638
  if (step.action === 'screenshot') {
641
639
  yield wawActions.screenshot(...(params !== null && params !== void 0 ? params : []), stepName !== null && stepName !== void 0 ? stepName : undefined);
642
640
  }
641
+ else if (step.action === 'scrapeList' || step.action === 'scrapeSchema') {
642
+ const actionName = step.name || "";
643
+ yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []), actionName);
644
+ }
643
645
  else {
644
646
  yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
645
647
  }
@@ -699,24 +701,35 @@ class Interpreter extends events_1.EventEmitter {
699
701
  }
700
702
  });
701
703
  }
702
- handlePagination(page, config) {
703
- return __awaiter(this, void 0, void 0, function* () {
704
+ handlePagination(page_1, config_1) {
705
+ return __awaiter(this, arguments, void 0, function* (page, config, providedActionName = "") {
704
706
  if (this.isAborted) {
705
707
  this.log('Workflow aborted, stopping pagination', logger_1.Level.WARN);
706
708
  return [];
707
709
  }
710
+ const actionType = "scrapeList";
711
+ let actionName = providedActionName || "";
712
+ if (!actionName || actionName.trim() === "") {
713
+ this.scrapeListCounter++;
714
+ actionName = `List ${this.scrapeListCounter}`;
715
+ }
716
+ if (!this.serializableDataByType[actionType]) {
717
+ this.serializableDataByType[actionType] = {};
718
+ }
719
+ if (!this.serializableDataByType[actionType][actionName]) {
720
+ this.serializableDataByType[actionType][actionName] = [];
721
+ }
708
722
  let allResults = [];
709
723
  let previousHeight = 0;
710
724
  let scrapedItems = new Set();
711
725
  let visitedUrls = new Set();
712
726
  const MAX_RETRIES = 3;
713
- const RETRY_DELAY = 1000; // 1 second delay between retries
727
+ const RETRY_DELAY = 1000;
714
728
  const MAX_UNCHANGED_RESULTS = 5;
715
729
  const debugLog = (message, ...args) => {
716
730
  console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
717
731
  };
718
732
  const scrapeCurrentPage = () => __awaiter(this, void 0, void 0, function* () {
719
- // Check abort flag before scraping current page
720
733
  if (this.isAborted) {
721
734
  debugLog("Workflow aborted, stopping scrapeCurrentPage");
722
735
  return;
@@ -740,7 +753,11 @@ class Interpreter extends events_1.EventEmitter {
740
753
  });
741
754
  allResults = allResults.concat(newResults);
742
755
  debugLog("Results collected:", allResults.length);
743
- yield this.options.serializableCallback(allResults);
756
+ this.serializableDataByType[actionType][actionName] = [...allResults];
757
+ yield this.options.serializableCallback({
758
+ scrapeList: this.serializableDataByType.scrapeList,
759
+ scrapeSchema: this.serializableDataByType.scrapeSchema
760
+ });
744
761
  });
745
762
  const checkLimit = () => {
746
763
  if (config.limit && allResults.length >= config.limit) {
@@ -1006,7 +1023,7 @@ class Interpreter extends events_1.EventEmitter {
1006
1023
  }).catch(e => {
1007
1024
  throw e;
1008
1025
  }),
1009
- button.click()
1026
+ page.locator(workingSelector).first().click()
1010
1027
  ]);
1011
1028
  debugLog("Navigation successful after regular click");
1012
1029
  yield page.waitForTimeout(2000);
@@ -1022,7 +1039,7 @@ class Interpreter extends events_1.EventEmitter {
1022
1039
  }).catch(e => {
1023
1040
  throw e;
1024
1041
  }),
1025
- button.dispatchEvent('click')
1042
+ page.locator(workingSelector).first().dispatchEvent('click')
1026
1043
  ]);
1027
1044
  debugLog("Navigation successful after dispatch event");
1028
1045
  yield page.waitForTimeout(2000);
@@ -1030,11 +1047,11 @@ class Interpreter extends events_1.EventEmitter {
1030
1047
  }
1031
1048
  catch (dispatchNavError) {
1032
1049
  try {
1033
- yield button.click();
1050
+ yield page.locator(workingSelector).first().click();
1034
1051
  yield page.waitForTimeout(2000);
1035
1052
  }
1036
1053
  catch (clickError) {
1037
- yield button.dispatchEvent('click');
1054
+ yield page.locator(workingSelector).first().dispatchEvent('click');
1038
1055
  yield page.waitForTimeout(2000);
1039
1056
  }
1040
1057
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "maxun-core",
3
- "version": "0.0.26",
3
+ "version": "0.0.27",
4
4
  "description": "Core package for Maxun, responsible for data extraction",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",