maxun-core 0.0.24 → 0.0.25

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -57,6 +57,9 @@ export default class Interpreter extends EventEmitter {
57
57
  private log;
58
58
  private blocker;
59
59
  private cumulativeResults;
60
+ private namedResults;
61
+ private screenshotCounter;
62
+ private serializableDataByType;
60
63
  constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
61
64
  /**
62
65
  * Sets the abort flag to immediately stop all operations
@@ -55,6 +55,12 @@ class Interpreter extends events_1.EventEmitter {
55
55
  this.isAborted = false;
56
56
  this.blocker = null;
57
57
  this.cumulativeResults = [];
58
+ this.namedResults = {};
59
+ this.screenshotCounter = 0;
60
+ this.serializableDataByType = {
61
+ scrapeList: {},
62
+ scrapeSchema: {}
63
+ };
58
64
  this.workflow = workflow.workflow;
59
65
  this.initializedWorkflow = null;
60
66
  this.options = Object.assign({ maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => {
@@ -330,13 +336,26 @@ class Interpreter extends events_1.EventEmitter {
330
336
  * Beware of false linter errors - here, we know better!
331
337
  */
332
338
  const wawActions = {
333
- screenshot: (params) => __awaiter(this, void 0, void 0, function* () {
339
+ screenshot: (params, nameOverride) => __awaiter(this, void 0, void 0, function* () {
334
340
  var _b;
335
341
  if ((_b = this.options.debugChannel) === null || _b === void 0 ? void 0 : _b.setActionType) {
336
- this.options.debugChannel.setActionType('screenshot');
342
+ this.options.debugChannel.setActionType("screenshot");
337
343
  }
338
344
  const screenshotBuffer = yield page.screenshot(Object.assign(Object.assign({}, params), { path: undefined }));
339
- yield this.options.binaryCallback(screenshotBuffer, 'image/png');
345
+ const explicitName = (typeof nameOverride === 'string' && nameOverride.trim().length > 0) ? nameOverride.trim() : null;
346
+ let screenshotName;
347
+ if (explicitName) {
348
+ screenshotName = explicitName;
349
+ }
350
+ else {
351
+ this.screenshotCounter += 1;
352
+ screenshotName = `Screenshot ${this.screenshotCounter}`;
353
+ }
354
+ yield this.options.binaryCallback({
355
+ name: screenshotName,
356
+ data: screenshotBuffer,
357
+ mimeType: "image/png",
358
+ }, "image/png");
340
359
  }),
341
360
  enqueueLinks: (selector) => __awaiter(this, void 0, void 0, function* () {
342
361
  var _c;
@@ -394,18 +413,52 @@ class Interpreter extends events_1.EventEmitter {
394
413
  if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
395
414
  this.cumulativeResults = [];
396
415
  }
416
+ const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
397
417
  if (this.cumulativeResults.length === 0) {
398
- this.cumulativeResults.push({});
418
+ const newRow = {};
419
+ Object.entries(resultToProcess).forEach(([key, value]) => {
420
+ if (value !== undefined) {
421
+ newRow[key] = value;
422
+ }
423
+ });
424
+ this.cumulativeResults.push(newRow);
399
425
  }
400
- const mergedResult = this.cumulativeResults[0];
401
- const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
402
- Object.entries(resultToProcess).forEach(([key, value]) => {
403
- if (value !== undefined) {
404
- mergedResult[key] = value;
426
+ else {
427
+ const lastRow = this.cumulativeResults[this.cumulativeResults.length - 1];
428
+ const newResultKeys = Object.keys(resultToProcess).filter(key => resultToProcess[key] !== undefined);
429
+ const hasRepeatedKeys = newResultKeys.some(key => lastRow.hasOwnProperty(key));
430
+ if (hasRepeatedKeys) {
431
+ const newRow = {};
432
+ Object.entries(resultToProcess).forEach(([key, value]) => {
433
+ if (value !== undefined) {
434
+ newRow[key] = value;
435
+ }
436
+ });
437
+ this.cumulativeResults.push(newRow);
438
+ }
439
+ else {
440
+ Object.entries(resultToProcess).forEach(([key, value]) => {
441
+ if (value !== undefined) {
442
+ lastRow[key] = value;
443
+ }
444
+ });
405
445
  }
446
+ }
447
+ const actionType = "scrapeSchema";
448
+ const actionName = schema.__name || "Texts";
449
+ if (!this.namedResults[actionType])
450
+ this.namedResults[actionType] = {};
451
+ this.namedResults[actionType][actionName] = this.cumulativeResults;
452
+ if (!this.serializableDataByType[actionType])
453
+ this.serializableDataByType[actionType] = {};
454
+ if (!this.serializableDataByType[actionType][actionName]) {
455
+ this.serializableDataByType[actionType][actionName] = [];
456
+ }
457
+ this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
458
+ yield this.options.serializableCallback({
459
+ scrapeList: this.serializableDataByType.scrapeList,
460
+ scrapeSchema: this.serializableDataByType.scrapeSchema
406
461
  });
407
- console.log("Updated merged result:", mergedResult);
408
- yield this.options.serializableCallback([mergedResult]);
409
462
  }),
410
463
  scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
411
464
  var _f, _g;
@@ -420,17 +473,56 @@ class Interpreter extends events_1.EventEmitter {
420
473
  yield this.options.serializableCallback({});
421
474
  return;
422
475
  }
423
- yield this.ensureScriptsLoaded(page);
424
- if ((_g = this.options.debugChannel) === null || _g === void 0 ? void 0 : _g.incrementScrapeListIndex) {
425
- this.options.debugChannel.incrementScrapeListIndex();
426
- }
427
- if (!config.pagination) {
428
- const scrapeResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
429
- yield this.options.serializableCallback(scrapeResults);
476
+ try {
477
+ yield this.ensureScriptsLoaded(page);
478
+ if ((_g = this.options.debugChannel) === null || _g === void 0 ? void 0 : _g.incrementScrapeListIndex) {
479
+ this.options.debugChannel.incrementScrapeListIndex();
480
+ }
481
+ let scrapeResults = [];
482
+ if (!config.pagination) {
483
+ scrapeResults = yield page.evaluate((cfg) => {
484
+ try {
485
+ return window.scrapeList(cfg);
486
+ }
487
+ catch (error) {
488
+ console.warn('ScrapeList evaluation failed:', error.message);
489
+ return [];
490
+ }
491
+ }, config);
492
+ }
493
+ else {
494
+ scrapeResults = yield this.handlePagination(page, config);
495
+ }
496
+ if (!Array.isArray(scrapeResults)) {
497
+ scrapeResults = [];
498
+ }
499
+ const actionType = "scrapeList";
500
+ const actionName = config.__name || "List";
501
+ if (!this.serializableDataByType[actionType])
502
+ this.serializableDataByType[actionType] = {};
503
+ if (!this.serializableDataByType[actionType][actionName]) {
504
+ this.serializableDataByType[actionType][actionName] = [];
505
+ }
506
+ this.serializableDataByType[actionType][actionName].push(...scrapeResults);
507
+ yield this.options.serializableCallback({
508
+ scrapeList: this.serializableDataByType.scrapeList,
509
+ scrapeSchema: this.serializableDataByType.scrapeSchema
510
+ });
430
511
  }
431
- else {
432
- const scrapeResults = yield this.handlePagination(page, config);
433
- yield this.options.serializableCallback(scrapeResults);
512
+ catch (error) {
513
+ console.error('ScrapeList action failed completely:', error.message);
514
+ const actionType = "scrapeList";
515
+ const actionName = config.__name || "List";
516
+ if (!this.namedResults[actionType])
517
+ this.namedResults[actionType] = {};
518
+ this.namedResults[actionType][actionName] = [];
519
+ if (!this.serializableDataByType[actionType])
520
+ this.serializableDataByType[actionType] = {};
521
+ this.serializableDataByType[actionType][actionName] = [];
522
+ yield this.options.serializableCallback({
523
+ scrapeList: this.serializableDataByType.scrapeList,
524
+ scrapeSchema: this.serializableDataByType.scrapeSchema
525
+ });
434
526
  }
435
527
  }),
436
528
  scrapeListAuto: (config) => __awaiter(this, void 0, void 0, function* () {
@@ -497,11 +589,50 @@ class Interpreter extends events_1.EventEmitter {
497
589
  }
498
590
  });
499
591
  for (const step of steps) {
592
+ if (this.isAborted) {
593
+ this.log('Workflow aborted during step execution', logger_1.Level.WARN);
594
+ return;
595
+ }
500
596
  this.log(`Launching ${String(step.action)}`, logger_1.Level.LOG);
597
+ let stepName = null;
598
+ try {
599
+ const debug = this.options.debugChannel;
600
+ if (debug === null || debug === void 0 ? void 0 : debug.setActionType) {
601
+ debug.setActionType(String(step.action));
602
+ }
603
+ if (step === null || step === void 0 ? void 0 : step.name) {
604
+ stepName = step.name;
605
+ }
606
+ else if (Array.isArray(step === null || step === void 0 ? void 0 : step.args) &&
607
+ step.args.length > 0 &&
608
+ typeof step.args[0] === "object" &&
609
+ "__name" in step.args[0]) {
610
+ stepName = step.args[0].__name;
611
+ }
612
+ else if (typeof (step === null || step === void 0 ? void 0 : step.args) === "object" &&
613
+ (step === null || step === void 0 ? void 0 : step.args) !== null &&
614
+ "__name" in step.args) {
615
+ stepName = step.args.__name;
616
+ }
617
+ if (!stepName) {
618
+ stepName = String(step.action);
619
+ }
620
+ if (debug && typeof debug.setActionName === "function") {
621
+ debug.setActionName(stepName);
622
+ }
623
+ }
624
+ catch (err) {
625
+ this.log(`Failed to set action name/type: ${err.message}`, logger_1.Level.WARN);
626
+ }
501
627
  if (step.action in wawActions) {
502
628
  // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
503
629
  const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
504
- yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
630
+ if (step.action === 'screenshot') {
631
+ yield wawActions.screenshot(...(params !== null && params !== void 0 ? params : []), stepName !== null && stepName !== void 0 ? stepName : undefined);
632
+ }
633
+ else {
634
+ yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
635
+ }
505
636
  }
506
637
  else {
507
638
  if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
@@ -34,6 +34,8 @@ class Preprocessor {
34
34
  what: joi_1.default.array().items({
35
35
  action: joi_1.default.string().required(),
36
36
  args: joi_1.default.array().items(joi_1.default.any()),
37
+ name: joi_1.default.string(),
38
+ actionId: joi_1.default.string()
37
39
  }).required(),
38
40
  })).required(),
39
41
  });
@@ -27,6 +27,8 @@ export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot
27
27
  export type What = {
28
28
  action: MethodNames<Page> | CustomFunctions;
29
29
  args?: any[];
30
+ name?: string;
31
+ actionId?: string;
30
32
  };
31
33
  export type PageState = Partial<BaseConditions>;
32
34
  export type ParamType = Record<string, any>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "maxun-core",
3
- "version": "0.0.24",
3
+ "version": "0.0.25",
4
4
  "description": "Core package for Maxun, responsible for data extraction",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",