maxun-core 0.0.24 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/interpret.d.ts +3 -0
- package/build/interpret.js +153 -22
- package/build/preprocessor.js +2 -0
- package/build/types/workflow.d.ts +2 -0
- package/package.json +1 -1
package/build/interpret.d.ts
CHANGED
|
@@ -57,6 +57,9 @@ export default class Interpreter extends EventEmitter {
|
|
|
57
57
|
private log;
|
|
58
58
|
private blocker;
|
|
59
59
|
private cumulativeResults;
|
|
60
|
+
private namedResults;
|
|
61
|
+
private screenshotCounter;
|
|
62
|
+
private serializableDataByType;
|
|
60
63
|
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
|
|
61
64
|
/**
|
|
62
65
|
* Sets the abort flag to immediately stop all operations
|
package/build/interpret.js
CHANGED
|
@@ -55,6 +55,12 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
55
55
|
this.isAborted = false;
|
|
56
56
|
this.blocker = null;
|
|
57
57
|
this.cumulativeResults = [];
|
|
58
|
+
this.namedResults = {};
|
|
59
|
+
this.screenshotCounter = 0;
|
|
60
|
+
this.serializableDataByType = {
|
|
61
|
+
scrapeList: {},
|
|
62
|
+
scrapeSchema: {}
|
|
63
|
+
};
|
|
58
64
|
this.workflow = workflow.workflow;
|
|
59
65
|
this.initializedWorkflow = null;
|
|
60
66
|
this.options = Object.assign({ maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => {
|
|
@@ -330,13 +336,26 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
330
336
|
* Beware of false linter errors - here, we know better!
|
|
331
337
|
*/
|
|
332
338
|
const wawActions = {
|
|
333
|
-
screenshot: (params) => __awaiter(this, void 0, void 0, function* () {
|
|
339
|
+
screenshot: (params, nameOverride) => __awaiter(this, void 0, void 0, function* () {
|
|
334
340
|
var _b;
|
|
335
341
|
if ((_b = this.options.debugChannel) === null || _b === void 0 ? void 0 : _b.setActionType) {
|
|
336
|
-
this.options.debugChannel.setActionType(
|
|
342
|
+
this.options.debugChannel.setActionType("screenshot");
|
|
337
343
|
}
|
|
338
344
|
const screenshotBuffer = yield page.screenshot(Object.assign(Object.assign({}, params), { path: undefined }));
|
|
339
|
-
|
|
345
|
+
const explicitName = (typeof nameOverride === 'string' && nameOverride.trim().length > 0) ? nameOverride.trim() : null;
|
|
346
|
+
let screenshotName;
|
|
347
|
+
if (explicitName) {
|
|
348
|
+
screenshotName = explicitName;
|
|
349
|
+
}
|
|
350
|
+
else {
|
|
351
|
+
this.screenshotCounter += 1;
|
|
352
|
+
screenshotName = `Screenshot ${this.screenshotCounter}`;
|
|
353
|
+
}
|
|
354
|
+
yield this.options.binaryCallback({
|
|
355
|
+
name: screenshotName,
|
|
356
|
+
data: screenshotBuffer,
|
|
357
|
+
mimeType: "image/png",
|
|
358
|
+
}, "image/png");
|
|
340
359
|
}),
|
|
341
360
|
enqueueLinks: (selector) => __awaiter(this, void 0, void 0, function* () {
|
|
342
361
|
var _c;
|
|
@@ -394,18 +413,52 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
394
413
|
if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
|
|
395
414
|
this.cumulativeResults = [];
|
|
396
415
|
}
|
|
416
|
+
const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
|
|
397
417
|
if (this.cumulativeResults.length === 0) {
|
|
398
|
-
|
|
418
|
+
const newRow = {};
|
|
419
|
+
Object.entries(resultToProcess).forEach(([key, value]) => {
|
|
420
|
+
if (value !== undefined) {
|
|
421
|
+
newRow[key] = value;
|
|
422
|
+
}
|
|
423
|
+
});
|
|
424
|
+
this.cumulativeResults.push(newRow);
|
|
399
425
|
}
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
426
|
+
else {
|
|
427
|
+
const lastRow = this.cumulativeResults[this.cumulativeResults.length - 1];
|
|
428
|
+
const newResultKeys = Object.keys(resultToProcess).filter(key => resultToProcess[key] !== undefined);
|
|
429
|
+
const hasRepeatedKeys = newResultKeys.some(key => lastRow.hasOwnProperty(key));
|
|
430
|
+
if (hasRepeatedKeys) {
|
|
431
|
+
const newRow = {};
|
|
432
|
+
Object.entries(resultToProcess).forEach(([key, value]) => {
|
|
433
|
+
if (value !== undefined) {
|
|
434
|
+
newRow[key] = value;
|
|
435
|
+
}
|
|
436
|
+
});
|
|
437
|
+
this.cumulativeResults.push(newRow);
|
|
438
|
+
}
|
|
439
|
+
else {
|
|
440
|
+
Object.entries(resultToProcess).forEach(([key, value]) => {
|
|
441
|
+
if (value !== undefined) {
|
|
442
|
+
lastRow[key] = value;
|
|
443
|
+
}
|
|
444
|
+
});
|
|
405
445
|
}
|
|
446
|
+
}
|
|
447
|
+
const actionType = "scrapeSchema";
|
|
448
|
+
const actionName = schema.__name || "Texts";
|
|
449
|
+
if (!this.namedResults[actionType])
|
|
450
|
+
this.namedResults[actionType] = {};
|
|
451
|
+
this.namedResults[actionType][actionName] = this.cumulativeResults;
|
|
452
|
+
if (!this.serializableDataByType[actionType])
|
|
453
|
+
this.serializableDataByType[actionType] = {};
|
|
454
|
+
if (!this.serializableDataByType[actionType][actionName]) {
|
|
455
|
+
this.serializableDataByType[actionType][actionName] = [];
|
|
456
|
+
}
|
|
457
|
+
this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
|
|
458
|
+
yield this.options.serializableCallback({
|
|
459
|
+
scrapeList: this.serializableDataByType.scrapeList,
|
|
460
|
+
scrapeSchema: this.serializableDataByType.scrapeSchema
|
|
406
461
|
});
|
|
407
|
-
console.log("Updated merged result:", mergedResult);
|
|
408
|
-
yield this.options.serializableCallback([mergedResult]);
|
|
409
462
|
}),
|
|
410
463
|
scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
|
|
411
464
|
var _f, _g;
|
|
@@ -420,17 +473,56 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
420
473
|
yield this.options.serializableCallback({});
|
|
421
474
|
return;
|
|
422
475
|
}
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
this.options.debugChannel.incrementScrapeListIndex
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
476
|
+
try {
|
|
477
|
+
yield this.ensureScriptsLoaded(page);
|
|
478
|
+
if ((_g = this.options.debugChannel) === null || _g === void 0 ? void 0 : _g.incrementScrapeListIndex) {
|
|
479
|
+
this.options.debugChannel.incrementScrapeListIndex();
|
|
480
|
+
}
|
|
481
|
+
let scrapeResults = [];
|
|
482
|
+
if (!config.pagination) {
|
|
483
|
+
scrapeResults = yield page.evaluate((cfg) => {
|
|
484
|
+
try {
|
|
485
|
+
return window.scrapeList(cfg);
|
|
486
|
+
}
|
|
487
|
+
catch (error) {
|
|
488
|
+
console.warn('ScrapeList evaluation failed:', error.message);
|
|
489
|
+
return [];
|
|
490
|
+
}
|
|
491
|
+
}, config);
|
|
492
|
+
}
|
|
493
|
+
else {
|
|
494
|
+
scrapeResults = yield this.handlePagination(page, config);
|
|
495
|
+
}
|
|
496
|
+
if (!Array.isArray(scrapeResults)) {
|
|
497
|
+
scrapeResults = [];
|
|
498
|
+
}
|
|
499
|
+
const actionType = "scrapeList";
|
|
500
|
+
const actionName = config.__name || "List";
|
|
501
|
+
if (!this.serializableDataByType[actionType])
|
|
502
|
+
this.serializableDataByType[actionType] = {};
|
|
503
|
+
if (!this.serializableDataByType[actionType][actionName]) {
|
|
504
|
+
this.serializableDataByType[actionType][actionName] = [];
|
|
505
|
+
}
|
|
506
|
+
this.serializableDataByType[actionType][actionName].push(...scrapeResults);
|
|
507
|
+
yield this.options.serializableCallback({
|
|
508
|
+
scrapeList: this.serializableDataByType.scrapeList,
|
|
509
|
+
scrapeSchema: this.serializableDataByType.scrapeSchema
|
|
510
|
+
});
|
|
430
511
|
}
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
512
|
+
catch (error) {
|
|
513
|
+
console.error('ScrapeList action failed completely:', error.message);
|
|
514
|
+
const actionType = "scrapeList";
|
|
515
|
+
const actionName = config.__name || "List";
|
|
516
|
+
if (!this.namedResults[actionType])
|
|
517
|
+
this.namedResults[actionType] = {};
|
|
518
|
+
this.namedResults[actionType][actionName] = [];
|
|
519
|
+
if (!this.serializableDataByType[actionType])
|
|
520
|
+
this.serializableDataByType[actionType] = {};
|
|
521
|
+
this.serializableDataByType[actionType][actionName] = [];
|
|
522
|
+
yield this.options.serializableCallback({
|
|
523
|
+
scrapeList: this.serializableDataByType.scrapeList,
|
|
524
|
+
scrapeSchema: this.serializableDataByType.scrapeSchema
|
|
525
|
+
});
|
|
434
526
|
}
|
|
435
527
|
}),
|
|
436
528
|
scrapeListAuto: (config) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -497,11 +589,50 @@ class Interpreter extends events_1.EventEmitter {
|
|
|
497
589
|
}
|
|
498
590
|
});
|
|
499
591
|
for (const step of steps) {
|
|
592
|
+
if (this.isAborted) {
|
|
593
|
+
this.log('Workflow aborted during step execution', logger_1.Level.WARN);
|
|
594
|
+
return;
|
|
595
|
+
}
|
|
500
596
|
this.log(`Launching ${String(step.action)}`, logger_1.Level.LOG);
|
|
597
|
+
let stepName = null;
|
|
598
|
+
try {
|
|
599
|
+
const debug = this.options.debugChannel;
|
|
600
|
+
if (debug === null || debug === void 0 ? void 0 : debug.setActionType) {
|
|
601
|
+
debug.setActionType(String(step.action));
|
|
602
|
+
}
|
|
603
|
+
if (step === null || step === void 0 ? void 0 : step.name) {
|
|
604
|
+
stepName = step.name;
|
|
605
|
+
}
|
|
606
|
+
else if (Array.isArray(step === null || step === void 0 ? void 0 : step.args) &&
|
|
607
|
+
step.args.length > 0 &&
|
|
608
|
+
typeof step.args[0] === "object" &&
|
|
609
|
+
"__name" in step.args[0]) {
|
|
610
|
+
stepName = step.args[0].__name;
|
|
611
|
+
}
|
|
612
|
+
else if (typeof (step === null || step === void 0 ? void 0 : step.args) === "object" &&
|
|
613
|
+
(step === null || step === void 0 ? void 0 : step.args) !== null &&
|
|
614
|
+
"__name" in step.args) {
|
|
615
|
+
stepName = step.args.__name;
|
|
616
|
+
}
|
|
617
|
+
if (!stepName) {
|
|
618
|
+
stepName = String(step.action);
|
|
619
|
+
}
|
|
620
|
+
if (debug && typeof debug.setActionName === "function") {
|
|
621
|
+
debug.setActionName(stepName);
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
catch (err) {
|
|
625
|
+
this.log(`Failed to set action name/type: ${err.message}`, logger_1.Level.WARN);
|
|
626
|
+
}
|
|
501
627
|
if (step.action in wawActions) {
|
|
502
628
|
// "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
|
|
503
629
|
const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
|
|
504
|
-
|
|
630
|
+
if (step.action === 'screenshot') {
|
|
631
|
+
yield wawActions.screenshot(...(params !== null && params !== void 0 ? params : []), stepName !== null && stepName !== void 0 ? stepName : undefined);
|
|
632
|
+
}
|
|
633
|
+
else {
|
|
634
|
+
yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
|
|
635
|
+
}
|
|
505
636
|
}
|
|
506
637
|
else {
|
|
507
638
|
if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
|
package/build/preprocessor.js
CHANGED
|
@@ -34,6 +34,8 @@ class Preprocessor {
|
|
|
34
34
|
what: joi_1.default.array().items({
|
|
35
35
|
action: joi_1.default.string().required(),
|
|
36
36
|
args: joi_1.default.array().items(joi_1.default.any()),
|
|
37
|
+
name: joi_1.default.string(),
|
|
38
|
+
actionId: joi_1.default.string()
|
|
37
39
|
}).required(),
|
|
38
40
|
})).required(),
|
|
39
41
|
});
|
|
package/build/types/workflow.d.ts
CHANGED
|
@@ -27,6 +27,8 @@ export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot
|
|
|
27
27
|
export type What = {
|
|
28
28
|
action: MethodNames<Page> | CustomFunctions;
|
|
29
29
|
args?: any[];
|
|
30
|
+
name?: string;
|
|
31
|
+
actionId?: string;
|
|
30
32
|
};
|
|
31
33
|
export type PageState = Partial<BaseConditions>;
|
|
32
34
|
export type ParamType = Record<string, any>;
|