mx-cloud 0.0.22 → 0.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/interpret.d.ts +5 -0
- package/build/interpret.js +105 -63
- package/package.json +1 -1

package/build/interpret.d.ts
CHANGED

@@ -160,5 +160,10 @@ export default class Interpreter extends EventEmitter {
      */
     run(page: Page, params?: ParamType): Promise<void>;
     stop(): Promise<void>;
+    /**
+     * Cleanup method to release resources and prevent memory leaks
+     * Call this when the interpreter is no longer needed
+     */
+    cleanup(): Promise<void>;
 }
 export {};
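
The new cleanup() method joins run() and stop() on the public interface. A minimal usage sketch in TypeScript (hypothetical: the import path, constructor arguments, and Playwright setup are placeholders; only the run/stop/cleanup signatures come from build/interpret.d.ts):

    import { chromium } from 'playwright';
    import Interpreter from 'mx-cloud/build/interpret'; // assumed entry point

    async function runWorkflow(workflow: unknown, options: unknown): Promise<void> {
      const browser = await chromium.launch();
      const page = await browser.newPage();
      // Constructor arguments are placeholders; check the package for the real signature.
      const interpreter = new Interpreter(workflow as any, options as any);
      try {
        await interpreter.run(page);
      } finally {
        // New in 0.0.24: releases listeners and accumulated scrape data.
        await interpreter.cleanup();
        await browser.close();
      }
    }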

package/build/interpret.js
CHANGED

@@ -394,8 +394,9 @@ class Interpreter extends events_1.EventEmitter {
         for (const link of links) {
             // eslint-disable-next-line
             this.concurrency.addJob(() => __awaiter(this, void 0, void 0, function* () {
+                let newPage = null;
                 try {
-
+                    newPage = yield context.newPage();
                     yield newPage.goto(link);
                     yield newPage.waitForLoadState('networkidle');
                     yield this.runLoop(newPage, this.initializedWorkflow);

@@ -406,6 +407,16 @@ class Interpreter extends events_1.EventEmitter {
                     // the interpreter by throwing).
                     this.log(e, logger_1.Level.ERROR);
                 }
+                finally {
+                    if (newPage && !newPage.isClosed()) {
+                        try {
+                            yield newPage.close();
+                        }
+                        catch (closeError) {
+                            this.log('Failed to close enqueued page', logger_1.Level.WARN);
+                        }
+                    }
+                }
             }));
         }
         yield page.close();
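
The enqueued-link jobs now create the page inside the try block and close it in a finally block, so a failed goto or waitForLoadState no longer leaks a page. The same pattern in plain TypeScript (a generic sketch, not the package's code; processLink is a hypothetical stand-in for the workflow body):

    import type { BrowserContext, Page } from 'playwright';

    // Generic sketch of the try/finally page hygiene adopted above.
    async function visitLink(
      context: BrowserContext,
      link: string,
      processLink: (page: Page) => Promise<void>,
    ): Promise<void> {
      let newPage: Page | null = null;
      try {
        newPage = await context.newPage();
        await newPage.goto(link);
        await newPage.waitForLoadState('networkidle');
        await processLink(newPage);
      } finally {
        // Close the page even if navigation or processing threw.
        if (newPage && !newPage.isClosed()) {
          await newPage.close().catch(() => { /* page already closing */ });
        }
      }
    }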

@@ -419,9 +430,8 @@ class Interpreter extends events_1.EventEmitter {
                 const scrapeResults = yield page.evaluate((s) => window.scrape(s !== null && s !== void 0 ? s : null), selector);
                 yield this.options.serializableCallback(scrapeResults);
             }),
-            scrapeSchema: (
+            scrapeSchema: (schema_1, ...args_1) => __awaiter(this, [schema_1, ...args_1], void 0, function* (schema, actionName = "") {
                 var _a;
-                // Check abort flag at start of scraping
                 if (this.isAborted) {
                     this.log('Workflow aborted, stopping scrapeSchema', logger_1.Level.WARN);
                     return;

@@ -440,7 +450,6 @@ class Interpreter extends events_1.EventEmitter {
                 }
                 const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
                 if (this.cumulativeResults.length === 0) {
-                    // First execution - create initial row
                     const newRow = {};
                     Object.entries(resultToProcess).forEach(([key, value]) => {
                         if (value !== undefined) {

@@ -450,12 +459,10 @@ class Interpreter extends events_1.EventEmitter {
                     this.cumulativeResults.push(newRow);
                 }
                 else {
-                    // Check if any keys from new result already exist in the last row
                     const lastRow = this.cumulativeResults[this.cumulativeResults.length - 1];
                     const newResultKeys = Object.keys(resultToProcess).filter(key => resultToProcess[key] !== undefined);
                     const hasRepeatedKeys = newResultKeys.some(key => lastRow.hasOwnProperty(key));
                     if (hasRepeatedKeys) {
-                        // Keys are repeated - create a new row
                         const newRow = {};
                         Object.entries(resultToProcess).forEach(([key, value]) => {
                             if (value !== undefined) {

@@ -465,7 +472,6 @@ class Interpreter extends events_1.EventEmitter {
                         this.cumulativeResults.push(newRow);
                     }
                     else {
-                        // No repeated keys - merge with the last row
                         Object.entries(resultToProcess).forEach(([key, value]) => {
                             if (value !== undefined) {
                                 lastRow[key] = value;

@@ -473,30 +479,24 @@ class Interpreter extends events_1.EventEmitter {
                         });
                     }
                 }
-                console.log("Total accumulated rows:", this.cumulativeResults.length);
-                console.log("Current results:", this.cumulativeResults);
-                // ✅ Append schema results under "scrapeSchema" → name
                 const actionType = "scrapeSchema";
-                const
+                const name = actionName || "Texts";
                 if (!this.namedResults[actionType])
                     this.namedResults[actionType] = {};
-                this.namedResults[actionType][
+                this.namedResults[actionType][name] = this.cumulativeResults;
                 if (!this.serializableDataByType[actionType])
                     this.serializableDataByType[actionType] = {};
-                if (!this.serializableDataByType[actionType][
-                    this.serializableDataByType[actionType][
+                if (!this.serializableDataByType[actionType][name]) {
+                    this.serializableDataByType[actionType][name] = [];
                 }
-
-                this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
-                // now emit full structured object
+                this.serializableDataByType[actionType][name] = [...this.cumulativeResults];
                 yield this.options.serializableCallback({
                     scrapeList: this.serializableDataByType.scrapeList,
                     scrapeSchema: this.serializableDataByType.scrapeSchema
                 });
             }),
-            scrapeList: (
+            scrapeList: (config_1, ...args_1) => __awaiter(this, [config_1, ...args_1], void 0, function* (config, actionName = "") {
                 var _a, _b;
-                // Check abort flag at start of scraping
                 if (this.isAborted) {
                     this.log('Workflow aborted, stopping scrapeList', logger_1.Level.WARN);
                     return;
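
Results are now grouped first by action type and then by action name before being handed to serializableCallback, with "Texts" as the fallback name for unnamed scrapeSchema steps. The payload shape, inferred from the hunks above (field values are illustrative only):

    // Hypothetical payload passed to options.serializableCallback:
    const payload: {
      scrapeList?: Record<string, unknown[]>;
      scrapeSchema?: Record<string, unknown[]>;
    } = {
      scrapeList: {
        'List 1': [{ title: 'First item' }, { title: 'Second item' }],
      },
      scrapeSchema: {
        // "Texts" is the default bucket when a scrapeSchema step has no name.
        'Texts': [{ heading: 'Page heading', price: '19.99' }],
      },
    };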

@@ -522,36 +522,31 @@ class Interpreter extends events_1.EventEmitter {
                 }
                 catch (error) {
                     console.warn('ScrapeList evaluation failed:', error.message);
-                    return [];
+                    return [];
                 }
             }, config);
         }
         else {
             paginationUsed = true;
-            scrapeResults = yield this.handlePagination(page, config);
+            scrapeResults = yield this.handlePagination(page, config, actionName);
         }
-        // Ensure we always have an array
         if (!Array.isArray(scrapeResults)) {
             scrapeResults = [];
         }
         console.log(`ScrapeList completed with ${scrapeResults.length} results`);
-        // Only process and callback if pagination wasn't used
-        // (handlePagination already handles storage and callbacks internally)
         if (!paginationUsed) {
-            // ✅ Append list results under "scrapeList" → name
             const actionType = "scrapeList";
-            let
-
-            if (!actionName || actionName.trim() === "") {
+            let name = actionName || "";
+            if (!name || name.trim() === "") {
                 this.scrapeListCounter++;
-
+                name = `List ${this.scrapeListCounter}`;
             }
             if (!this.serializableDataByType[actionType])
                 this.serializableDataByType[actionType] = {};
-            if (!this.serializableDataByType[actionType][
-                this.serializableDataByType[actionType][
+            if (!this.serializableDataByType[actionType][name]) {
+                this.serializableDataByType[actionType][name] = [];
             }
-            this.serializableDataByType[actionType][
+            this.serializableDataByType[actionType][name].push(...scrapeResults);
             yield this.options.serializableCallback({
                 scrapeList: this.serializableDataByType.scrapeList,
                 scrapeSchema: this.serializableDataByType.scrapeSchema

@@ -560,15 +555,18 @@ class Interpreter extends events_1.EventEmitter {
         }
         catch (error) {
             console.error('ScrapeList action failed completely:', error.message);
-            // Don't throw error, just return empty array
             const actionType = "scrapeList";
-
+            let name = actionName || "";
+            if (!name || name.trim() === "") {
+                this.scrapeListCounter++;
+                name = `List ${this.scrapeListCounter}`;
+            }
             if (!this.namedResults[actionType])
                 this.namedResults[actionType] = {};
-            this.namedResults[actionType][
+            this.namedResults[actionType][name] = [];
             if (!this.serializableDataByType[actionType])
                 this.serializableDataByType[actionType] = {};
-            this.serializableDataByType[actionType][
+            this.serializableDataByType[actionType][name] = [];
             yield this.options.serializableCallback({
                 scrapeList: this.serializableDataByType.scrapeList,
                 scrapeSchema: this.serializableDataByType.scrapeSchema

@@ -651,25 +649,7 @@ class Interpreter extends events_1.EventEmitter {
         if (debug === null || debug === void 0 ? void 0 : debug.setActionType) {
             debug.setActionType(String(step.action));
         }
-
-        if (step === null || step === void 0 ? void 0 : step.name) {
-            stepName = step.name;
-        }
-        else if (Array.isArray(step === null || step === void 0 ? void 0 : step.args) &&
-            step.args.length > 0 &&
-            typeof step.args[0] === "object" &&
-            "__name" in step.args[0]) {
-            stepName = step.args[0].__name;
-        }
-        else if (typeof (step === null || step === void 0 ? void 0 : step.args) === "object" &&
-            (step === null || step === void 0 ? void 0 : step.args) !== null &&
-            "__name" in step.args) {
-            stepName = step.args.__name;
-        }
-        // Default fallback
-        if (!stepName) {
-            stepName = String(step.action);
-        }
+        stepName = (step === null || step === void 0 ? void 0 : step.name) || String(step.action);
         if (debug && typeof debug.setActionName === "function") {
             debug.setActionName(stepName);
         }

@@ -682,9 +662,12 @@ class Interpreter extends events_1.EventEmitter {
         // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
         const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
         if (step.action === 'screenshot') {
-            // call the screenshot handler directly to allow the extra name parameter
             yield wawActions.screenshot(...(params !== null && params !== void 0 ? params : []), stepName !== null && stepName !== void 0 ? stepName : undefined);
         }
+        else if (step.action === 'scrapeList' || step.action === 'scrapeSchema') {
+            const actionName = step.name || "";
+            yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []), actionName);
+        }
         else {
             yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
         }
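
With this dispatch, a scrapeList or scrapeSchema step's name flows into the action handler and becomes the key its results are filed under. A sketch of what a named step might look like (the step shape is inferred only from how the interpreter reads step.action, step.args, and step.name in this diff; the selector config is purely illustrative):

    // Hypothetical workflow step; `action`, `args`, and `name` are the only
    // fields the interpreter is shown reading here.
    const step = {
      action: 'scrapeList' as const,
      name: 'Product cards',  // results will be grouped under this label
      args: [{ listSelector: '.product', fields: { title: '.title', price: '.price' } }],
    };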

@@ -744,16 +727,14 @@ class Interpreter extends events_1.EventEmitter {
            }
        });
    }
-    handlePagination(
-        return __awaiter(this,
-        // Check abort flag at start of pagination
+    handlePagination(page_1, config_1) {
+        return __awaiter(this, arguments, void 0, function* (page, config, providedActionName = "") {
            if (this.isAborted) {
                this.log('Workflow aborted, stopping pagination', logger_1.Level.WARN);
                return [];
            }
-            // Generate action name for this scrapeList
            const actionType = "scrapeList";
-            let actionName =
+            let actionName = providedActionName || "";
            if (!actionName || actionName.trim() === "") {
                this.scrapeListCounter++;
                actionName = `List ${this.scrapeListCounter}`;

@@ -2224,9 +2205,10 @@ class Interpreter extends events_1.EventEmitter {
             * User-requested concurrency should be entirely managed by the concurrency manager,
             * e.g. via `enqueueLinks`.
             */
-
+            const popupHandler = (popup) => {
                this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
-            }
+            };
+            p.on('popup', popupHandler);
            /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
            let loopIterations = 0;
            const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker

@@ -2234,41 +2216,58 @@ class Interpreter extends events_1.EventEmitter {
            const MAX_CONSECUTIVE_FAILURES = 10;
            const startTime = Date.now();
            const MAX_EXECUTION_TIME = 30 * 60 * 1000; // 30 minutes max
+            // Cleanup function to remove popup listener
+            const cleanup = () => {
+                try {
+                    if (!p.isClosed()) {
+                        p.removeListener('popup', popupHandler);
+                    }
+                }
+                catch (cleanupError) {
+                }
+            };
            while (true) {
                // Multiple circuit breakers to prevent infinite loops
                if (++loopIterations > MAX_LOOP_ITERATIONS) {
                    this.log('Maximum loop iterations reached, terminating to prevent infinite loop', logger_1.Level.ERROR);
+                    cleanup();
                    return;
                }
                // Time-based circuit breaker
                if (Date.now() - startTime > MAX_EXECUTION_TIME) {
                    this.log('Maximum execution time reached (30 minutes), terminating workflow', logger_1.Level.ERROR);
+                    cleanup();
                    return;
                }
                // Failure-based circuit breaker
                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                    this.log('Too many consecutive failures, terminating to prevent hang', logger_1.Level.ERROR);
+                    cleanup();
                    return;
                }
                // Check abort flag immediately
                if (this.isAborted) {
                    this.log('Workflow aborted in runLoop', logger_1.Level.WARN);
+                    cleanup();
                    return;
                }
                // Checks whether the page was closed from outside,
                // or the workflow execution has been stopped via `interpreter.stop()`
                if (p.isClosed() || !this.stopper) {
+                    cleanup();
                    return;
                }
                try {
                    yield p.waitForLoadState();
                }
                catch (e) {
+                    cleanup();
                    yield p.close();
                    return;
                }
                if (workflowCopy.length === 0) {
                    this.log('All actions completed. Workflow finished.', logger_1.Level.LOG);
+                    cleanup();
                    return;
                }
                // const newSelectors = this.getSelectors(workflowCopy);
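
runLoop now keeps a named reference to its popup handler so the listener can be detached on every exit path, preventing handlers from accumulating on the page across re-entries. The general pattern in TypeScript (a generic sketch, not the interpreter's code):

    import type { Page } from 'playwright';

    // Generic sketch of the listener hygiene adopted above.
    function watchPopups(page: Page, onPopup: (popup: Page) => void): () => void {
      const handler = (popup: Page) => onPopup(popup);
      page.on('popup', handler);
      // Return a detach function; call it on every exit path (or in a finally).
      return () => {
        if (!page.isClosed()) {
          page.removeListener('popup', handler);
        }
      };
    }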

@@ -2359,6 +2358,7 @@ class Interpreter extends events_1.EventEmitter {
                }
                else {
                    //await this.disableAdBlocker(p);
+                    cleanup();
                    return;
                }
            }

@@ -2444,5 +2444,47 @@ class Interpreter extends events_1.EventEmitter {
            }
        });
    }
+    /**
+     * Cleanup method to release resources and prevent memory leaks
+     * Call this when the interpreter is no longer needed
+     */
+    cleanup() {
+        return __awaiter(this, void 0, void 0, function* () {
+            try {
+                // Stop any running workflows first
+                if (this.stopper) {
+                    try {
+                        yield this.stop();
+                    }
+                    catch (error) {
+                        this.log(`Error stopping workflow during cleanup: ${error.message}`, logger_1.Level.WARN);
+                    }
+                }
+                // Clear ad-blocker resources
+                if (this.blocker) {
+                    try {
+                        this.blocker = null;
+                        this.log('Ad-blocker resources cleared', logger_1.Level.DEBUG);
+                    }
+                    catch (error) {
+                        this.log(`Error cleaning up ad-blocker: ${error.message}`, logger_1.Level.WARN);
+                    }
+                }
+                // Clear accumulated data to free memory
+                this.cumulativeResults = [];
+                this.autohealFailures = [];
+                this.namedResults = {};
+                this.serializableDataByType = { scrapeList: {}, scrapeSchema: {} };
+                // Reset state
+                this.isAborted = false;
+                this.initializedWorkflow = null;
+                this.log('Interpreter cleanup completed', logger_1.Level.DEBUG);
+            }
+            catch (error) {
+                this.log(`Error during interpreter cleanup: ${error.message}`, logger_1.Level.ERROR);
+                throw error;
+            }
+        });
+    }
 }
 exports.default = Interpreter;