mx-cloud 0.0.3 → 0.0.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -30,14 +30,16 @@ declare global {
30
30
  * Defines optional interpreter options (passed in constructor)
31
31
  */
32
32
  interface InterpreterOptions {
33
+ mode?: string;
33
34
  maxRepeats: number;
34
35
  maxConcurrency: number;
35
36
  serializableCallback: (output: any) => (void | Promise<void>);
36
37
  binaryCallback: (output: any, mimeType: string) => (void | Promise<void>);
37
38
  debug: boolean;
38
39
  debugChannel: Partial<{
39
- activeId: Function;
40
- debugMessage: Function;
40
+ activeId: (id: number) => void;
41
+ debugMessage: (msg: string) => void;
42
+ setActionType: (type: string) => void;
41
43
  }>;
42
44
  }
43
45
  /**
@@ -328,10 +328,18 @@ class Interpreter extends events_1.EventEmitter {
328
328
  */
329
329
  const wawActions = {
330
330
  screenshot: (params) => __awaiter(this, void 0, void 0, function* () {
331
+ var _a;
332
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
333
+ this.options.debugChannel.setActionType('screenshot');
334
+ }
331
335
  const screenshotBuffer = yield page.screenshot(Object.assign(Object.assign({}, params), { path: undefined }));
332
336
  yield this.options.binaryCallback(screenshotBuffer, 'image/png');
333
337
  }),
334
338
  enqueueLinks: (selector) => __awaiter(this, void 0, void 0, function* () {
339
+ var _a;
340
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
341
+ this.options.debugChannel.setActionType('enqueueLinks');
342
+ }
335
343
  const links = yield page.locator(selector)
336
344
  .evaluateAll(
337
345
  // @ts-ignore
@@ -357,40 +365,50 @@ class Interpreter extends events_1.EventEmitter {
357
365
  yield page.close();
358
366
  }),
359
367
  scrape: (selector) => __awaiter(this, void 0, void 0, function* () {
368
+ var _a;
369
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
370
+ this.options.debugChannel.setActionType('scrape');
371
+ }
360
372
  yield this.ensureScriptsLoaded(page);
361
373
  const scrapeResults = yield page.evaluate((s) => window.scrape(s !== null && s !== void 0 ? s : null), selector);
362
374
  yield this.options.serializableCallback(scrapeResults);
363
375
  }),
364
376
  scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
377
+ var _a;
378
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
379
+ this.options.debugChannel.setActionType('scrapeSchema');
380
+ }
381
+ if (this.options.mode && this.options.mode === 'editor') {
382
+ yield this.options.serializableCallback({});
383
+ return;
384
+ }
365
385
  yield this.ensureScriptsLoaded(page);
366
386
  const scrapeResult = yield page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
367
- const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult];
368
- newResults.forEach((result) => {
369
- Object.entries(result).forEach(([key, value]) => {
370
- const keyExists = this.cumulativeResults.some((item) => key in item && item[key] !== undefined);
371
- if (!keyExists) {
372
- this.cumulativeResults.push({ [key]: value });
373
- }
374
- });
387
+ if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
388
+ this.cumulativeResults = [];
389
+ }
390
+ if (this.cumulativeResults.length === 0) {
391
+ this.cumulativeResults.push({});
392
+ }
393
+ const mergedResult = this.cumulativeResults[0];
394
+ const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
395
+ Object.entries(resultToProcess).forEach(([key, value]) => {
396
+ if (value !== undefined) {
397
+ mergedResult[key] = value;
398
+ }
375
399
  });
376
- const mergedResult = [
377
- Object.fromEntries(Object.entries(this.cumulativeResults.reduce((acc, curr) => {
378
- Object.entries(curr).forEach(([key, value]) => {
379
- // If the key doesn't exist or the current value is not undefined, add/update it
380
- if (value !== undefined) {
381
- acc[key] = value;
382
- }
383
- });
384
- return acc;
385
- }, {})))
386
- ];
387
- // Log cumulative results after each action
388
- console.log("CUMULATIVE results:", this.cumulativeResults);
389
- console.log("MERGED results:", mergedResult);
390
- yield this.options.serializableCallback(mergedResult);
391
- // await this.options.serializableCallback(scrapeResult);
400
+ console.log("Updated merged result:", mergedResult);
401
+ yield this.options.serializableCallback([mergedResult]);
392
402
  }),
393
403
  scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
404
+ var _a;
405
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
406
+ this.options.debugChannel.setActionType('scrapeList');
407
+ }
408
+ if (this.options.mode && this.options.mode === 'editor') {
409
+ yield this.options.serializableCallback({});
410
+ return;
411
+ }
394
412
  yield this.ensureScriptsLoaded(page);
395
413
  let scrapeResults = [];
396
414
  if (!config.pagination) {
@@ -402,6 +420,10 @@ class Interpreter extends events_1.EventEmitter {
402
420
  yield this.options.serializableCallback(scrapeResults);
403
421
  }),
404
422
  scrapeListAuto: (config) => __awaiter(this, void 0, void 0, function* () {
423
+ var _a;
424
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
425
+ this.options.debugChannel.setActionType('scrapeListAuto');
426
+ }
405
427
  yield this.ensureScriptsLoaded(page);
406
428
  const scrapeResults = yield page.evaluate((listSelector) => {
407
429
  return window.scrapeListAuto(listSelector);
@@ -409,6 +431,10 @@ class Interpreter extends events_1.EventEmitter {
409
431
  yield this.options.serializableCallback(scrapeResults);
410
432
  }),
411
433
  scroll: (pages) => __awaiter(this, void 0, void 0, function* () {
434
+ var _a;
435
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
436
+ this.options.debugChannel.setActionType('scroll');
437
+ }
412
438
  yield page.evaluate((pagesInternal) => __awaiter(this, void 0, void 0, function* () {
413
439
  for (let i = 1; i <= (pagesInternal !== null && pagesInternal !== void 0 ? pagesInternal : 1); i += 1) {
414
440
  // @ts-ignore
@@ -417,12 +443,20 @@ class Interpreter extends events_1.EventEmitter {
417
443
  }), pages !== null && pages !== void 0 ? pages : 1);
418
444
  }),
419
445
  script: (code) => __awaiter(this, void 0, void 0, function* () {
446
+ var _a;
447
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
448
+ this.options.debugChannel.setActionType('script');
449
+ }
420
450
  const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
421
451
  const x = new AsyncFunction('page', 'log', code);
422
452
  yield x(page, this.log);
423
453
  }),
424
454
  flag: () => __awaiter(this, void 0, void 0, function* () {
425
455
  return new Promise((res) => {
456
+ var _a;
457
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.setActionType) {
458
+ this.options.debugChannel.setActionType('flag');
459
+ }
426
460
  this.emit('flag', page, res);
427
461
  });
428
462
  }),
@@ -494,6 +528,7 @@ class Interpreter extends events_1.EventEmitter {
494
528
  let visitedUrls = new Set();
495
529
  const MAX_RETRIES = 3;
496
530
  const RETRY_DELAY = 1000; // 1 second delay between retries
531
+ const MAX_UNCHANGED_RESULTS = 5;
497
532
  const debugLog = (message, ...args) => {
498
533
  console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
499
534
  };
@@ -572,28 +607,55 @@ class Interpreter extends events_1.EventEmitter {
572
607
  }
573
608
  });
574
609
  let availableSelectors = config.pagination.selector.split(',');
610
+ let unchangedResultCounter = 0;
575
611
  try {
576
612
  while (true) {
577
613
  switch (config.pagination.type) {
578
614
  case 'scrollDown': {
615
+ let previousResultCount = allResults.length;
616
+ yield scrapeCurrentPage();
617
+ if (checkLimit()) {
618
+ return allResults;
619
+ }
579
620
  yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
580
621
  yield page.waitForTimeout(2000);
581
622
  const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
623
+ const currentResultCount = allResults.length;
624
+ if (currentResultCount === previousResultCount) {
625
+ unchangedResultCounter++;
626
+ if (unchangedResultCounter >= MAX_UNCHANGED_RESULTS) {
627
+ return allResults;
628
+ }
629
+ }
630
+ else {
631
+ unchangedResultCounter = 0;
632
+ }
582
633
  if (currentHeight === previousHeight) {
583
- const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
584
- allResults = allResults.concat(finalResults);
585
634
  return allResults;
586
635
  }
587
636
  previousHeight = currentHeight;
588
637
  break;
589
638
  }
590
639
  case 'scrollUp': {
640
+ let previousResultCount = allResults.length;
641
+ yield scrapeCurrentPage();
642
+ if (checkLimit()) {
643
+ return allResults;
644
+ }
591
645
  yield page.evaluate(() => window.scrollTo(0, 0));
592
646
  yield page.waitForTimeout(2000);
593
647
  const currentTopHeight = yield page.evaluate(() => document.documentElement.scrollTop);
648
+ const currentResultCount = allResults.length;
649
+ if (currentResultCount === previousResultCount) {
650
+ unchangedResultCounter++;
651
+ if (unchangedResultCounter >= MAX_UNCHANGED_RESULTS) {
652
+ return allResults;
653
+ }
654
+ }
655
+ else {
656
+ unchangedResultCounter = 0;
657
+ }
594
658
  if (currentTopHeight === 0) {
595
- const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
596
- allResults = allResults.concat(finalResults);
597
659
  return allResults;
598
660
  }
599
661
  previousHeight = currentTopHeight;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mx-cloud",
3
- "version": "0.0.3",
3
+ "version": "0.0.5",
4
4
  "description": "mx cloud",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",