@arcblock/crawler 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

package/lib/cjs/crawler.d.ts CHANGED
@@ -1,5 +1,11 @@
 import { Page } from '@blocklet/puppeteer';
 import { JobState, SnapshotModel } from './store';
+declare let crawlQueue: any;
+declare let syncQueue: any;
+declare let codeQueue: any;
+declare let cronQueue: any;
+export { crawlQueue, syncQueue, codeQueue, cronQueue };
+export declare function initQueue(): void;
 type PageHandler = {
     handleScreenshot?: (page: Page, params?: JobState) => Promise<Buffer | null>;
     handleHtml?: (page: Page, params?: JobState) => Promise<string | null>;
@@ -25,4 +31,3 @@ export declare const getPageContent: ({ url, includeScreenshot, includeHtml, wid
 export declare function enqueue(queue: any, params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
 export declare function crawlUrl(params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
 export declare function crawlCode(params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
-export {};

package/lib/cjs/crawler.js CHANGED
@@ -12,12 +12,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.getPageContent = void 0;
+exports.getPageContent = exports.cronQueue = exports.codeQueue = exports.syncQueue = exports.crawlQueue = void 0;
+exports.initQueue = initQueue;
 exports.createCrawlQueue = createCrawlQueue;
 exports.getDataDir = getDataDir;
 exports.enqueue = enqueue;
 exports.crawlUrl = crawlUrl;
 exports.crawlCode = crawlCode;
+/* eslint-disable import/no-mutable-exports */
 const queue_1 = __importDefault(require("@abtnode/queue"));
 const sequelize_1 = __importDefault(require("@abtnode/queue/lib/store/sequelize"));
 const crypto_1 = require("crypto");
@@ -30,12 +32,18 @@ const snapshot_1 = require("./services/snapshot");
 const store_1 = require("./store");
 const utils_1 = require("./utils");
 const { BaseState } = require('@abtnode/models');
-// eslint-disable-next-line import/no-mutable-exports
-const crawlQueue = createCrawlQueue('urlCrawler');
-const syncQueue = createCrawlQueue('syncCrawler');
-const codeQueue = createCrawlQueue('codeCrawler', {
-    handleScreenshot: carbon_1.createCarbonImage,
-});
+let crawlQueue;
+let syncQueue;
+let codeQueue;
+let cronQueue;
+function initQueue() {
+    exports.crawlQueue = crawlQueue = createCrawlQueue('urlCrawler');
+    exports.syncQueue = syncQueue = createCrawlQueue('syncCrawler');
+    exports.codeQueue = codeQueue = createCrawlQueue('codeCrawler', {
+        handleScreenshot: carbon_1.createCarbonImage,
+    });
+    exports.cronQueue = cronQueue = createCrawlQueue('cronJobs');
+}
 function createCrawlQueue(queue, handler) {
     const db = new BaseState(store_1.Job);
     return (0, queue_1.default)({
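
The queues are no longer built when the module is first require()d; they are created by initQueue(), which initCrawler calls after the store migration (see index.js below). Because CommonJS consumers read queue values off the exports object, each assignment has to update both the local binding and the export, which is exactly what tsc emits for an exported `let`. A minimal TypeScript sketch of the pattern, with a stubbed Queue type and factory standing in for the package's real ones:

    // Lazy queue initialization with mutable exports (stubs for illustration).
    type Queue = { push: (job: unknown) => void };

    function createCrawlQueue(name: string): Queue {
      return { push: (job) => console.log(`[${name}]`, job) };
    }

    /* eslint-disable import/no-mutable-exports */
    export let crawlQueue: Queue; // stays undefined until initQueue() runs
    export let cronQueue: Queue;

    export function initQueue(): void {
      // Compiled to CommonJS, each line becomes `exports.x = x = ...`,
      // matching the 1.3.2 output above.
      crawlQueue = createCrawlQueue('urlCrawler');
      cronQueue = createCrawlQueue('cronJobs');
    }

Deferring construction this way means nothing touches the job tables until migrate() has run, and it lets site.js share the new 'cronJobs' queue instead of creating its own (see site.js below).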

package/lib/cjs/index.d.ts CHANGED
@@ -1,7 +1,5 @@
 import { Config } from './config';
 export * from './crawler';
-export * from './site';
 export * from './services/snapshot';
 export * as utils from './utils';
-export { migrate } from './store/migrate';
 export declare function initCrawler(params: Pick<Config, 'puppeteerPath' | 'siteCron' | 'cookies' | 'localStorage' | 'concurrency'>): Promise<void>;

package/lib/cjs/index.js CHANGED
@@ -48,25 +48,26 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.migrate = exports.utils = void 0;
+exports.utils = void 0;
 exports.initCrawler = initCrawler;
 /* eslint-disable @typescript-eslint/indent */
 const merge_1 = __importDefault(require("lodash/merge"));
 const config_1 = require("./config");
+const crawler_1 = require("./crawler");
 const cron_1 = require("./cron");
 const puppeteer_1 = require("./puppeteer");
+const migrate_1 = require("./store/migrate");
 __exportStar(require("./crawler"), exports);
-__exportStar(require("./site"), exports);
 __exportStar(require("./services/snapshot"), exports);
 exports.utils = __importStar(require("./utils"));
-var migrate_1 = require("./store/migrate");
-Object.defineProperty(exports, "migrate", { enumerable: true, get: function () { return migrate_1.migrate; } });
 function initCrawler(params) {
     return __awaiter(this, void 0, void 0, function* () {
         var _a;
         (0, merge_1.default)(config_1.config, params);
         config_1.logger.info('Init crawler', { params, config: config_1.config });
         try {
+            yield (0, migrate_1.migrate)();
+            yield (0, crawler_1.initQueue)();
            yield (0, puppeteer_1.ensureBrowser)();
             if ((_a = config_1.config.siteCron) === null || _a === void 0 ? void 0 : _a.enabled) {
                 yield (0, cron_1.initCron)();
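
For consumers the contract is unchanged as long as setup goes through initCrawler, which now awaits migrate() and initQueue() before ensuring the browser. A hedged usage sketch (the option values and JobState fields shown are assumptions based on the declarations above):

    import { initCrawler, crawlUrl } from '@arcblock/crawler';

    async function main() {
      // Queues exist only after this resolves: initCrawler runs the store
      // migration, then initQueue(), then launches/downloads the browser.
      await initCrawler({ concurrency: 2 });
      const jobId = await crawlUrl({ url: 'https://www.example.com' });
      console.log('queued', jobId);
    }

    main().catch(console.error);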

package/lib/cjs/puppeteer.js CHANGED
@@ -1,37 +1,4 @@
 "use strict";
-var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
-    if (k2 === undefined) k2 = k;
-    var desc = Object.getOwnPropertyDescriptor(m, k);
-    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
-        desc = { enumerable: true, get: function() { return m[k]; } };
-    }
-    Object.defineProperty(o, k2, desc);
-}) : (function(o, m, k, k2) {
-    if (k2 === undefined) k2 = k;
-    o[k2] = m[k];
-}));
-var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
-    Object.defineProperty(o, "default", { enumerable: true, value: v });
-}) : function(o, v) {
-    o["default"] = v;
-});
-var __importStar = (this && this.__importStar) || (function () {
-    var ownKeys = function(o) {
-        ownKeys = Object.getOwnPropertyNames || function (o) {
-            var ar = [];
-            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
-            return ar;
-        };
-        return ownKeys(o);
-    };
-    return function (mod) {
-        if (mod && mod.__esModule) return mod;
-        var result = {};
-        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
-        __setModuleDefault(result, mod);
-        return result;
-    };
-})();
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
@@ -92,11 +59,12 @@ function ensureBrowser() {
         config_1.logger.debug('executablePath', executablePath);
         if (!executablePath || !fs_extra_1.default.existsSync(executablePath)) {
             config_1.logger.info('start download browser', puppeteerConfig);
+            // @ts-ignore
             const { downloadBrowser } = yield (() => __awaiter(this, void 0, void 0, function* () {
                 try {
                     // @ts-ignore
                     // eslint-disable-next-line import/extensions
-                    return yield Promise.resolve().then(() => __importStar(require('@blocklet/puppeteer/internal/node/install.js')));
+                    return yield import('@blocklet/puppeteer/internal/node/install.js');
                 }
                 catch (err) {
                     config_1.logger.warn('Skipping browser installation because the Puppeteer build is not available. Run `npm install` again after you have re-built Puppeteer.');
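
The dropped __importStar helpers follow from the one-line change in ensureBrowser: 1.3.0's CJS output had downleveled the installer import to `Promise.resolve().then(() => __importStar(require(...)))`, and require() fails with ERR_REQUIRE_ESM when the install script is an ES module, whereas a genuine dynamic import() loads either format. A sketch of the pattern, assuming only that the installer exports downloadBrowser as the diff shows:

    // A real import() works from CommonJS even when the target is ESM-only.
    async function loadDownloadBrowser(): Promise<(() => Promise<void>) | undefined> {
      try {
        // @ts-ignore -- internal path, no type declarations shipped
        const mod = await import('@blocklet/puppeteer/internal/node/install.js');
        return mod.downloadBrowser;
      } catch {
        return undefined; // Puppeteer build not available; skip installation
      }
    }

Preserving a literal import() in CJS output typically requires compiling with "module": "node16"/"nodenext" or a build-time patch; the diff itself doesn't show which approach the package uses.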

package/lib/cjs/services/carbon.d.ts CHANGED
@@ -1,3 +1,3 @@
 import { Page } from '@blocklet/puppeteer';
 import { JobState } from '../store';
-export declare function createCarbonImage(page: Page, params?: JobState): Promise<Buffer<ArrayBuffer>>;
+export declare function createCarbonImage(page: Page, params?: JobState): Promise<Buffer<ArrayBufferLike>>;

package/lib/cjs/services/carbon.js CHANGED
@@ -11,23 +11,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.createCarbonImage = createCarbonImage;
 const config_1 = require("../config");
-// TODO expose local version of dom-to-image
-const DOM_TO_IMAGE_URL = 'https://unpkg.com/dom-to-image@2.6.0/dist/dom-to-image.min.js';
 function createCarbonImage(page, params) {
     return __awaiter(this, void 0, void 0, function* () {
         try {
-            yield page.addScriptTag({ url: DOM_TO_IMAGE_URL });
             yield page.waitForSelector('.export-container', { visible: true, timeout: (params === null || params === void 0 ? void 0 : params.timeout) || 120 });
-            const targetElement = yield page.$('.export-container');
-            const format = (params === null || params === void 0 ? void 0 : params.format) || 'png';
-            const dataUrl = yield page.evaluate((target = document, imageFormat = 'png') => {
-                const query = new URLSearchParams(document.location.search);
-                const EXPORT_SIZES_HASH = {
-                    '1x': '1',
-                    '2x': '2',
-                    '4x': '4',
-                };
-                const exportSize = EXPORT_SIZES_HASH[query.get('es')] || '2';
+            const targetElement = (yield page.$('.export-container'));
+            yield page.evaluate((target = document) => {
                 if (!target) {
                     throw new Error('Target element not found');
                 }
@@ -40,44 +29,9 @@ function createCarbonImage(page, params) {
                        });
                    }
                });
-                const width = target.offsetWidth * exportSize;
-                const height = query.get('si') === 'true'
-                    ? target.offsetWidth * exportSize
-                    : target.offsetHeight * exportSize;
-                const config = {
-                    style: {
-                        transform: `scale(${exportSize})`,
-                        'transform-origin': 'center',
-                        background: query.get('si') ? query.get('bg') : 'none',
-                    },
-                    filter: (n) => {
-                        if (n.className) {
-                            return String(n.className).indexOf('eliminateOnRender') < 0;
-                        }
-                        return true;
-                    },
-                    width,
-                    height,
-                };
-                switch (imageFormat) {
-                    case 'jpeg':
-                        // @ts-ignore: domtoimage is injected by addScriptTag
-                        return domtoimage.toJpeg(target, config);
-                    case 'webp':
-                        // dom-to-image doesn't support webp directly, fall back to png
-                        // @ts-ignore: domtoimage is injected by addScriptTag
-                        return domtoimage.toPng(target, config);
-                    case 'png':
-                    default:
-                        // @ts-ignore: domtoimage is injected by addScriptTag
-                        return domtoimage.toPng(target, config);
-                }
-            }, targetElement, format);
-            const base64Data = dataUrl.split(',')[1];
-            if (!base64Data) {
-                throw new Error('Failed to extract base64 data from image');
-            }
-            return Buffer.from(base64Data, 'base64');
+            }, targetElement);
+            const buffer = yield targetElement.screenshot({ type: (params === null || params === void 0 ? void 0 : params.format) || 'webp', quality: (params === null || params === void 0 ? void 0 : params.quality) || 100 });
+            return buffer;
         }
         catch (e) {
             config_1.logger.error('failed to crawl from carbon', { error: e });
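
This is the headline change in carbon.js: instead of injecting dom-to-image from unpkg.com, rendering the card to a data URL inside page.evaluate, and hand-decoding the base64 payload, 1.3.2 screenshots the `.export-container` element directly and defaults to webp. That removes the runtime dependency on a third-party CDN along with the export-size/query-string scaling logic. A minimal sketch of the same technique using plain Puppeteer (the package itself uses @blocklet/puppeteer and its already-configured page):

    import puppeteer from 'puppeteer';

    // Screenshot one element instead of rasterizing it in-page with dom-to-image.
    async function captureExportContainer(url: string): Promise<Uint8Array> {
      const browser = await puppeteer.launch();
      try {
        const page = await browser.newPage();
        await page.goto(url, { waitUntil: 'networkidle0' });
        await page.waitForSelector('.export-container', { visible: true });
        const el = await page.$('.export-container');
        if (!el) throw new Error('Target element not found');
        // ElementHandle.screenshot clips to the element's bounding box;
        // quality applies to lossy formats such as webp and jpeg.
        return await el.screenshot({ type: 'webp', quality: 100 });
      } finally {
        await browser.close();
      }
    }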

package/lib/cjs/site.js CHANGED
@@ -15,13 +15,11 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.crawlSite = void 0;
 const uniq_1 = __importDefault(require("lodash/uniq"));
 const node_crypto_1 = require("node:crypto");
-const p_map_1 = __importDefault(require("p-map"));
 const config_1 = require("./config");
 const crawler_1 = require("./crawler");
 const store_1 = require("./store");
 const utils_1 = require("./utils");
 const crawlBlockletRunningMap = new Map();
-const crawlQueue = (0, crawler_1.createCrawlQueue)('cronJobs');
 function parseSitemapUrl(sitemapItem) {
     var _a;
     const links = ((_a = sitemapItem.links) === null || _a === void 0 ? void 0 : _a.map((item) => item.url)) || [];
@@ -30,6 +28,7 @@ function parseSitemapUrl(sitemapItem) {
 }
 const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, pathname, interval = 0 }) {
     var _b;
+    const { default: pMap } = yield import('p-map');
     config_1.logger.info(`Start crawl from sitemap ${url}`, { pathname });
     const key = `${url}-${pathname}`;
     if (crawlBlockletRunningMap.has(key)) {
@@ -48,7 +47,7 @@ const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, path
     let crawlCount = 0;
     crawlBlockletRunningMap.set(key, true);
     try {
-        const jobIds = yield (0, p_map_1.default)(sitemapItems, (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, sitemapItem }) {
+        const jobIds = yield pMap(sitemapItems, (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, sitemapItem }) {
             processCount++;
             const snapshot = yield store_1.Snapshot.findOne({ where: { url: (0, utils_1.formatUrl)(url) } });
             if (snapshot === null || snapshot === void 0 ? void 0 : snapshot.lastModified) {
@@ -70,7 +69,7 @@ const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, path
         });
         crawlCount++;
         const jobId = (0, node_crypto_1.randomUUID)();
-        crawlQueue.push({
+        crawler_1.cronQueue.push({
            id: jobId,
            url,
            lastModified: sitemapItem.lastmod,
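
Two things happen in site.js. First, p-map is pure ESM from v5 on, so the CommonJS build can no longer require() it at the top of the module; 1.3.2 loads it with a dynamic import() inside crawlSite. Second, the module stops creating its own 'cronJobs' queue and pushes to the shared cronQueue from crawler.js, which exists once initQueue() has run. A sketch of the p-map pattern (the mapper and concurrency value are illustrative):

    // Dynamic import() lets CommonJS code consume the ESM-only p-map package.
    async function crawlAll(urls: string[]): Promise<string[]> {
      const { default: pMap } = await import('p-map');
      return pMap(urls, async (url) => `crawled:${url}`, { concurrency: 5 });
    }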

package/lib/cjs/utils.d.ts CHANGED
@@ -1,6 +1,7 @@
 import { Page } from '@blocklet/puppeteer';
+import Axios from 'axios';
 import { Request } from 'express';
-export declare const axios: import("axios").AxiosInstance;
+export declare const axios: Axios.AxiosInstance;
 export declare const CRAWLER_FLAG = "x-arcblock-crawler";
 export declare const sleep: (ms: number) => Promise<unknown>;
 /**

package/lib/esm/crawler.d.ts CHANGED
@@ -1,5 +1,11 @@
 import { Page } from '@blocklet/puppeteer';
 import { JobState, SnapshotModel } from './store';
+declare let crawlQueue: any;
+declare let syncQueue: any;
+declare let codeQueue: any;
+declare let cronQueue: any;
+export { crawlQueue, syncQueue, codeQueue, cronQueue };
+export declare function initQueue(): void;
 type PageHandler = {
     handleScreenshot?: (page: Page, params?: JobState) => Promise<Buffer | null>;
     handleHtml?: (page: Page, params?: JobState) => Promise<string | null>;
@@ -25,4 +31,3 @@ export declare const getPageContent: ({ url, includeScreenshot, includeHtml, wid
 export declare function enqueue(queue: any, params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
 export declare function crawlUrl(params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
 export declare function crawlCode(params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
-export {};

package/lib/esm/crawler.js CHANGED
@@ -7,6 +7,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
+/* eslint-disable import/no-mutable-exports */
 import createQueue from '@abtnode/queue';
 import SequelizeStore from '@abtnode/queue/lib/store/sequelize';
 import { randomUUID } from 'crypto';
@@ -19,12 +20,19 @@ import { convertJobToSnapshot, deleteSnapshots, formatSnapshot } from './service
 import { Job, Snapshot, sequelize } from './store';
 import { findMaxScrollHeight, formatUrl, isAcceptCrawler, md5, sleep } from './utils';
 const { BaseState } = require('@abtnode/models');
-// eslint-disable-next-line import/no-mutable-exports
-const crawlQueue = createCrawlQueue('urlCrawler');
-const syncQueue = createCrawlQueue('syncCrawler');
-const codeQueue = createCrawlQueue('codeCrawler', {
-    handleScreenshot: createCarbonImage,
-});
+let crawlQueue;
+let syncQueue;
+let codeQueue;
+let cronQueue;
+export { crawlQueue, syncQueue, codeQueue, cronQueue };
+export function initQueue() {
+    crawlQueue = createCrawlQueue('urlCrawler');
+    syncQueue = createCrawlQueue('syncCrawler');
+    codeQueue = createCrawlQueue('codeCrawler', {
+        handleScreenshot: createCarbonImage,
+    });
+    cronQueue = createCrawlQueue('cronJobs');
+}
 export function createCrawlQueue(queue, handler) {
     const db = new BaseState(Job);
     return createQueue({
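
The ESM build needs none of the exports-object bookkeeping seen in the CJS output: `export { crawlQueue, syncQueue, codeQueue, cronQueue }` creates live bindings, so importers observe whatever initQueue() later assigns. A small illustration of the semantics (file names hypothetical):

    // queues.ts — an exported `let` is a live binding in ESM.
    export let cronQueue: { push: (job: unknown) => void } | undefined;

    export function initQueue(): void {
      cronQueue = { push: (job) => console.log('cron job', job) };
    }

    // consumer.ts
    //   import { cronQueue, initQueue } from './queues.js';
    //   initQueue();
    //   cronQueue?.push({ id: 1 }); // sees the queue created by initQueue()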

package/lib/esm/index.d.ts CHANGED
@@ -1,7 +1,5 @@
 import { Config } from './config';
 export * from './crawler';
-export * from './site';
 export * from './services/snapshot';
 export * as utils from './utils';
-export { migrate } from './store/migrate';
 export declare function initCrawler(params: Pick<Config, 'puppeteerPath' | 'siteCron' | 'cookies' | 'localStorage' | 'concurrency'>): Promise<void>;

package/lib/esm/index.js CHANGED
@@ -10,19 +10,21 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 /* eslint-disable @typescript-eslint/indent */
 import merge from 'lodash/merge';
 import { config, logger } from './config';
+import { initQueue } from './crawler';
 import { initCron } from './cron';
 import { ensureBrowser } from './puppeteer';
+import { migrate } from './store/migrate';
 export * from './crawler';
-export * from './site';
 export * from './services/snapshot';
 export * as utils from './utils';
-export { migrate } from './store/migrate';
 export function initCrawler(params) {
     return __awaiter(this, void 0, void 0, function* () {
         var _a;
         merge(config, params);
         logger.info('Init crawler', { params, config });
         try {
+            yield migrate();
+            yield initQueue();
             yield ensureBrowser();
             if ((_a = config.siteCron) === null || _a === void 0 ? void 0 : _a.enabled) {
                 yield initCron();

package/lib/esm/puppeteer.js CHANGED
@@ -48,6 +48,7 @@ export function ensureBrowser() {
         logger.debug('executablePath', executablePath);
         if (!executablePath || !fs.existsSync(executablePath)) {
             logger.info('start download browser', puppeteerConfig);
+            // @ts-ignore
             const { downloadBrowser } = yield (() => __awaiter(this, void 0, void 0, function* () {
                 try {
                     // @ts-ignore

package/lib/esm/services/carbon.d.ts CHANGED
@@ -1,3 +1,3 @@
 import { Page } from '@blocklet/puppeteer';
 import { JobState } from '../store';
-export declare function createCarbonImage(page: Page, params?: JobState): Promise<Buffer<ArrayBuffer>>;
+export declare function createCarbonImage(page: Page, params?: JobState): Promise<Buffer<ArrayBufferLike>>;

package/lib/esm/services/carbon.js CHANGED
@@ -8,23 +8,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
     });
 };
 import { logger } from '../config';
-// TODO expose local version of dom-to-image
-const DOM_TO_IMAGE_URL = 'https://unpkg.com/dom-to-image@2.6.0/dist/dom-to-image.min.js';
 export function createCarbonImage(page, params) {
     return __awaiter(this, void 0, void 0, function* () {
         try {
-            yield page.addScriptTag({ url: DOM_TO_IMAGE_URL });
             yield page.waitForSelector('.export-container', { visible: true, timeout: (params === null || params === void 0 ? void 0 : params.timeout) || 120 });
-            const targetElement = yield page.$('.export-container');
-            const format = (params === null || params === void 0 ? void 0 : params.format) || 'png';
-            const dataUrl = yield page.evaluate((target = document, imageFormat = 'png') => {
-                const query = new URLSearchParams(document.location.search);
-                const EXPORT_SIZES_HASH = {
-                    '1x': '1',
-                    '2x': '2',
-                    '4x': '4',
-                };
-                const exportSize = EXPORT_SIZES_HASH[query.get('es')] || '2';
+            const targetElement = (yield page.$('.export-container'));
+            yield page.evaluate((target = document) => {
                 if (!target) {
                     throw new Error('Target element not found');
                 }
@@ -37,44 +26,9 @@ export function createCarbonImage(page, params) {
                        });
                    }
                });
-                const width = target.offsetWidth * exportSize;
-                const height = query.get('si') === 'true'
-                    ? target.offsetWidth * exportSize
-                    : target.offsetHeight * exportSize;
-                const config = {
-                    style: {
-                        transform: `scale(${exportSize})`,
-                        'transform-origin': 'center',
-                        background: query.get('si') ? query.get('bg') : 'none',
-                    },
-                    filter: (n) => {
-                        if (n.className) {
-                            return String(n.className).indexOf('eliminateOnRender') < 0;
-                        }
-                        return true;
-                    },
-                    width,
-                    height,
-                };
-                switch (imageFormat) {
-                    case 'jpeg':
-                        // @ts-ignore: domtoimage is injected by addScriptTag
-                        return domtoimage.toJpeg(target, config);
-                    case 'webp':
-                        // dom-to-image doesn't support webp directly, fall back to png
-                        // @ts-ignore: domtoimage is injected by addScriptTag
-                        return domtoimage.toPng(target, config);
-                    case 'png':
-                    default:
-                        // @ts-ignore: domtoimage is injected by addScriptTag
-                        return domtoimage.toPng(target, config);
-                }
-            }, targetElement, format);
-            const base64Data = dataUrl.split(',')[1];
-            if (!base64Data) {
-                throw new Error('Failed to extract base64 data from image');
-            }
-            return Buffer.from(base64Data, 'base64');
+            }, targetElement);
+            const buffer = yield targetElement.screenshot({ type: (params === null || params === void 0 ? void 0 : params.format) || 'webp', quality: (params === null || params === void 0 ? void 0 : params.quality) || 100 });
+            return buffer;
         }
         catch (e) {
             logger.error('failed to crawl from carbon', { error: e });

package/lib/esm/site.js CHANGED
@@ -9,13 +9,11 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 };
 import uniq from 'lodash/uniq';
 import { randomUUID } from 'node:crypto';
-import pMap from 'p-map';
 import { config, logger } from './config';
-import { createCrawlQueue } from './crawler';
+import { cronQueue } from './crawler';
 import { Snapshot } from './store';
 import { formatUrl, getSitemapList } from './utils';
 const crawlBlockletRunningMap = new Map();
-const crawlQueue = createCrawlQueue('cronJobs');
 function parseSitemapUrl(sitemapItem) {
     var _a;
     const links = ((_a = sitemapItem.links) === null || _a === void 0 ? void 0 : _a.map((item) => item.url)) || [];
@@ -24,6 +22,7 @@ function parseSitemapUrl(sitemapItem) {
 }
 export const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, pathname, interval = 0 }) {
     var _b;
+    const { default: pMap } = yield import('p-map');
     logger.info(`Start crawl from sitemap ${url}`, { pathname });
     const key = `${url}-${pathname}`;
     if (crawlBlockletRunningMap.has(key)) {
@@ -64,7 +63,7 @@ export const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ ur
         });
         crawlCount++;
         const jobId = randomUUID();
-        crawlQueue.push({
+        cronQueue.push({
            id: jobId,
            url,
            lastModified: sitemapItem.lastmod,

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@arcblock/crawler",
-  "version": "1.3.0",
+  "version": "1.3.2",
   "main": "lib/cjs/index.js",
   "module": "lib/esm/index.js",
   "types": "lib/cjs/index.d.ts",
@@ -45,12 +45,12 @@
     ]
   },
   "dependencies": {
-    "@abtnode/cron": "^1.16.44",
-    "@abtnode/models": "^1.16.44",
-    "@abtnode/queue": "^1.16.44",
-    "@blocklet/logger": "^1.16.44",
+    "@abtnode/cron": "^1.16.46",
+    "@abtnode/models": "^1.16.46",
+    "@abtnode/queue": "^1.16.46",
+    "@blocklet/logger": "^1.16.46",
     "@blocklet/puppeteer": "^22.11.3",
-    "@blocklet/sdk": "^1.16.44",
+    "@blocklet/sdk": "^1.16.46",
     "@sequelize/core": "7.0.0-alpha.46",
     "@sequelize/sqlite3": "7.0.0-alpha.46",
     "axios": "^1.7.9",