@arcblock/crawler-middleware 1.3.0 → 1.3.2

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ export declare class CacheManager {
19
19
  private initializedPromise;
20
20
  private updateQueue;
21
21
  constructor(options: CacheManagerOptions);
22
+ private initializeQueue;
22
23
  waitReady(): Promise<void>;
23
24
  getSnapshot(url: string): Promise<SnapshotModel | null>;
24
25
  setSnapshot(url: string, snapshot: SnapshotModel): Promise<void>;
package/lib/cjs/cache.js CHANGED
@@ -8,14 +8,10 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
8
8
  step((generator = generator.apply(thisArg, _arguments || [])).next());
9
9
  });
10
10
  };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
11
  Object.defineProperty(exports, "__esModule", { value: true });
15
12
  exports.CacheManager = void 0;
16
- const crawler_1 = require("@arcblock/crawler");
13
+ const utils_1 = require("@arcblock/crawler/utils");
17
14
  const lru_cache_1 = require("lru-cache");
18
- const queue_1 = __importDefault(require("queue"));
19
15
  const ufo_1 = require("ufo");
20
16
  const env_1 = require("./env");
21
17
  const index_1 = require("./store/index");
@@ -26,11 +22,16 @@ class CacheManager {
26
22
  if (this.options.cacheMax > 0) {
27
23
  this.cache = new lru_cache_1.LRUCache({ max: this.options.cacheMax || 500 });
28
24
  }
29
- this.updateQueue = new queue_1.default({
30
- autostart: true,
31
- concurrency: this.options.updatedConcurrency,
25
+ this.initializedPromise = Promise.all([(0, index_1.initDatabase)(), this.initializeQueue()]);
26
+ }
27
+ initializeQueue() {
28
+ return __awaiter(this, void 0, void 0, function* () {
29
+ const { default: Queue } = yield import('queue');
30
+ this.updateQueue = new Queue({
31
+ autostart: true,
32
+ concurrency: this.options.updatedConcurrency,
33
+ });
32
34
  });
33
- this.initializedPromise = Promise.all([(0, index_1.initDatabase)()]);
34
35
  }
35
36
  waitReady() {
36
37
  return __awaiter(this, void 0, void 0, function* () {
@@ -66,7 +67,7 @@ class CacheManager {
66
67
  const api = (0, ufo_1.joinURL)(endpoint, 'api/crawl');
67
68
  env_1.logger.debug('Fetching snapshot from SnapKit', { url, api });
68
69
  try {
69
- const { data } = yield crawler_1.utils.axios.get(api, {
70
+ const { data } = yield utils_1.axios.get(api, {
70
71
  params: {
71
72
  url,
72
73
  },
package/lib/cjs/index.js CHANGED
@@ -10,11 +10,10 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
10
10
  };
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
12
  exports.createSnapshotMiddleware = createSnapshotMiddleware;
13
- const crawler_1 = require("@arcblock/crawler");
13
+ const utils_1 = require("@arcblock/crawler/utils");
14
14
  const ufo_1 = require("ufo");
15
15
  const cache_1 = require("./cache");
16
16
  const env_1 = require("./env");
17
- const { isSelfCrawler, isSpider, isStaticFile } = crawler_1.utils;
18
17
  function getFullUrl(req) {
19
18
  const blockletPathname = req.headers['x-path-prefix']
20
19
  ? (0, ufo_1.joinURL)(req.headers['x-path-prefix'], req.originalUrl)
@@ -39,7 +38,7 @@ function createSnapshotMiddleware({ endpoint, accessKey, cacheMax = 0, updateInt
39
38
  return next();
40
39
  }
41
40
  const fullUrl = getFullUrl(req);
42
- if (!isSpider(req) || isSelfCrawler(req) || isStaticFile(req)) {
41
+ if (!(0, utils_1.isSpider)(req) || (0, utils_1.isSelfCrawler)(req) || (0, utils_1.isStaticFile)(req)) {
43
42
  return next();
44
43
  }
45
44
  // fetch content from SnapKit and cache it
@@ -19,6 +19,7 @@ export declare class CacheManager {
19
19
  private initializedPromise;
20
20
  private updateQueue;
21
21
  constructor(options: CacheManagerOptions);
22
+ private initializeQueue;
22
23
  waitReady(): Promise<void>;
23
24
  getSnapshot(url: string): Promise<SnapshotModel | null>;
24
25
  setSnapshot(url: string, snapshot: SnapshotModel): Promise<void>;
package/lib/esm/cache.js CHANGED
@@ -7,9 +7,8 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
7
7
  step((generator = generator.apply(thisArg, _arguments || [])).next());
8
8
  });
9
9
  };
10
- import { utils } from '@arcblock/crawler';
10
+ import { axios } from '@arcblock/crawler/utils';
11
11
  import { LRUCache } from 'lru-cache';
12
- import Queue from 'queue';
13
12
  import { joinURL } from 'ufo';
14
13
  import { logger } from './env';
15
14
  import { Snapshot, initDatabase } from './store/index';
@@ -20,11 +19,16 @@ export class CacheManager {
20
19
  if (this.options.cacheMax > 0) {
21
20
  this.cache = new LRUCache({ max: this.options.cacheMax || 500 });
22
21
  }
23
- this.updateQueue = new Queue({
24
- autostart: true,
25
- concurrency: this.options.updatedConcurrency,
22
+ this.initializedPromise = Promise.all([initDatabase(), this.initializeQueue()]);
23
+ }
24
+ initializeQueue() {
25
+ return __awaiter(this, void 0, void 0, function* () {
26
+ const { default: Queue } = yield import('queue');
27
+ this.updateQueue = new Queue({
28
+ autostart: true,
29
+ concurrency: this.options.updatedConcurrency,
30
+ });
26
31
  });
27
- this.initializedPromise = Promise.all([initDatabase()]);
28
32
  }
29
33
  waitReady() {
30
34
  return __awaiter(this, void 0, void 0, function* () {
@@ -60,7 +64,7 @@ export class CacheManager {
60
64
  const api = joinURL(endpoint, 'api/crawl');
61
65
  logger.debug('Fetching snapshot from SnapKit', { url, api });
62
66
  try {
63
- const { data } = yield utils.axios.get(api, {
67
+ const { data } = yield axios.get(api, {
64
68
  params: {
65
69
  url,
66
70
  },
package/lib/esm/index.js CHANGED
@@ -7,11 +7,10 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
7
7
  step((generator = generator.apply(thisArg, _arguments || [])).next());
8
8
  });
9
9
  };
10
- import { utils } from '@arcblock/crawler';
10
+ import { isSelfCrawler, isSpider, isStaticFile } from '@arcblock/crawler/utils';
11
11
  import { joinURL } from 'ufo';
12
12
  import { CacheManager } from './cache';
13
13
  import { env, logger } from './env';
14
- const { isSelfCrawler, isSpider, isStaticFile } = utils;
15
14
  function getFullUrl(req) {
16
15
  const blockletPathname = req.headers['x-path-prefix']
17
16
  ? joinURL(req.headers['x-path-prefix'], req.originalUrl)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arcblock/crawler-middleware",
3
- "version": "1.3.0",
3
+ "version": "1.3.2",
4
4
  "main": "lib/cjs/index.js",
5
5
  "module": "lib/esm/index.js",
6
6
  "types": "lib/cjs/index.d.ts",
@@ -40,9 +40,9 @@
40
40
  ]
41
41
  },
42
42
  "dependencies": {
43
- "@abtnode/cron": "^1.16.44",
44
- "@blocklet/logger": "^1.16.44",
45
- "@blocklet/sdk": "^1.16.44",
43
+ "@abtnode/cron": "^1.16.46",
44
+ "@blocklet/logger": "^1.16.46",
45
+ "@blocklet/sdk": "^1.16.46",
46
46
  "@sequelize/core": "7.0.0-alpha.46",
47
47
  "@sequelize/sqlite3": "7.0.0-alpha.46",
48
48
  "axios": "^1.7.9",
@@ -50,7 +50,7 @@
50
50
  "queue": "^7.0.0",
51
51
  "sqlite3": "^5.1.7",
52
52
  "ufo": "^1.5.4",
53
- "@arcblock/crawler": "1.3.0"
53
+ "@arcblock/crawler": "1.3.2"
54
54
  },
55
55
  "devDependencies": {
56
56
  "@types/express": "^4.17.21",