@arcblock/crawler-middleware 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/cache.d.ts +1 -0
- package/lib/cjs/cache.js +11 -10
- package/lib/cjs/index.js +2 -3
- package/lib/esm/cache.d.ts +1 -0
- package/lib/esm/cache.js +11 -7
- package/lib/esm/index.js +1 -2
- package/package.json +5 -5
package/lib/cjs/cache.d.ts
CHANGED
|
@@ -19,6 +19,7 @@ export declare class CacheManager {
|
|
|
19
19
|
private initializedPromise;
|
|
20
20
|
private updateQueue;
|
|
21
21
|
constructor(options: CacheManagerOptions);
|
|
22
|
+
private initializeQueue;
|
|
22
23
|
waitReady(): Promise<void>;
|
|
23
24
|
getSnapshot(url: string): Promise<SnapshotModel | null>;
|
|
24
25
|
setSnapshot(url: string, snapshot: SnapshotModel): Promise<void>;
|
package/lib/cjs/cache.js
CHANGED
|
@@ -8,14 +8,10 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
8
8
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
9
|
});
|
|
10
10
|
};
|
|
11
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
-
};
|
|
14
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
12
|
exports.CacheManager = void 0;
|
|
16
|
-
const
|
|
13
|
+
const utils_1 = require("@arcblock/crawler/utils");
|
|
17
14
|
const lru_cache_1 = require("lru-cache");
|
|
18
|
-
const queue_1 = __importDefault(require("queue"));
|
|
19
15
|
const ufo_1 = require("ufo");
|
|
20
16
|
const env_1 = require("./env");
|
|
21
17
|
const index_1 = require("./store/index");
|
|
@@ -26,11 +22,16 @@ class CacheManager {
|
|
|
26
22
|
if (this.options.cacheMax > 0) {
|
|
27
23
|
this.cache = new lru_cache_1.LRUCache({ max: this.options.cacheMax || 500 });
|
|
28
24
|
}
|
|
29
|
-
this.updateQueue = new queue_1.default({
|
|
30
|
-
|
|
31
|
-
|
|
25
|
+
this.initializedPromise = Promise.all([(0, index_1.initDatabase)(), this.initializeQueue()]);
|
|
26
|
+
}
|
|
27
|
+
initializeQueue() {
|
|
28
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
29
|
+
const { default: Queue } = yield import('queue');
|
|
30
|
+
this.updateQueue = new Queue({
|
|
31
|
+
autostart: true,
|
|
32
|
+
concurrency: this.options.updatedConcurrency,
|
|
33
|
+
});
|
|
32
34
|
});
|
|
33
|
-
this.initializedPromise = Promise.all([(0, index_1.initDatabase)()]);
|
|
34
35
|
}
|
|
35
36
|
waitReady() {
|
|
36
37
|
return __awaiter(this, void 0, void 0, function* () {
|
|
@@ -66,7 +67,7 @@ class CacheManager {
|
|
|
66
67
|
const api = (0, ufo_1.joinURL)(endpoint, 'api/crawl');
|
|
67
68
|
env_1.logger.debug('Fetching snapshot from SnapKit', { url, api });
|
|
68
69
|
try {
|
|
69
|
-
const { data } = yield
|
|
70
|
+
const { data } = yield utils_1.axios.get(api, {
|
|
70
71
|
params: {
|
|
71
72
|
url,
|
|
72
73
|
},
|
package/lib/cjs/index.js
CHANGED
|
@@ -10,11 +10,10 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.createSnapshotMiddleware = createSnapshotMiddleware;
|
|
13
|
-
const crawler_1 = require("@arcblock/crawler");
|
|
13
|
+
const utils_1 = require("@arcblock/crawler/utils");
|
|
14
14
|
const ufo_1 = require("ufo");
|
|
15
15
|
const cache_1 = require("./cache");
|
|
16
16
|
const env_1 = require("./env");
|
|
17
|
-
const { isSelfCrawler, isSpider, isStaticFile } = crawler_1.utils;
|
|
18
17
|
function getFullUrl(req) {
|
|
19
18
|
const blockletPathname = req.headers['x-path-prefix']
|
|
20
19
|
? (0, ufo_1.joinURL)(req.headers['x-path-prefix'], req.originalUrl)
|
|
@@ -39,7 +38,7 @@ function createSnapshotMiddleware({ endpoint, accessKey, cacheMax = 0, updateInt
|
|
|
39
38
|
return next();
|
|
40
39
|
}
|
|
41
40
|
const fullUrl = getFullUrl(req);
|
|
42
|
-
if (!isSpider(req) || isSelfCrawler(req) || isStaticFile(req)) {
|
|
41
|
+
if (!(0, utils_1.isSpider)(req) || (0, utils_1.isSelfCrawler)(req) || (0, utils_1.isStaticFile)(req)) {
|
|
43
42
|
return next();
|
|
44
43
|
}
|
|
45
44
|
// fetch content from SnapKit and cache it
|
package/lib/esm/cache.d.ts
CHANGED
|
@@ -19,6 +19,7 @@ export declare class CacheManager {
|
|
|
19
19
|
private initializedPromise;
|
|
20
20
|
private updateQueue;
|
|
21
21
|
constructor(options: CacheManagerOptions);
|
|
22
|
+
private initializeQueue;
|
|
22
23
|
waitReady(): Promise<void>;
|
|
23
24
|
getSnapshot(url: string): Promise<SnapshotModel | null>;
|
|
24
25
|
setSnapshot(url: string, snapshot: SnapshotModel): Promise<void>;
|
package/lib/esm/cache.js
CHANGED
|
@@ -7,9 +7,8 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
7
7
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
|
-
import {
|
|
10
|
+
import { axios } from '@arcblock/crawler/utils';
|
|
11
11
|
import { LRUCache } from 'lru-cache';
|
|
12
|
-
import Queue from 'queue';
|
|
13
12
|
import { joinURL } from 'ufo';
|
|
14
13
|
import { logger } from './env';
|
|
15
14
|
import { Snapshot, initDatabase } from './store/index';
|
|
@@ -20,11 +19,16 @@ export class CacheManager {
|
|
|
20
19
|
if (this.options.cacheMax > 0) {
|
|
21
20
|
this.cache = new LRUCache({ max: this.options.cacheMax || 500 });
|
|
22
21
|
}
|
|
23
|
-
this.updateQueue = new Queue({
|
|
24
|
-
|
|
25
|
-
|
|
22
|
+
this.initializedPromise = Promise.all([initDatabase(), this.initializeQueue()]);
|
|
23
|
+
}
|
|
24
|
+
initializeQueue() {
|
|
25
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
26
|
+
const { default: Queue } = yield import('queue');
|
|
27
|
+
this.updateQueue = new Queue({
|
|
28
|
+
autostart: true,
|
|
29
|
+
concurrency: this.options.updatedConcurrency,
|
|
30
|
+
});
|
|
26
31
|
});
|
|
27
|
-
this.initializedPromise = Promise.all([initDatabase()]);
|
|
28
32
|
}
|
|
29
33
|
waitReady() {
|
|
30
34
|
return __awaiter(this, void 0, void 0, function* () {
|
|
@@ -60,7 +64,7 @@ export class CacheManager {
|
|
|
60
64
|
const api = joinURL(endpoint, 'api/crawl');
|
|
61
65
|
logger.debug('Fetching snapshot from SnapKit', { url, api });
|
|
62
66
|
try {
|
|
63
|
-
const { data } = yield
|
|
67
|
+
const { data } = yield axios.get(api, {
|
|
64
68
|
params: {
|
|
65
69
|
url,
|
|
66
70
|
},
|
package/lib/esm/index.js
CHANGED
|
@@ -7,11 +7,10 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
7
7
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
|
-
import { utils } from '@arcblock/crawler';
|
|
10
|
+
import { isSelfCrawler, isSpider, isStaticFile } from '@arcblock/crawler/utils';
|
|
11
11
|
import { joinURL } from 'ufo';
|
|
12
12
|
import { CacheManager } from './cache';
|
|
13
13
|
import { env, logger } from './env';
|
|
14
|
-
const { isSelfCrawler, isSpider, isStaticFile } = utils;
|
|
15
14
|
function getFullUrl(req) {
|
|
16
15
|
const blockletPathname = req.headers['x-path-prefix']
|
|
17
16
|
? joinURL(req.headers['x-path-prefix'], req.originalUrl)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arcblock/crawler-middleware",
|
|
3
|
-
"version": "1.3.1",
|
|
3
|
+
"version": "1.3.2",
|
|
4
4
|
"main": "lib/cjs/index.js",
|
|
5
5
|
"module": "lib/esm/index.js",
|
|
6
6
|
"types": "lib/cjs/index.d.ts",
|
|
@@ -40,9 +40,9 @@
|
|
|
40
40
|
]
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
|
-
"@abtnode/cron": "^1.16.
|
|
44
|
-
"@blocklet/logger": "^1.16.
|
|
45
|
-
"@blocklet/sdk": "^1.16.
|
|
43
|
+
"@abtnode/cron": "^1.16.46",
|
|
44
|
+
"@blocklet/logger": "^1.16.46",
|
|
45
|
+
"@blocklet/sdk": "^1.16.46",
|
|
46
46
|
"@sequelize/core": "7.0.0-alpha.46",
|
|
47
47
|
"@sequelize/sqlite3": "7.0.0-alpha.46",
|
|
48
48
|
"axios": "^1.7.9",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"queue": "^7.0.0",
|
|
51
51
|
"sqlite3": "^5.1.7",
|
|
52
52
|
"ufo": "^1.5.4",
|
|
53
|
-
"@arcblock/crawler": "1.3.
|
|
53
|
+
"@arcblock/crawler": "1.3.2"
|
|
54
54
|
},
|
|
55
55
|
"devDependencies": {
|
|
56
56
|
"@types/express": "^4.17.21",
|