@arcblock/crawler-middleware 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/cache.js +12 -6
- package/lib/cjs/index.js +1 -1
- package/lib/esm/cache.js +12 -6
- package/lib/esm/index.js +1 -1
- package/package.json +5 -5
package/lib/cjs/cache.js
CHANGED
|
@@ -21,8 +21,11 @@ const env_1 = require("./env");
|
|
|
21
21
|
const index_1 = require("./store/index");
|
|
22
22
|
class CacheManager {
|
|
23
23
|
constructor(options) {
|
|
24
|
-
this.
|
|
25
|
-
this.
|
|
24
|
+
this.cache = null;
|
|
25
|
+
this.options = Object.assign({ cacheMax: 0, updateInterval: 1000 * 60 * 60 * 24, failedUpdateInterval: 1000 * 60 * 60 * 24, updatedConcurrency: 10 }, options);
|
|
26
|
+
if (this.options.cacheMax > 0) {
|
|
27
|
+
this.cache = new lru_cache_1.LRUCache({ max: this.options.cacheMax || 500 });
|
|
28
|
+
}
|
|
26
29
|
this.updateQueue = new queue_1.default({
|
|
27
30
|
autostart: true,
|
|
28
31
|
concurrency: this.options.updatedConcurrency,
|
|
@@ -36,13 +39,14 @@ class CacheManager {
|
|
|
36
39
|
}
|
|
37
40
|
getSnapshot(url) {
|
|
38
41
|
return __awaiter(this, void 0, void 0, function* () {
|
|
39
|
-
|
|
42
|
+
var _a, _b;
|
|
43
|
+
const cachedSnapshot = (_a = this.cache) === null || _a === void 0 ? void 0 : _a.get(url);
|
|
40
44
|
if (cachedSnapshot) {
|
|
41
45
|
return cachedSnapshot;
|
|
42
46
|
}
|
|
43
47
|
const snapshot = yield index_1.Snapshot.findOne({ where: { url } });
|
|
44
48
|
if (snapshot) {
|
|
45
|
-
this.cache.set(url, snapshot);
|
|
49
|
+
(_b = this.cache) === null || _b === void 0 ? void 0 : _b.set(url, snapshot);
|
|
46
50
|
return snapshot;
|
|
47
51
|
}
|
|
48
52
|
return null;
|
|
@@ -50,8 +54,9 @@ class CacheManager {
|
|
|
50
54
|
}
|
|
51
55
|
setSnapshot(url, snapshot) {
|
|
52
56
|
return __awaiter(this, void 0, void 0, function* () {
|
|
57
|
+
var _a;
|
|
53
58
|
yield index_1.Snapshot.create(snapshot);
|
|
54
|
-
this.cache.set(url, snapshot);
|
|
59
|
+
(_a = this.cache) === null || _a === void 0 ? void 0 : _a.set(url, snapshot);
|
|
55
60
|
});
|
|
56
61
|
}
|
|
57
62
|
fetchSnapKit(url) {
|
|
@@ -99,6 +104,7 @@ class CacheManager {
|
|
|
99
104
|
}
|
|
100
105
|
updateSnapshot(url) {
|
|
101
106
|
return __awaiter(this, void 0, void 0, function* () {
|
|
107
|
+
var _a;
|
|
102
108
|
try {
|
|
103
109
|
const snapshot = yield this.fetchSnapKit(url);
|
|
104
110
|
// update db
|
|
@@ -108,7 +114,7 @@ class CacheManager {
|
|
|
108
114
|
lastModified: snapshot === null || snapshot === void 0 ? void 0 : snapshot.lastModified,
|
|
109
115
|
});
|
|
110
116
|
// update cache
|
|
111
|
-
this.cache.set(url, updatedSnapshot);
|
|
117
|
+
(_a = this.cache) === null || _a === void 0 ? void 0 : _a.set(url, updatedSnapshot);
|
|
112
118
|
}
|
|
113
119
|
catch (error) {
|
|
114
120
|
env_1.logger.error('Failed to update snapshot', { url, error });
|
package/lib/cjs/index.js
CHANGED
|
@@ -21,7 +21,7 @@ function getFullUrl(req) {
|
|
|
21
21
|
: req.originalUrl;
|
|
22
22
|
return (0, ufo_1.joinURL)(env_1.env.appUrl || req.get('host'), blockletPathname == null || blockletPathname === 'undefined' ? '/' : blockletPathname);
|
|
23
23
|
}
|
|
24
|
-
function createSnapshotMiddleware({ endpoint, accessKey, cacheMax =
|
|
24
|
+
function createSnapshotMiddleware({ endpoint, accessKey, cacheMax = 0, updateInterval = 1000 * 60 * 60 * 24, failedUpdateInterval = 1000 * 60 * 60 * 24, updatedConcurrency = 10, autoReturnHtml = true, allowCrawler = () => true, }) {
|
|
25
25
|
if (!accessKey || !endpoint) {
|
|
26
26
|
throw new Error('accessKey and endpoint are required');
|
|
27
27
|
}
|
package/lib/esm/cache.js
CHANGED
|
@@ -15,8 +15,11 @@ import { logger } from './env';
|
|
|
15
15
|
import { Snapshot, initDatabase } from './store/index';
|
|
16
16
|
export class CacheManager {
|
|
17
17
|
constructor(options) {
|
|
18
|
-
this.
|
|
19
|
-
this.
|
|
18
|
+
this.cache = null;
|
|
19
|
+
this.options = Object.assign({ cacheMax: 0, updateInterval: 1000 * 60 * 60 * 24, failedUpdateInterval: 1000 * 60 * 60 * 24, updatedConcurrency: 10 }, options);
|
|
20
|
+
if (this.options.cacheMax > 0) {
|
|
21
|
+
this.cache = new LRUCache({ max: this.options.cacheMax || 500 });
|
|
22
|
+
}
|
|
20
23
|
this.updateQueue = new Queue({
|
|
21
24
|
autostart: true,
|
|
22
25
|
concurrency: this.options.updatedConcurrency,
|
|
@@ -30,13 +33,14 @@ export class CacheManager {
|
|
|
30
33
|
}
|
|
31
34
|
getSnapshot(url) {
|
|
32
35
|
return __awaiter(this, void 0, void 0, function* () {
|
|
33
|
-
|
|
36
|
+
var _a, _b;
|
|
37
|
+
const cachedSnapshot = (_a = this.cache) === null || _a === void 0 ? void 0 : _a.get(url);
|
|
34
38
|
if (cachedSnapshot) {
|
|
35
39
|
return cachedSnapshot;
|
|
36
40
|
}
|
|
37
41
|
const snapshot = yield Snapshot.findOne({ where: { url } });
|
|
38
42
|
if (snapshot) {
|
|
39
|
-
this.cache.set(url, snapshot);
|
|
43
|
+
(_b = this.cache) === null || _b === void 0 ? void 0 : _b.set(url, snapshot);
|
|
40
44
|
return snapshot;
|
|
41
45
|
}
|
|
42
46
|
return null;
|
|
@@ -44,8 +48,9 @@ export class CacheManager {
|
|
|
44
48
|
}
|
|
45
49
|
setSnapshot(url, snapshot) {
|
|
46
50
|
return __awaiter(this, void 0, void 0, function* () {
|
|
51
|
+
var _a;
|
|
47
52
|
yield Snapshot.create(snapshot);
|
|
48
|
-
this.cache.set(url, snapshot);
|
|
53
|
+
(_a = this.cache) === null || _a === void 0 ? void 0 : _a.set(url, snapshot);
|
|
49
54
|
});
|
|
50
55
|
}
|
|
51
56
|
fetchSnapKit(url) {
|
|
@@ -93,6 +98,7 @@ export class CacheManager {
|
|
|
93
98
|
}
|
|
94
99
|
updateSnapshot(url) {
|
|
95
100
|
return __awaiter(this, void 0, void 0, function* () {
|
|
101
|
+
var _a;
|
|
96
102
|
try {
|
|
97
103
|
const snapshot = yield this.fetchSnapKit(url);
|
|
98
104
|
// update db
|
|
@@ -102,7 +108,7 @@ export class CacheManager {
|
|
|
102
108
|
lastModified: snapshot === null || snapshot === void 0 ? void 0 : snapshot.lastModified,
|
|
103
109
|
});
|
|
104
110
|
// update cache
|
|
105
|
-
this.cache.set(url, updatedSnapshot);
|
|
111
|
+
(_a = this.cache) === null || _a === void 0 ? void 0 : _a.set(url, updatedSnapshot);
|
|
106
112
|
}
|
|
107
113
|
catch (error) {
|
|
108
114
|
logger.error('Failed to update snapshot', { url, error });
|
package/lib/esm/index.js
CHANGED
|
@@ -18,7 +18,7 @@ function getFullUrl(req) {
|
|
|
18
18
|
: req.originalUrl;
|
|
19
19
|
return joinURL(env.appUrl || req.get('host'), blockletPathname == null || blockletPathname === 'undefined' ? '/' : blockletPathname);
|
|
20
20
|
}
|
|
21
|
-
export function createSnapshotMiddleware({ endpoint, accessKey, cacheMax =
|
|
21
|
+
export function createSnapshotMiddleware({ endpoint, accessKey, cacheMax = 0, updateInterval = 1000 * 60 * 60 * 24, failedUpdateInterval = 1000 * 60 * 60 * 24, updatedConcurrency = 10, autoReturnHtml = true, allowCrawler = () => true, }) {
|
|
22
22
|
if (!accessKey || !endpoint) {
|
|
23
23
|
throw new Error('accessKey and endpoint are required');
|
|
24
24
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arcblock/crawler-middleware",
|
|
3
|
-
"version": "1.1.
|
|
3
|
+
"version": "1.1.6",
|
|
4
4
|
"main": "lib/cjs/index.js",
|
|
5
5
|
"module": "lib/esm/index.js",
|
|
6
6
|
"types": "lib/cjs/index.d.ts",
|
|
@@ -40,9 +40,9 @@
|
|
|
40
40
|
]
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
|
-
"@abtnode/cron": "^1.16.
|
|
44
|
-
"@blocklet/logger": "^1.16.
|
|
45
|
-
"@blocklet/sdk": "^1.16.
|
|
43
|
+
"@abtnode/cron": "^1.16.44",
|
|
44
|
+
"@blocklet/logger": "^1.16.44",
|
|
45
|
+
"@blocklet/sdk": "^1.16.44",
|
|
46
46
|
"@sequelize/core": "7.0.0-alpha.46",
|
|
47
47
|
"@sequelize/sqlite3": "7.0.0-alpha.46",
|
|
48
48
|
"axios": "^1.7.9",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"queue": "^7.0.0",
|
|
51
51
|
"sqlite3": "^5.1.7",
|
|
52
52
|
"ufo": "^1.5.4",
|
|
53
|
-
"@arcblock/crawler": "1.1.
|
|
53
|
+
"@arcblock/crawler": "1.1.6"
|
|
54
54
|
},
|
|
55
55
|
"devDependencies": {
|
|
56
56
|
"@types/express": "^4.17.21",
|