@arcblock/crawler-middleware 1.1.2 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/lib/cjs/cache.js +1 -1
- package/lib/cjs/index.js +1 -1
- package/lib/esm/cache.js +1 -1
- package/lib/esm/index.js +1 -1
- package/package.json +2 -16
package/README.md
CHANGED
|
@@ -10,6 +10,13 @@ This express middleware provides pre-rendered HTML generated by SnapKit for Bloc
|
|
|
10
10
|
4. If the cache is not found, an asynchronous request is made to SnapKit, and the local cache is updated.
|
|
11
11
|
5. The current request does not return the cached content; the next spider visit will hit step 3 and return the cache directly.
|
|
12
12
|
|
|
13
|
+
## How to Verify
|
|
14
|
+
|
|
15
|
+
1. Update your browser's User Agent string to include "spider".
|
|
16
|
+
2. Visit a page that has already been crawled by SnapKit.
|
|
17
|
+
3. First Visit (Cache Miss): On your first visit, the cache should miss. The server logs should show a "Cache miss" message, and a request is sent to SnapKit to cache the page.
|
|
18
|
+
4. Second Visit (Cache Hit): Wait a moment and then revisit the same page. The cache should be hit. The server logs should show a "Cache hit" message, and the returned HTML should include the meta tag: `<meta name="arcblock-crawler" content="true">`.
|
|
19
|
+
|
|
13
20
|
## Usage
|
|
14
21
|
|
|
15
22
|
```typescript
|
package/lib/cjs/cache.js
CHANGED
|
@@ -82,7 +82,7 @@ class CacheManager {
|
|
|
82
82
|
return snapshotData;
|
|
83
83
|
}
|
|
84
84
|
catch (error) {
|
|
85
|
-
env_1.logger.error('Failed to fetch content by SnapKit', { url, error, data: (_a = error === null || error === void 0 ? void 0 : error.response) === null || _a === void 0 ? void 0 : _a.data
|
|
85
|
+
env_1.logger.error('Failed to fetch content by SnapKit', { url, error, data: (_a = error === null || error === void 0 ? void 0 : error.response) === null || _a === void 0 ? void 0 : _a.data });
|
|
86
86
|
return null;
|
|
87
87
|
}
|
|
88
88
|
});
|
package/lib/cjs/index.js
CHANGED
|
@@ -19,7 +19,7 @@ function getFullUrl(req) {
|
|
|
19
19
|
const blockletPathname = req.headers['x-path-prefix']
|
|
20
20
|
? (0, ufo_1.joinURL)(req.headers['x-path-prefix'], req.originalUrl)
|
|
21
21
|
: req.originalUrl;
|
|
22
|
-
return (0, ufo_1.joinURL)(env_1.env.appUrl || req.get('host'), blockletPathname);
|
|
22
|
+
return (0, ufo_1.joinURL)(env_1.env.appUrl || req.get('host'), blockletPathname == null || blockletPathname === 'undefined' ? '/' : blockletPathname);
|
|
23
23
|
}
|
|
24
24
|
function createSnapshotMiddleware({ endpoint, accessKey, cacheMax = 500, updateInterval = 1000 * 60 * 60 * 24, failedUpdateInterval = 1000 * 60 * 60 * 24, updatedConcurrency = 10, autoReturnHtml = true, allowCrawler = () => true, }) {
|
|
25
25
|
if (!accessKey || !endpoint) {
|
package/lib/esm/cache.js
CHANGED
|
@@ -76,7 +76,7 @@ export class CacheManager {
|
|
|
76
76
|
return snapshotData;
|
|
77
77
|
}
|
|
78
78
|
catch (error) {
|
|
79
|
-
logger.error('Failed to fetch content by SnapKit', { url, error, data: (_a = error === null || error === void 0 ? void 0 : error.response) === null || _a === void 0 ? void 0 : _a.data
|
|
79
|
+
logger.error('Failed to fetch content by SnapKit', { url, error, data: (_a = error === null || error === void 0 ? void 0 : error.response) === null || _a === void 0 ? void 0 : _a.data });
|
|
80
80
|
return null;
|
|
81
81
|
}
|
|
82
82
|
});
|
package/lib/esm/index.js
CHANGED
|
@@ -16,7 +16,7 @@ function getFullUrl(req) {
|
|
|
16
16
|
const blockletPathname = req.headers['x-path-prefix']
|
|
17
17
|
? joinURL(req.headers['x-path-prefix'], req.originalUrl)
|
|
18
18
|
: req.originalUrl;
|
|
19
|
-
return joinURL(env.appUrl || req.get('host'), blockletPathname);
|
|
19
|
+
return joinURL(env.appUrl || req.get('host'), blockletPathname == null || blockletPathname === 'undefined' ? '/' : blockletPathname);
|
|
20
20
|
}
|
|
21
21
|
export function createSnapshotMiddleware({ endpoint, accessKey, cacheMax = 500, updateInterval = 1000 * 60 * 60 * 24, failedUpdateInterval = 1000 * 60 * 60 * 24, updatedConcurrency = 10, autoReturnHtml = true, allowCrawler = () => true, }) {
|
|
22
22
|
if (!accessKey || !endpoint) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arcblock/crawler-middleware",
|
|
3
|
-
"version": "1.1.2",
|
|
3
|
+
"version": "1.1.4",
|
|
4
4
|
"main": "lib/cjs/index.js",
|
|
5
5
|
"module": "lib/esm/index.js",
|
|
6
6
|
"types": "lib/cjs/index.d.ts",
|
|
@@ -41,38 +41,24 @@
|
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
43
|
"@abtnode/cron": "^1.16.43",
|
|
44
|
-
"@abtnode/models": "^1.16.43",
|
|
45
44
|
"@blocklet/logger": "^1.16.43",
|
|
46
|
-
"@blocklet/puppeteer": "^22.11.3",
|
|
47
45
|
"@blocklet/sdk": "^1.16.43",
|
|
48
46
|
"@sequelize/core": "7.0.0-alpha.46",
|
|
49
47
|
"@sequelize/sqlite3": "7.0.0-alpha.46",
|
|
50
48
|
"axios": "^1.7.9",
|
|
51
|
-
"fs-extra": "^11.2.0",
|
|
52
|
-
"generic-pool": "^3.9.0",
|
|
53
|
-
"lodash": "^4.17.21",
|
|
54
49
|
"lru-cache": "^10.4.3",
|
|
55
50
|
"queue": "^7.0.0",
|
|
56
|
-
"redis": "^4.7.0",
|
|
57
|
-
"robots-parser": "^3.0.1",
|
|
58
|
-
"sequelize": "^6.37.7",
|
|
59
|
-
"sitemap": "^7.1.2",
|
|
60
51
|
"sqlite3": "^5.1.7",
|
|
61
52
|
"ufo": "^1.5.4",
|
|
62
|
-
"@arcblock/crawler": "1.1.2"
|
|
53
|
+
"@arcblock/crawler": "1.1.4"
|
|
63
54
|
},
|
|
64
55
|
"devDependencies": {
|
|
65
|
-
"@blocklet/js-sdk": "^1.16.39",
|
|
66
|
-
"@types/dotenv-flow": "^3.3.3",
|
|
67
56
|
"@types/express": "^4.17.21",
|
|
68
|
-
"@types/fs-extra": "^11.0.4",
|
|
69
|
-
"@types/lodash": "^4.17.16",
|
|
70
57
|
"@types/node": "^20.17.19",
|
|
71
58
|
"bumpp": "^9.11.1",
|
|
72
59
|
"express": "^4.21.2",
|
|
73
60
|
"nodemon": "^3.1.9",
|
|
74
61
|
"npm-run-all": "^4.1.5",
|
|
75
|
-
"puppeteer": "^24.8.2",
|
|
76
62
|
"tsx": "^4.19.3",
|
|
77
63
|
"zx": "^8.3.2"
|
|
78
64
|
},
|