@arcblock/crawler-middleware 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -0
- package/lib/cjs/cache.d.ts +26 -0
- package/lib/cjs/cache.js +111 -0
- package/lib/cjs/env.d.ts +5 -0
- package/lib/cjs/env.js +14 -0
- package/lib/cjs/index.d.ts +18 -0
- package/lib/cjs/index.js +72 -0
- package/lib/cjs/store/index.d.ts +4 -0
- package/lib/cjs/store/index.js +66 -0
- package/lib/cjs/store/model-snapshot.d.ts +16 -0
- package/lib/cjs/store/model-snapshot.js +29 -0
- package/lib/esm/cache.d.ts +26 -0
- package/lib/esm/cache.js +107 -0
- package/lib/esm/env.d.ts +5 -0
- package/lib/esm/env.js +8 -0
- package/lib/esm/index.d.ts +18 -0
- package/lib/esm/index.js +69 -0
- package/lib/esm/store/index.d.ts +4 -0
- package/lib/esm/store/index.js +49 -0
- package/lib/esm/store/model-snapshot.d.ts +16 -0
- package/lib/esm/store/model-snapshot.js +25 -0
- package/package.json +101 -0
package/README.md
ADDED
@@ -0,0 +1,59 @@
# @arcblock/crawler-middleware

This Express middleware provides pre-rendered HTML generated by SnapKit for Blocklets, enabling them to return complete HTML content to web spiders. This is essential for SEO, ensuring that search engines can properly index dynamically generated content.

## Usage

```typescript
import { createSnapshotMiddleware } from '@arcblock/crawler-middleware';

const app = express();

app.use(
  createSnapshotMiddleware({
    endpoint: process.env.SNAP_KIT_ENDPOINT,
    accessKey: process.env.SNAP_KIT_ACCESS_KEY,
    allowCrawler: (req) => {
      return req.path === '/';
    },
  }),
);
```

## How it Works

1. The middleware intercepts incoming requests.
2. It checks if the request is from a web crawler.
3. It tries to read and return HTML from the local cache.
4. If no cache entry is found, an asynchronous request is made to SnapKit and the local cache is updated.
5. The current request does not return the cached content; the next crawler visit will hit step 3 and return the cache directly (see the sketch after this list).

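To make this flow concrete, here is a minimal, self-contained sketch of the same cache-then-refresh pattern. It is not the package's implementation: the in-memory `Map`, the `fetchSnapshot` helper, the example SnapKit URL, and the user-agent check are simplified stand-ins for the real LRU/SQLite cache, SnapKit client, and crawler detection.

```typescript
import express, { NextFunction, Request, Response } from 'express';

type Snapshot = { html: string; createdAt: number };

// Simplified stand-ins for the real LRU/SQLite cache and refresh interval.
const cache = new Map<string, Snapshot>();
const TTL = 1000 * 60 * 60 * 24; // refresh entries older than 24 hours

// Naive crawler detection; the real middleware uses @arcblock/crawler utilities.
const isSpider = (req: Request) => /bot|spider|crawl/i.test(req.get('user-agent') || '');

// Hypothetical fetcher; the real middleware calls the SnapKit API with an access key.
async function fetchSnapshot(url: string): Promise<string> {
  const res = await fetch(`https://snapkit.example.com/api/crawl?url=${encodeURIComponent(url)}`);
  const { html } = (await res.json()) as { html: string };
  return html;
}

function snapshotMiddleware() {
  return async (req: Request, res: Response, next: NextFunction) => {
    const url = req.originalUrl;
    const entry = cache.get(url);

    // Steps 4-5: refresh in the background when the entry is missing or stale,
    // so the current request never waits for SnapKit.
    if (!entry || Date.now() - entry.createdAt > TTL) {
      fetchSnapshot(url)
        .then((html) => cache.set(url, { html, createdAt: Date.now() }))
        .catch(() => {});
    }

    // Steps 2-3: only crawlers receive the cached HTML; everyone else falls through.
    if (isSpider(req) && entry) {
      res.send(entry.html);
      return;
    }
    next();
  };
}

const app = express();
app.use(snapshotMiddleware());
```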
## Options

The options for `createSnapshotMiddleware`:

```typescript
{
  /** SnapKit endpoint */
  endpoint: string;
  /** SnapKit access key */
  accessKey: string;
  /** Max cache size for LRU cache */
  cacheMax?: number;
  /**
   * Cache update interval
   * When a cached entry is older than this interval, the middleware will try to fetch an updated snapshot from SnapKit
   */
  cacheUpdateInterval?: number;
};
```

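For illustration, continuing the Usage example above, a deployment that keeps up to 1,000 snapshots and refreshes them every 6 hours could be configured like this (the environment variables are the same placeholders as before):

```typescript
app.use(
  createSnapshotMiddleware({
    endpoint: process.env.SNAP_KIT_ENDPOINT,
    accessKey: process.env.SNAP_KIT_ACCESS_KEY,
    cacheMax: 1000, // keep at most 1,000 snapshots in the LRU cache
    cacheUpdateInterval: 1000 * 60 * 60 * 6, // treat entries older than 6 hours as stale
  }),
);
```

If these options are omitted, the implementation falls back to 500 entries and a 24-hour interval.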

## Environment Variables

When using this middleware outside of a Blocklet environment, you need to configure the following environment variables:

- `BLOCKLET_APP_DATA_DIR`: (Required) Directory path for storing the SQLite file
- `BLOCKLET_LOG_DIR`: (Required) Directory path for storing @blocklet/logger logs
- `BLOCKLET_APP_URL`: (Optional) The deployed domain of the app

You can set these variables in your `.env` file, for example as shown below.
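A hypothetical `.env` sketch; the directory paths and domain are placeholders to replace with values for your own deployment:

```
BLOCKLET_APP_DATA_DIR=/var/data/my-blocklet
BLOCKLET_LOG_DIR=/var/log/my-blocklet
BLOCKLET_APP_URL=https://www.example.com
```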
package/lib/cjs/cache.d.ts
ADDED
@@ -0,0 +1,26 @@
import { SnapshotModel } from './store/index';
export type CacheManagerOptions = {
    /** SnapKit endpoint */
    endpoint: string;
    /** SnapKit access key */
    accessKey: string;
    /** Max cache size for LRU cache */
    cacheMax?: number;
    /**
     * Cache update interval
     * When cache exceeds this time, it will try to fetch and update cache from SnapKit
     */
    cacheUpdateInterval?: number;
};
export declare class CacheManager {
    private options;
    private cache;
    private initializedPromise;
    constructor(options: CacheManagerOptions);
    waitReady(): Promise<void>;
    getSnapshot(url: string): Promise<SnapshotModel | null>;
    setSnapshot(url: string, snapshot: SnapshotModel): Promise<void>;
    fetchSnapKit(url: string): Promise<any>;
    isCacheExpired(url: string): Promise<boolean>;
    updateSnapshot(url: string): Promise<void>;
}
package/lib/cjs/cache.js
ADDED
@@ -0,0 +1,111 @@
"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.CacheManager = void 0;
const crawler_1 = require("@arcblock/crawler");
const lru_cache_1 = require("lru-cache");
const ufo_1 = require("ufo");
const env_1 = require("./env");
const index_1 = require("./store/index");
class CacheManager {
    constructor(options) {
        this.options = Object.assign({ cacheMax: 500, cacheUpdateInterval: 1000 * 60 * 60 * 24 }, options);
        this.cache = new lru_cache_1.LRUCache({ max: this.options.cacheMax || 500 });
        this.initializedPromise = Promise.all([(0, index_1.initDatabase)()]);
    }
    waitReady() {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.initializedPromise;
        });
    }
    getSnapshot(url) {
        return __awaiter(this, void 0, void 0, function* () {
            const cachedSnapshot = this.cache.get(url);
            if (cachedSnapshot) {
                return cachedSnapshot;
            }
            const snapshot = yield index_1.Snapshot.findOne({ where: { url } });
            if (snapshot) {
                this.cache.set(url, snapshot);
                return snapshot;
            }
            return null;
        });
    }
    setSnapshot(url, snapshot) {
        return __awaiter(this, void 0, void 0, function* () {
            yield index_1.Snapshot.create(snapshot);
            this.cache.set(url, snapshot);
        });
    }
    fetchSnapKit(url) {
        return __awaiter(this, void 0, void 0, function* () {
            const { endpoint, accessKey } = this.options;
            const api = (0, ufo_1.joinURL)(endpoint, 'api/crawl');
            env_1.logger.debug('Fetching snapshot from SnapKit', { url, api });
            try {
                const { data } = yield crawler_1.utils.axios.get(api, {
                    params: {
                        url,
                    },
                    headers: {
                        Authorization: `Bearer ${accessKey}`,
                    },
                });
                const { data: snapshotData } = data || {};
                if ((snapshotData === null || snapshotData === void 0 ? void 0 : snapshotData.status) !== 'success') {
                    env_1.logger.info(`No valid HTML found for ${url} from SnapKit`, { snapshotData, data });
                    return null;
                }
                env_1.logger.info('Success to fetch content by SnapKit and cache it', {
                    url,
                    jobId: snapshotData.jobId,
                    lastModified: snapshotData.lastModified,
                });
                return snapshotData;
            }
            catch (error) {
                env_1.logger.error('Failed to fetch content by SnapKit', { url, error });
                return null;
            }
        });
    }
    isCacheExpired(url) {
        return __awaiter(this, void 0, void 0, function* () {
            const snapshot = yield this.getSnapshot(url);
            if (!snapshot) {
                return true;
            }
            return Date.now() - new Date(snapshot.createdAt).getTime() > this.options.cacheUpdateInterval;
        });
    }
    updateSnapshot(url) {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                const snapshot = yield this.fetchSnapKit(url);
                if (snapshot) {
                    // update db
                    const [updatedSnapshot] = yield index_1.Snapshot.upsert({
                        url,
                        html: snapshot.html,
                        lastModified: snapshot.lastModified,
                    });
                    // update cache
                    this.cache.set(url, updatedSnapshot);
                }
            }
            catch (error) {
                env_1.logger.error('Failed to update snapshot', { url, error });
            }
        });
    }
}
exports.CacheManager = CacheManager;
package/lib/cjs/env.d.ts
ADDED
package/lib/cjs/env.js
ADDED
@@ -0,0 +1,14 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.logger = exports.env = void 0;
const logger_1 = __importDefault(require("@blocklet/logger"));
const config_1 = __importDefault(require("@blocklet/sdk/lib/config"));
const node_path_1 = __importDefault(require("node:path"));
exports.env = {
    databasePath: node_path_1.default.join(config_1.default.env.dataDir, 'crawler-middleware/snapshot.db'),
    appUrl: config_1.default.env.appUrl,
};
exports.logger = (0, logger_1.default)('@arcblock/crawler-middleware', { level: process.env.LOG_LEVEL || 'info' });
package/lib/cjs/index.d.ts
ADDED
@@ -0,0 +1,18 @@
import { NextFunction, Request, Response } from 'express';
export declare function createSnapshotMiddleware({ endpoint, accessKey, cacheMax, cacheUpdateInterval, autoReturnHtml, allowCrawler, }: {
    /** SnapKit endpoint */
    endpoint: string;
    /** SnapKit access key */
    accessKey: string;
    /** Max cache size for LRU cache */
    cacheMax?: number;
    /**
     * Cache update interval
     * When cache exceeds this time, it will try to fetch and update cache from SnapKit
     */
    cacheUpdateInterval?: number;
    /** Call res.send(html) when cache hit */
    autoReturnHtml?: boolean;
    /** Custom function to determine whether to return cached content */
    allowCrawler?: (req: Request) => boolean;
}): (req: Request, res: Response, next: NextFunction) => Promise<void>;
package/lib/cjs/index.js
ADDED
@@ -0,0 +1,72 @@
"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.createSnapshotMiddleware = createSnapshotMiddleware;
const crawler_1 = require("@arcblock/crawler");
const ufo_1 = require("ufo");
const cache_1 = require("./cache");
const env_1 = require("./env");
const { isSelfCrawler, isSpider, isStaticFile } = crawler_1.utils;
function getFullUrl(req) {
    const blockletPathname = req.headers['x-path-prefix']
        ? (0, ufo_1.joinURL)(req.headers['x-path-prefix'], req.originalUrl)
        : req.originalUrl;
    return (0, ufo_1.joinURL)(env_1.env.appUrl || req.get('host'), blockletPathname);
}
function createSnapshotMiddleware({ endpoint, accessKey, cacheMax = 500, cacheUpdateInterval = 1000 * 60 * 60 * 24, autoReturnHtml = true, allowCrawler = () => true, }) {
    if (!accessKey || !endpoint) {
        throw new Error('accessKey and endpoint are required');
    }
    const cacheManager = new cache_1.CacheManager({
        endpoint,
        accessKey,
        cacheMax,
        cacheUpdateInterval,
    });
    return (req, res, next) => __awaiter(this, void 0, void 0, function* () {
        yield cacheManager.waitReady();
        if (!allowCrawler(req)) {
            return next();
        }
        const fullUrl = getFullUrl(req);
        // Always fetch content from SnapKit and cache it, even for non-crawler requests
        if (yield cacheManager.isCacheExpired(fullUrl)) {
            env_1.logger.info(`Cache expired for ${fullUrl}, fetching from SnapKit`);
            // Don't await here, the cache will be effective after the next request
            cacheManager.updateSnapshot(fullUrl);
        }
        if (!isSpider(req) || isSelfCrawler(req) || isStaticFile(req)) {
            return next();
        }
        // cache hit
        const cachedSnapshot = yield cacheManager.getSnapshot(fullUrl);
        if (cachedSnapshot) {
            // @ts-ignore
            req.cachedHtml = cachedSnapshot.html;
            if (cachedSnapshot.lastModified) {
                // @ts-ignore
                req.cachedLastmod = new Date(cachedSnapshot.lastModified).toUTCString();
                res.setHeader('Last-Modified', cachedSnapshot.lastModified);
            }
            if (autoReturnHtml) {
                env_1.logger.debug(`Cache hit: ${fullUrl} `, {
                    lastModified: cachedSnapshot.lastModified,
                    createdAt: cachedSnapshot.createdAt,
                });
                res.send(cachedSnapshot.html);
                return;
            }
            return next();
        }
        env_1.logger.debug(`Cache not hit: ${fullUrl}`);
        return next();
    });
}
package/lib/cjs/store/index.js
ADDED
@@ -0,0 +1,66 @@
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __exportStar = (this && this.__exportStar) || function(m, exports) {
    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.initDatabase = initDatabase;
const core_1 = require("@sequelize/core");
const sqlite3_1 = require("@sequelize/sqlite3");
const env_1 = require("../env");
const model_snapshot_1 = require("./model-snapshot");
__exportStar(require("./model-snapshot"), exports);
function initDatabase() {
    return __awaiter(this, void 0, void 0, function* () {
        const sequelize = new core_1.Sequelize({
            dialect: sqlite3_1.SqliteDialect,
            storage: env_1.env.databasePath,
            logging: (msg) => process.env.SQLITE_LOG && env_1.logger.debug(msg),
            pool: {
                min: 0,
                max: 10,
                idle: 10000,
            },
            retry: {
                match: [/SQLITE_BUSY/],
                name: 'query',
                max: 10,
            },
        });
        model_snapshot_1.Snapshot.initModel(sequelize);
        try {
            yield Promise.all([
                sequelize.query('pragma journal_mode = WAL;'),
                sequelize.query('pragma synchronous = normal;'),
                sequelize.query('pragma journal_size_limit = 67108864;'),
            ]);
            yield sequelize.authenticate();
            yield sequelize.sync();
            env_1.logger.info('Successfully connected to database');
        }
        catch (error) {
            env_1.logger.error('Failed to connect to database:', error);
            throw error;
        }
        return sequelize;
    });
}
package/lib/cjs/store/model-snapshot.d.ts
ADDED
@@ -0,0 +1,16 @@
import { Model, Sequelize } from '@sequelize/core';
export interface SnapshotModel {
    url: string;
    html: string;
    lastModified?: string;
    createdAt?: string;
    updatedAt?: string;
}
export declare class Snapshot extends Model<SnapshotModel> implements SnapshotModel {
    url: SnapshotModel['url'];
    html: SnapshotModel['html'];
    lastModified?: SnapshotModel['lastModified'];
    createdAt: SnapshotModel['createdAt'];
    updatedAt: SnapshotModel['updatedAt'];
    static initModel(sequelize: Sequelize): void;
}
package/lib/cjs/store/model-snapshot.js
ADDED
@@ -0,0 +1,29 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Snapshot = void 0;
const core_1 = require("@sequelize/core");
class Snapshot extends core_1.Model {
    static initModel(sequelize) {
        Snapshot.init({
            url: {
                type: core_1.DataTypes.STRING,
                allowNull: false,
                primaryKey: true,
            },
            html: {
                type: core_1.DataTypes.TEXT,
                allowNull: false,
            },
            lastModified: {
                type: core_1.DataTypes.STRING,
                allowNull: true,
            },
        }, {
            sequelize,
            modelName: 'snapshot',
            tableName: 'snap',
            timestamps: true,
        });
    }
}
exports.Snapshot = Snapshot;
package/lib/esm/cache.d.ts
ADDED
@@ -0,0 +1,26 @@
import { SnapshotModel } from './store/index';
export type CacheManagerOptions = {
    /** SnapKit endpoint */
    endpoint: string;
    /** SnapKit access key */
    accessKey: string;
    /** Max cache size for LRU cache */
    cacheMax?: number;
    /**
     * Cache update interval
     * When cache exceeds this time, it will try to fetch and update cache from SnapKit
     */
    cacheUpdateInterval?: number;
};
export declare class CacheManager {
    private options;
    private cache;
    private initializedPromise;
    constructor(options: CacheManagerOptions);
    waitReady(): Promise<void>;
    getSnapshot(url: string): Promise<SnapshotModel | null>;
    setSnapshot(url: string, snapshot: SnapshotModel): Promise<void>;
    fetchSnapKit(url: string): Promise<any>;
    isCacheExpired(url: string): Promise<boolean>;
    updateSnapshot(url: string): Promise<void>;
}
package/lib/esm/cache.js
ADDED
@@ -0,0 +1,107 @@
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import { utils } from '@arcblock/crawler';
import { LRUCache } from 'lru-cache';
import { joinURL } from 'ufo';
import { logger } from './env';
import { Snapshot, initDatabase } from './store/index';
export class CacheManager {
    constructor(options) {
        this.options = Object.assign({ cacheMax: 500, cacheUpdateInterval: 1000 * 60 * 60 * 24 }, options);
        this.cache = new LRUCache({ max: this.options.cacheMax || 500 });
        this.initializedPromise = Promise.all([initDatabase()]);
    }
    waitReady() {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.initializedPromise;
        });
    }
    getSnapshot(url) {
        return __awaiter(this, void 0, void 0, function* () {
            const cachedSnapshot = this.cache.get(url);
            if (cachedSnapshot) {
                return cachedSnapshot;
            }
            const snapshot = yield Snapshot.findOne({ where: { url } });
            if (snapshot) {
                this.cache.set(url, snapshot);
                return snapshot;
            }
            return null;
        });
    }
    setSnapshot(url, snapshot) {
        return __awaiter(this, void 0, void 0, function* () {
            yield Snapshot.create(snapshot);
            this.cache.set(url, snapshot);
        });
    }
    fetchSnapKit(url) {
        return __awaiter(this, void 0, void 0, function* () {
            const { endpoint, accessKey } = this.options;
            const api = joinURL(endpoint, 'api/crawl');
            logger.debug('Fetching snapshot from SnapKit', { url, api });
            try {
                const { data } = yield utils.axios.get(api, {
                    params: {
                        url,
                    },
                    headers: {
                        Authorization: `Bearer ${accessKey}`,
                    },
                });
                const { data: snapshotData } = data || {};
                if ((snapshotData === null || snapshotData === void 0 ? void 0 : snapshotData.status) !== 'success') {
                    logger.info(`No valid HTML found for ${url} from SnapKit`, { snapshotData, data });
                    return null;
                }
                logger.info('Success to fetch content by SnapKit and cache it', {
                    url,
                    jobId: snapshotData.jobId,
                    lastModified: snapshotData.lastModified,
                });
                return snapshotData;
            }
            catch (error) {
                logger.error('Failed to fetch content by SnapKit', { url, error });
                return null;
            }
        });
    }
    isCacheExpired(url) {
        return __awaiter(this, void 0, void 0, function* () {
            const snapshot = yield this.getSnapshot(url);
            if (!snapshot) {
                return true;
            }
            return Date.now() - new Date(snapshot.createdAt).getTime() > this.options.cacheUpdateInterval;
        });
    }
    updateSnapshot(url) {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                const snapshot = yield this.fetchSnapKit(url);
                if (snapshot) {
                    // update db
                    const [updatedSnapshot] = yield Snapshot.upsert({
                        url,
                        html: snapshot.html,
                        lastModified: snapshot.lastModified,
                    });
                    // update cache
                    this.cache.set(url, updatedSnapshot);
                }
            }
            catch (error) {
                logger.error('Failed to update snapshot', { url, error });
            }
        });
    }
}
package/lib/esm/env.d.ts
ADDED
package/lib/esm/env.js
ADDED
@@ -0,0 +1,8 @@
import createLogger from '@blocklet/logger';
import config from '@blocklet/sdk/lib/config';
import path from 'node:path';
export const env = {
    databasePath: path.join(config.env.dataDir, 'crawler-middleware/snapshot.db'),
    appUrl: config.env.appUrl,
};
export const logger = createLogger('@arcblock/crawler-middleware', { level: process.env.LOG_LEVEL || 'info' });
package/lib/esm/index.d.ts
ADDED
@@ -0,0 +1,18 @@
import { NextFunction, Request, Response } from 'express';
export declare function createSnapshotMiddleware({ endpoint, accessKey, cacheMax, cacheUpdateInterval, autoReturnHtml, allowCrawler, }: {
    /** SnapKit endpoint */
    endpoint: string;
    /** SnapKit access key */
    accessKey: string;
    /** Max cache size for LRU cache */
    cacheMax?: number;
    /**
     * Cache update interval
     * When cache exceeds this time, it will try to fetch and update cache from SnapKit
     */
    cacheUpdateInterval?: number;
    /** Call res.send(html) when cache hit */
    autoReturnHtml?: boolean;
    /** Custom function to determine whether to return cached content */
    allowCrawler?: (req: Request) => boolean;
}): (req: Request, res: Response, next: NextFunction) => Promise<void>;
package/lib/esm/index.js
ADDED
@@ -0,0 +1,69 @@
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import { utils } from '@arcblock/crawler';
import { joinURL } from 'ufo';
import { CacheManager } from './cache';
import { env, logger } from './env';
const { isSelfCrawler, isSpider, isStaticFile } = utils;
function getFullUrl(req) {
    const blockletPathname = req.headers['x-path-prefix']
        ? joinURL(req.headers['x-path-prefix'], req.originalUrl)
        : req.originalUrl;
    return joinURL(env.appUrl || req.get('host'), blockletPathname);
}
export function createSnapshotMiddleware({ endpoint, accessKey, cacheMax = 500, cacheUpdateInterval = 1000 * 60 * 60 * 24, autoReturnHtml = true, allowCrawler = () => true, }) {
    if (!accessKey || !endpoint) {
        throw new Error('accessKey and endpoint are required');
    }
    const cacheManager = new CacheManager({
        endpoint,
        accessKey,
        cacheMax,
        cacheUpdateInterval,
    });
    return (req, res, next) => __awaiter(this, void 0, void 0, function* () {
        yield cacheManager.waitReady();
        if (!allowCrawler(req)) {
            return next();
        }
        const fullUrl = getFullUrl(req);
        // Always fetch content from SnapKit and cache it, even for non-crawler requests
        if (yield cacheManager.isCacheExpired(fullUrl)) {
            logger.info(`Cache expired for ${fullUrl}, fetching from SnapKit`);
            // Don't await here, the cache will be effective after the next request
            cacheManager.updateSnapshot(fullUrl);
        }
        if (!isSpider(req) || isSelfCrawler(req) || isStaticFile(req)) {
            return next();
        }
        // cache hit
        const cachedSnapshot = yield cacheManager.getSnapshot(fullUrl);
        if (cachedSnapshot) {
            // @ts-ignore
            req.cachedHtml = cachedSnapshot.html;
            if (cachedSnapshot.lastModified) {
                // @ts-ignore
                req.cachedLastmod = new Date(cachedSnapshot.lastModified).toUTCString();
                res.setHeader('Last-Modified', cachedSnapshot.lastModified);
            }
            if (autoReturnHtml) {
                logger.debug(`Cache hit: ${fullUrl} `, {
                    lastModified: cachedSnapshot.lastModified,
                    createdAt: cachedSnapshot.createdAt,
                });
                res.send(cachedSnapshot.html);
                return;
            }
            return next();
        }
        logger.debug(`Cache not hit: ${fullUrl}`);
        return next();
    });
}
package/lib/esm/store/index.js
ADDED
@@ -0,0 +1,49 @@
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import { Sequelize } from '@sequelize/core';
import { SqliteDialect } from '@sequelize/sqlite3';
import { env, logger } from '../env';
import { Snapshot } from './model-snapshot';
export * from './model-snapshot';
export function initDatabase() {
    return __awaiter(this, void 0, void 0, function* () {
        const sequelize = new Sequelize({
            dialect: SqliteDialect,
            storage: env.databasePath,
            logging: (msg) => process.env.SQLITE_LOG && logger.debug(msg),
            pool: {
                min: 0,
                max: 10,
                idle: 10000,
            },
            retry: {
                match: [/SQLITE_BUSY/],
                name: 'query',
                max: 10,
            },
        });
        Snapshot.initModel(sequelize);
        try {
            yield Promise.all([
                sequelize.query('pragma journal_mode = WAL;'),
                sequelize.query('pragma synchronous = normal;'),
                sequelize.query('pragma journal_size_limit = 67108864;'),
            ]);
            yield sequelize.authenticate();
            yield sequelize.sync();
            logger.info('Successfully connected to database');
        }
        catch (error) {
            logger.error('Failed to connect to database:', error);
            throw error;
        }
        return sequelize;
    });
}
package/lib/esm/store/model-snapshot.d.ts
ADDED
@@ -0,0 +1,16 @@
import { Model, Sequelize } from '@sequelize/core';
export interface SnapshotModel {
    url: string;
    html: string;
    lastModified?: string;
    createdAt?: string;
    updatedAt?: string;
}
export declare class Snapshot extends Model<SnapshotModel> implements SnapshotModel {
    url: SnapshotModel['url'];
    html: SnapshotModel['html'];
    lastModified?: SnapshotModel['lastModified'];
    createdAt: SnapshotModel['createdAt'];
    updatedAt: SnapshotModel['updatedAt'];
    static initModel(sequelize: Sequelize): void;
}
package/lib/esm/store/model-snapshot.js
ADDED
@@ -0,0 +1,25 @@
import { DataTypes, Model } from '@sequelize/core';
export class Snapshot extends Model {
    static initModel(sequelize) {
        Snapshot.init({
            url: {
                type: DataTypes.STRING,
                allowNull: false,
                primaryKey: true,
            },
            html: {
                type: DataTypes.TEXT,
                allowNull: false,
            },
            lastModified: {
                type: DataTypes.STRING,
                allowNull: true,
            },
        }, {
            sequelize,
            modelName: 'snapshot',
            tableName: 'snap',
            timestamps: true,
        });
    }
}
package/package.json
ADDED
@@ -0,0 +1,101 @@
{
  "name": "@arcblock/crawler-middleware",
  "version": "1.1.1",
  "main": "lib/cjs/index.js",
  "module": "lib/esm/index.js",
  "types": "lib/cjs/index.d.ts",
  "publishConfig": {
    "access": "public"
  },
  "files": [
    "lib",
    "*.d.ts"
  ],
  "exports": {
    ".": {
      "require": "./lib/cjs/index.js",
      "import": "./lib/esm/index.js",
      "types": "./lib/cjs/index.d.ts"
    }
  },
  "lint-staged": {
    "*.{mjs,js,jsx,ts,tsx}": [
      "prettier --write",
      "eslint"
    ],
    "*.{css,less,scss,json,graphql}": [
      "prettier --write"
    ]
  },
  "browserslist": {
    "production": [
      ">0.2%",
      "not dead",
      "not op_mini all"
    ],
    "development": [
      "last 1 chrome version",
      "last 1 firefox version",
      "last 1 safari version"
    ]
  },
  "dependencies": {
    "@abtnode/cron": "^1.16.43",
    "@abtnode/models": "^1.16.43",
    "@abtnode/queue": "^1.16.43",
    "@blocklet/logger": "^1.16.43",
    "@blocklet/puppeteer": "^22.11.3",
    "@blocklet/sdk": "^1.16.43",
    "@sequelize/core": "7.0.0-alpha.46",
    "@sequelize/sqlite3": "7.0.0-alpha.46",
    "axios": "^1.7.9",
    "fs-extra": "^11.2.0",
    "generic-pool": "^3.9.0",
    "lodash": "^4.17.21",
    "lru-cache": "^10.4.3",
    "redis": "^4.7.0",
    "robots-parser": "^3.0.1",
    "sequelize": "^6.37.7",
    "sitemap": "^7.1.2",
    "sqlite3": "^5.1.7",
    "ufo": "^1.5.4",
    "@arcblock/crawler": "1.1.1"
  },
  "devDependencies": {
    "@blocklet/js-sdk": "^1.16.39",
    "@types/dotenv-flow": "^3.3.3",
    "@types/express": "^4.17.21",
    "@types/fs-extra": "^11.0.4",
    "@types/lodash": "^4.17.16",
    "@types/node": "^20.17.19",
    "express": "^4.21.2",
    "bumpp": "^9.11.1",
    "nodemon": "^3.1.9",
    "npm-run-all": "^4.1.5",
    "puppeteer": "^24.8.2",
    "tsx": "^4.19.3",
    "zx": "^8.3.2"
  },
  "importSort": {
    ".js, .jsx, .mjs": {
      "parser": "babylon",
      "style": "module"
    },
    ".ts, .tsx": {
      "style": "module",
      "parser": "typescript"
    }
  },
  "simple-git-hooks": {
    "pre-commit": "npx lint-staged"
  },
  "scripts": {
    "dev": "tsc -p tsconfig.cjs.json --watch",
    "lint": "tsc --noEmit && eslint src --ext .mjs,.js,.jsx,.ts,.tsx",
    "lint:fix": "npm run lint -- --fix",
    "bundle": "npm run build",
    "build:cjs": "tsc -p tsconfig.cjs.json",
    "build:esm": "tsc -p tsconfig.esm.json",
    "build": "npm run build:cjs && npm run build:esm"
  }
}