@arcblock/crawler 1.1.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/crawler.d.ts +3 -4
- package/lib/cjs/crawler.js +58 -32
- package/lib/cjs/index.d.ts +1 -0
- package/lib/cjs/index.js +3 -5
- package/lib/cjs/services/snapshot.d.ts +5 -2
- package/lib/cjs/services/snapshot.js +36 -6
- package/lib/cjs/site.d.ts +1 -1
- package/lib/cjs/site.js +9 -3
- package/lib/cjs/store/index.d.ts +4 -1
- package/lib/cjs/store/index.js +37 -45
- package/lib/cjs/store/job.d.ts +2 -0
- package/lib/cjs/store/migrate.d.ts +4 -0
- package/lib/cjs/store/migrate.js +63 -0
- package/lib/cjs/store/migrations/20250615-genesis.d.ts +6 -0
- package/lib/cjs/store/migrations/20250615-genesis.js +114 -0
- package/lib/cjs/store/migrations/20250616-replace.d.ts +6 -0
- package/lib/cjs/store/migrations/20250616-replace.js +40 -0
- package/lib/cjs/store/snapshot.d.ts +2 -0
- package/lib/cjs/store/snapshot.js +7 -0
- package/lib/esm/crawler.d.ts +3 -4
- package/lib/esm/crawler.js +55 -29
- package/lib/esm/index.d.ts +1 -0
- package/lib/esm/index.js +1 -4
- package/lib/esm/services/snapshot.d.ts +5 -2
- package/lib/esm/services/snapshot.js +33 -4
- package/lib/esm/site.d.ts +1 -1
- package/lib/esm/site.js +9 -3
- package/lib/esm/store/index.d.ts +4 -1
- package/lib/esm/store/index.js +23 -45
- package/lib/esm/store/job.d.ts +2 -0
- package/lib/esm/store/migrate.d.ts +4 -0
- package/lib/esm/store/migrate.js +26 -0
- package/lib/esm/store/migrations/20250615-genesis.d.ts +6 -0
- package/lib/esm/store/migrations/20250615-genesis.js +110 -0
- package/lib/esm/store/migrations/20250616-replace.d.ts +6 -0
- package/lib/esm/store/migrations/20250616-replace.js +36 -0
- package/lib/esm/store/snapshot.d.ts +2 -0
- package/lib/esm/store/snapshot.js +7 -0
- package/package.json +3 -2
package/lib/cjs/crawler.d.ts
CHANGED
@@ -1,11 +1,10 @@
-import { JobState } from './store
-
-export declare function createCrawlQueue(): void;
+import { JobState, SnapshotModel } from './store';
+export declare function createCrawlQueue(queue: string): any;
 export declare function getDataDir(): Promise<{
     htmlDir: string;
     screenshotDir: string;
 }>;
-export declare const getPageContent: ({ url, includeScreenshot, includeHtml, width, height, quality, timeout, fullPage, headers, cookies, localStorage, }: JobState) => Promise<{
+export declare const getPageContent: ({ url, includeScreenshot, includeHtml, width, height, quality, timeout, waitTime, fullPage, headers, cookies, localStorage, }: JobState) => Promise<{
     html: string | null;
     screenshot: Uint8Array<ArrayBufferLike> | null;
     meta: {
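Note: per the declaration changes above, createCrawlQueue now takes a queue name and returns the queue it builds (typed as any), and getPageContent accepts a new waitTime option that is awaited alongside Puppeteer's network-idle wait. A minimal sketch of the new surface; the URL, the 2000 ms value, and the any cast are illustrative, since the full JobState shape lives in ./store:

    import { createCrawlQueue, getPageContent } from '@arcblock/crawler';

    // one queue per name; the compiled sources below create 'urlCrawler' and 'cronJobs'
    const queue = createCrawlQueue('urlCrawler');

    // capture HTML only, giving the page up to 2s of extra settle time
    const { html, meta } = await getPageContent({
      url: 'https://www.example.com',
      includeScreenshot: false,
      waitTime: 2000,
    } as any);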
package/lib/cjs/crawler.js
CHANGED
@@ -24,15 +24,15 @@ const path_1 = __importDefault(require("path"));
 const config_1 = require("./config");
 const puppeteer_1 = require("./puppeteer");
 const snapshot_1 = require("./services/snapshot");
-const
-const snapshot_2 = require("./store/snapshot");
+const store_1 = require("./store");
 const utils_1 = require("./utils");
 const { BaseState } = require('@abtnode/models');
-
-
-
-
-
+// eslint-disable-next-line import/no-mutable-exports
+const crawlQueue = createCrawlQueue('urlCrawler');
+function createCrawlQueue(queue) {
+    const db = new BaseState(store_1.Job);
+    return (0, queue_1.default)({
+        store: new sequelize_1.default(db, queue),
         concurrency: config_1.config.concurrency,
         onJob: (job) => __awaiter(this, void 0, void 0, function* () {
             config_1.logger.info('Starting to execute crawl job', job);
@@ -46,7 +46,7 @@ function createCrawlQueue() {
                         error: 'Denied by robots.txt',
                     },
                 });
-                yield
+                yield store_1.Snapshot.upsert(snapshot);
                 return snapshot;
             }
             // if index reach autoCloseBrowserCount, close browser
@@ -70,25 +70,42 @@ function createCrawlQueue() {
                        error: 'Failed to crawl content',
                    },
                });
-                yield
+                yield store_1.Snapshot.upsert(snapshot);
                return snapshot;
            }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            const snapshot = yield store_1.sequelize.transaction((txn) => __awaiter(this, void 0, void 0, function* () {
+                // delete old snapshot
+                if (formattedJob.replace) {
+                    try {
+                        const deletedJobIds = yield (0, snapshot_1.deleteSnapshots)({
+                            url: formattedJob.url,
+                            replace: true,
+                        }, { txn });
+                        if (deletedJobIds) {
+                            config_1.logger.info('Deleted old snapshot', { deletedJobIds });
+                        }
+                    }
+                    catch (error) {
+                        config_1.logger.error('Failed to delete old snapshot', { error, formattedJob });
+                    }
+                }
+                // save html and screenshot to data dir
+                const { screenshotPath, htmlPath } = yield saveSnapshotToLocal({
+                    screenshot: result.screenshot,
+                    html: result.html,
+                });
+                const snapshot = (0, snapshot_1.convertJobToSnapshot)({
+                    job: formattedJob,
+                    snapshot: {
+                        status: 'success',
+                        screenshot: screenshotPath === null || screenshotPath === void 0 ? void 0 : screenshotPath.replace(config_1.config.dataDir, ''),
+                        html: htmlPath === null || htmlPath === void 0 ? void 0 : htmlPath.replace(config_1.config.dataDir, ''),
+                        meta: result.meta,
+                    },
+                });
+                yield store_1.Snapshot.upsert(snapshot, { transaction: txn });
+                return snapshot;
+            }));
             return snapshot;
         }
         catch (error) {
@@ -100,7 +117,7 @@ function createCrawlQueue() {
                     error: 'Internal error',
                 },
             });
-            yield
+            yield store_1.Snapshot.upsert(snapshot);
             return snapshot;
         }
     }),
@@ -138,7 +155,7 @@ function saveSnapshotToLocal(_a) {
         };
     });
 }
-const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, includeScreenshot = true, includeHtml = true, width = 1440, height = 900, quality = 80, timeout = 90 * 1000, fullPage = false, headers, cookies, localStorage, }) {
+const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, includeScreenshot = true, includeHtml = true, width = 1440, height = 900, quality = 80, timeout = 90 * 1000, waitTime = 0, fullPage = false, headers, cookies, localStorage, }) {
     const page = yield (0, puppeteer_1.initPage)();
     if (width && height) {
         yield page.setViewport({ width, height, deviceScaleFactor: 2 });
@@ -175,9 +192,18 @@ const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url,
     }
     // await for networkidle0
     // https://pptr.dev/api/puppeteer.page.waitfornetworkidle
-
-
-
+    try {
+        yield Promise.all([
+            page.waitForNetworkIdle({
+                idleTime: 1.5 * 1000,
+                timeout,
+            }),
+            (0, utils_1.sleep)(waitTime),
+        ]);
+    }
+    catch (err) {
+        config_1.logger.warn(`Failed to wait for network idle in ${url}:`, err);
+    }
     // get screenshot
     if (includeScreenshot) {
         // Try to find the tallest element and set the browser to the same height
@@ -221,7 +247,7 @@ const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url,
         // check if the page is an error page
         const isErrorPage = ['<h2>Unexpected Application Error!</h2>', 'Current route occurred an error'].some((errorHtml) => data.html.includes(errorHtml));
         if (isErrorPage) {
-            throw new Error(
+            throw new Error(`${url} is an error page`);
         }
         meta.title = data.title;
         meta.description = data.description;
@@ -257,7 +283,7 @@ exports.getPageContent = getPageContent;
 function crawlUrl(params, callback) {
     return __awaiter(this, void 0, void 0, function* () {
         // skip duplicate job
-        const existsJob = yield
+        const existsJob = yield store_1.Job.isExists(params);
         if (existsJob) {
             config_1.logger.info(`Crawl job already exists for ${params.url}, skip`);
             return existsJob.id;
package/lib/cjs/index.d.ts
CHANGED
@@ -3,4 +3,5 @@ export * from './crawler';
 export * from './site';
 export * from './services/snapshot';
 export * as utils from './utils';
+export { migrate } from './store/migrate';
 export declare function initCrawler(params: Pick<Config, 'puppeteerPath' | 'siteCron' | 'cookies' | 'localStorage' | 'concurrency'>): Promise<void>;
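Note: the root entry now re-exports migrate from ./store/migrate, and (per the index.js diff below) initCrawler no longer calls initDatabase() or createCrawlQueue() itself. A plausible boot sequence for a host app under this split; the concurrency value is illustrative:

    import { initCrawler, migrate } from '@arcblock/crawler';

    await migrate();                       // apply pending schema migrations first
    await initCrawler({ concurrency: 2 }); // then warm the browser and optional site cron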
package/lib/cjs/index.js
CHANGED
@@ -48,28 +48,26 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.utils = void 0;
+exports.migrate = exports.utils = void 0;
 exports.initCrawler = initCrawler;
 /* eslint-disable @typescript-eslint/indent */
 const merge_1 = __importDefault(require("lodash/merge"));
 const config_1 = require("./config");
-const crawler_1 = require("./crawler");
 const cron_1 = require("./cron");
 const puppeteer_1 = require("./puppeteer");
-const store_1 = require("./store");
 __exportStar(require("./crawler"), exports);
 __exportStar(require("./site"), exports);
 __exportStar(require("./services/snapshot"), exports);
 exports.utils = __importStar(require("./utils"));
+var migrate_1 = require("./store/migrate");
+Object.defineProperty(exports, "migrate", { enumerable: true, get: function () { return migrate_1.migrate; } });
 function initCrawler(params) {
     return __awaiter(this, void 0, void 0, function* () {
         var _a;
         (0, merge_1.default)(config_1.config, params);
         config_1.logger.info('Init crawler', { params, config: config_1.config });
         try {
-            yield (0, store_1.initDatabase)();
             yield (0, puppeteer_1.ensureBrowser)();
-            yield (0, crawler_1.createCrawlQueue)();
             if ((_a = config_1.config.siteCron) === null || _a === void 0 ? void 0 : _a.enabled) {
                 yield (0, cron_1.initCron)();
             }
package/lib/cjs/services/snapshot.d.ts
CHANGED
@@ -1,5 +1,5 @@
-import {
-import { SnapshotModel } from '../store
+import { Transaction, WhereOptions } from '@sequelize/core';
+import { JobState, SnapshotModel } from '../store';
 export declare function convertJobToSnapshot({ job, snapshot }: {
     job: JobState;
     snapshot?: Partial<SnapshotModel>;
@@ -10,3 +10,6 @@ export declare function formatSnapshot(snapshot: SnapshotModel, columns?: Array<
  */
 export declare function getSnapshot(jobId: string): Promise<SnapshotModel | null>;
 export declare function getLatestSnapshot(url: string): Promise<SnapshotModel | null>;
+export declare function deleteSnapshots(where: WhereOptions<SnapshotModel>, { txn }?: {
+    txn?: Transaction;
+}): Promise<string[]>;
package/lib/cjs/services/snapshot.js
CHANGED
@@ -16,17 +16,17 @@ exports.convertJobToSnapshot = convertJobToSnapshot;
 exports.formatSnapshot = formatSnapshot;
 exports.getSnapshot = getSnapshot;
 exports.getLatestSnapshot = getLatestSnapshot;
+exports.deleteSnapshots = deleteSnapshots;
 const cloneDeep_1 = __importDefault(require("lodash/cloneDeep"));
 const pick_1 = __importDefault(require("lodash/pick"));
 const promises_1 = __importDefault(require("node:fs/promises"));
 const node_path_1 = __importDefault(require("node:path"));
 const ufo_1 = require("ufo");
 const config_1 = require("../config");
-const
-const snapshot_1 = require("../store/snapshot");
+const store_1 = require("../store");
 const utils_1 = require("../utils");
 function convertJobToSnapshot({ job, snapshot }) {
-    return Object.assign({ jobId: job.jobId || job.id, url: job.url, lastModified: job.lastModified || new Date().toISOString(), options: {
+    return Object.assign({ jobId: job.jobId || job.id, url: job.url, lastModified: job.lastModified || new Date().toISOString(), replace: job.replace, options: {
         width: job.width,
         height: job.height,
         includeScreenshot: job.includeScreenshot,
@@ -64,11 +64,11 @@ function formatSnapshot(snapshot, columns) {
  */
 function getSnapshot(jobId) {
     return __awaiter(this, void 0, void 0, function* () {
-        const snapshot = yield
+        const snapshot = yield store_1.Snapshot.findSnapshot({ where: { jobId } });
         if (snapshot) {
             return formatSnapshot(snapshot);
         }
-        const job = yield
+        const job = yield store_1.Job.findJob({ id: jobId });
         if (job) {
             return {
                 jobId,
@@ -80,12 +80,42 @@ function getSnapshot(jobId) {
 }
 function getLatestSnapshot(url) {
     return __awaiter(this, void 0, void 0, function* () {
-        const snapshot = yield
+        const snapshot = yield store_1.Snapshot.findSnapshot({
             where: {
                 url: (0, utils_1.formatUrl)(url),
                 status: 'success',
             },
+            order: [
+                ['lastModified', 'DESC'],
+                ['updatedAt', 'DESC'],
+            ],
         });
         return snapshot ? formatSnapshot(snapshot) : null;
     });
 }
+function deleteSnapshots(where_1) {
+    return __awaiter(this, arguments, void 0, function* (where, { txn } = {}) {
+        const snapshots = yield store_1.Snapshot.findAll({
+            where,
+            order: [
+                ['lastModified', 'DESC'],
+                ['updatedAt', 'DESC'],
+            ],
+        });
+        const jobIds = yield Promise.all(snapshots.map((snapshot) => __awaiter(this, void 0, void 0, function* () {
+            try {
+                yield Promise.all([
+                    snapshot.html && promises_1.default.unlink(node_path_1.default.join(config_1.config.dataDir, snapshot.html)),
+                    snapshot.screenshot && promises_1.default.unlink(node_path_1.default.join(config_1.config.dataDir, snapshot.screenshot)),
+                ]);
+                yield snapshot.destroy({ transaction: txn });
+                return snapshot.jobId;
+            }
+            catch (error) {
+                config_1.logger.error('Failed to delete snapshot', { error, snapshot });
+                throw error;
+            }
+        })));
+        return jobIds.filter(Boolean);
+    });
+}
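Note: deleteSnapshots unlinks each snapshot's html and screenshot files under config.dataDir before destroying the row, so only the row deletion participates in the optional transaction, and the resolved value is the list of deleted jobIds. A sketch with an illustrative where clause, using the re-export through services/snapshot:

    import { deleteSnapshots } from '@arcblock/crawler';

    // remove every stored snapshot for one URL, newest first
    const jobIds = await deleteSnapshots({ url: 'https://www.example.com/docs' });
    console.log('deleted jobs:', jobIds);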
package/lib/cjs/site.d.ts
CHANGED
@@ -1,2 +1,2 @@
 import { Site } from './config';
-export declare const crawlSite: ({ url, pathname, interval }: Site) => Promise<(string | null)[]>;
+export declare const crawlSite: ({ url, pathname, interval }: Site) => Promise<(`${string}-${string}-${string}-${string}-${string}` | null)[]>;
package/lib/cjs/site.js
CHANGED
@@ -14,12 +14,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.crawlSite = void 0;
 const uniq_1 = __importDefault(require("lodash/uniq"));
+const node_crypto_1 = require("node:crypto");
 const p_map_1 = __importDefault(require("p-map"));
 const config_1 = require("./config");
 const crawler_1 = require("./crawler");
-const
+const store_1 = require("./store");
 const utils_1 = require("./utils");
 const crawlBlockletRunningMap = new Map();
+const crawlQueue = (0, crawler_1.createCrawlQueue)('cronJobs');
 function parseSitemapUrl(sitemapItem) {
     var _a;
     const links = ((_a = sitemapItem.links) === null || _a === void 0 ? void 0 : _a.map((item) => item.url)) || [];
@@ -48,7 +50,7 @@ const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, path
     try {
         const jobIds = yield (0, p_map_1.default)(sitemapItems, (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, sitemapItem }) {
             processCount++;
-            const snapshot = yield
+            const snapshot = yield store_1.Snapshot.findOne({ where: { url: (0, utils_1.formatUrl)(url) } });
             if (snapshot === null || snapshot === void 0 ? void 0 : snapshot.lastModified) {
                 const lastModified = new Date(snapshot.lastModified);
                 // skip if snapshot lastModified is greater than sitemap lastmod
@@ -67,12 +69,16 @@ const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, path
                 url,
             });
             crawlCount++;
-
+            const jobId = (0, node_crypto_1.randomUUID)();
+            crawlQueue.push({
+                id: jobId,
                 url,
                 lastModified: sitemapItem.lastmod,
                 includeScreenshot: false,
                 includeHtml: true,
+                replace: true,
             });
+            return jobId;
         }), { concurrency: ((_b = config_1.config.siteCron) === null || _b === void 0 ? void 0 : _b.concurrency) || 30 });
         config_1.logger.info('Enqueued jobs from sitemap finished', {
             url,
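Note: crawlSite now mints job ids locally with randomUUID() from node:crypto and pushes onto the module-level 'cronJobs' queue with replace: true; that is why the declared return type of crawlSite narrows from string to the UUID template-literal type in site.d.ts above. For reference:

    import { randomUUID } from 'node:crypto';

    // Node types randomUUID() as `${string}-${string}-${string}-${string}-${string}`
    const jobId = randomUUID();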
package/lib/cjs/store/index.d.ts
CHANGED
@@ -1,3 +1,6 @@
 import { Sequelize } from '@sequelize/core';
 import { SqliteDialect } from '@sequelize/sqlite3';
-
+declare const sequelize: Sequelize<SqliteDialect>;
+export { sequelize };
+export * from './job';
+export * from './snapshot';
package/lib/cjs/store/index.js
CHANGED
@@ -1,57 +1,49 @@
 "use strict";
-var
-
-
-
-
-
-
-
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.
+exports.sequelize = void 0;
 const core_1 = require("@sequelize/core");
 const sqlite3_1 = require("@sequelize/sqlite3");
 const path_1 = __importDefault(require("path"));
 const config_1 = require("../config");
 const job_1 = require("./job");
 const snapshot_1 = require("./snapshot");
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            sequelize.query('pragma journal_size_limit = 67108864;'),
-        ]);
-        yield sequelize.authenticate();
-        yield sequelize.sync({ alter: process.env.ALTER_SQLITE === 'true' });
-        config_1.logger.info('Successfully connected to database');
-        }
-        catch (error) {
-            config_1.logger.error('Failed to connect to database:', error);
-            throw error;
-        }
-        return sequelize;
-    });
-}
+const sequelize = new core_1.Sequelize({
+    dialect: sqlite3_1.SqliteDialect,
+    storage: path_1.default.join(config_1.config.dataDir, 'snap-kit.db'),
+    logging: (msg) => process.env.SQLITE_LOG && config_1.logger.debug(msg),
+    pool: {
+        min: 0,
+        max: 10,
+        idle: 10000,
+    },
+    retry: {
+        match: [/SQLITE_BUSY/],
+        name: 'query',
+        max: 10,
+    },
+});
+exports.sequelize = sequelize;
+sequelize.query('pragma journal_mode = WAL;');
+sequelize.query('pragma synchronous = normal;');
+sequelize.query('pragma journal_size_limit = 67108864;');
+job_1.Job.initModel(sequelize);
+snapshot_1.Snapshot.initModel(sequelize);
+__exportStar(require("./job"), exports);
+__exportStar(require("./snapshot"), exports);
package/lib/cjs/store/migrate.js
ADDED
@@ -0,0 +1,63 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.umzug = void 0;
+exports.migrate = migrate;
+/* eslint-disable global-require */
+const umzug_1 = require("umzug");
+const index_1 = require("./index");
+const migration20250615 = __importStar(require("./migrations/20250615-genesis"));
+const migration20250616Replace = __importStar(require("./migrations/20250616-replace"));
+const umzug = new umzug_1.Umzug({
+    migrations: [
+        {
+            name: '20250615-genesis',
+            up: ({ context }) => migration20250615.up({ context }),
+            down: ({ context }) => migration20250615.down({ context }),
+        },
+        {
+            name: '20250616-replace',
+            up: ({ context }) => migration20250616Replace.up({ context }),
+            down: ({ context }) => migration20250616Replace.down({ context }),
+        },
+    ],
+    context: index_1.sequelize.getQueryInterface(),
+    storage: new umzug_1.SequelizeStorage({ sequelize: index_1.sequelize }),
+    logger: console,
+});
+exports.umzug = umzug;
+function migrate() {
+    return umzug.up();
+}
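Note: migrations are registered by explicit name rather than discovered by glob, tracked with SequelizeStorage in the same SQLite database, and migrate() simply delegates to umzug.up(). The exported umzug handle also permits rollback; the deep-import path is illustrative:

    import { umzug } from '@arcblock/crawler/lib/cjs/store/migrate';

    await umzug.down({ step: 1 }); // revert the most recently applied migration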
package/lib/cjs/store/migrations/20250615-genesis.js
ADDED
@@ -0,0 +1,114 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.up = up;
+exports.down = down;
+/* eslint-disable no-console */
+const core_1 = require("@sequelize/core");
+function up(_a) {
+    return __awaiter(this, arguments, void 0, function* ({ context }) {
+        console.log('[20250615-genesis:up] Migrating...');
+        yield context.createTable('snap', {
+            jobId: {
+                type: core_1.DataTypes.STRING,
+                primaryKey: true,
+                allowNull: false,
+            },
+            url: {
+                type: core_1.DataTypes.STRING,
+                allowNull: false,
+                index: true,
+            },
+            status: {
+                type: core_1.DataTypes.ENUM('success', 'failed', 'pending'),
+                allowNull: false,
+            },
+            html: {
+                type: core_1.DataTypes.TEXT,
+                allowNull: true,
+            },
+            screenshot: {
+                type: core_1.DataTypes.STRING,
+                allowNull: true,
+            },
+            error: {
+                type: core_1.DataTypes.STRING,
+                allowNull: true,
+            },
+            lastModified: {
+                type: core_1.DataTypes.STRING,
+                allowNull: true,
+            },
+            meta: {
+                type: core_1.DataTypes.JSON,
+                allowNull: true,
+            },
+            options: {
+                type: core_1.DataTypes.JSON,
+                allowNull: true,
+            },
+            createdAt: {
+                type: core_1.DataTypes.DATE,
+                defaultValue: core_1.DataTypes.NOW,
+            },
+            updatedAt: {
+                type: core_1.DataTypes.DATE,
+                defaultValue: core_1.DataTypes.NOW,
+            },
+        });
+        yield context.createTable('jobs', {
+            id: {
+                type: core_1.DataTypes.STRING(40),
+                primaryKey: true,
+            },
+            queue: {
+                type: core_1.DataTypes.STRING(32),
+                allowNull: false,
+            },
+            job: {
+                type: core_1.DataTypes.JSON,
+                allowNull: false,
+            },
+            retryCount: {
+                type: core_1.DataTypes.INTEGER,
+            },
+            delay: {
+                type: core_1.DataTypes.INTEGER,
+            },
+            willRunAt: {
+                type: core_1.DataTypes.INTEGER,
+            },
+            cancelled: {
+                type: core_1.DataTypes.BOOLEAN,
+                defaultValue: false,
+            },
+            createdAt: {
+                type: core_1.DataTypes.DATE,
+                defaultValue: core_1.DataTypes.NOW,
+                index: true,
+            },
+            updatedAt: {
+                type: core_1.DataTypes.DATE,
+                defaultValue: core_1.DataTypes.NOW,
+                index: true,
+            },
+        });
+        console.log('[20250615-genesis:up] Migrated successfully!');
+    });
+}
+function down(_a) {
+    return __awaiter(this, arguments, void 0, function* ({ context }) {
+        console.log('[20250615-genesis:down] Migrating...');
+        yield context.dropTable('snap');
+        yield context.dropTable('jobs');
+        console.log('[20250615-genesis:down] Migrated successfully!');
+    });
+}