@arcblock/crawler 1.1.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/crawler.d.ts +3 -4
- package/lib/cjs/crawler.js +58 -32
- package/lib/cjs/index.d.ts +1 -0
- package/lib/cjs/index.js +3 -5
- package/lib/cjs/services/snapshot.d.ts +5 -2
- package/lib/cjs/services/snapshot.js +36 -6
- package/lib/cjs/site.d.ts +1 -1
- package/lib/cjs/site.js +9 -3
- package/lib/cjs/store/index.d.ts +4 -1
- package/lib/cjs/store/index.js +37 -45
- package/lib/cjs/store/job.d.ts +2 -0
- package/lib/cjs/store/migrate.d.ts +4 -0
- package/lib/cjs/store/migrate.js +63 -0
- package/lib/cjs/store/migrations/20250615-genesis.d.ts +6 -0
- package/lib/cjs/store/migrations/20250615-genesis.js +114 -0
- package/lib/cjs/store/migrations/20250616-replace.d.ts +6 -0
- package/lib/cjs/store/migrations/20250616-replace.js +40 -0
- package/lib/cjs/store/snapshot.d.ts +2 -0
- package/lib/cjs/store/snapshot.js +7 -0
- package/lib/esm/crawler.d.ts +3 -4
- package/lib/esm/crawler.js +55 -29
- package/lib/esm/index.d.ts +1 -0
- package/lib/esm/index.js +1 -4
- package/lib/esm/services/snapshot.d.ts +5 -2
- package/lib/esm/services/snapshot.js +33 -4
- package/lib/esm/site.d.ts +1 -1
- package/lib/esm/site.js +9 -3
- package/lib/esm/store/index.d.ts +4 -1
- package/lib/esm/store/index.js +23 -45
- package/lib/esm/store/job.d.ts +2 -0
- package/lib/esm/store/migrate.d.ts +4 -0
- package/lib/esm/store/migrate.js +26 -0
- package/lib/esm/store/migrations/20250615-genesis.d.ts +6 -0
- package/lib/esm/store/migrations/20250615-genesis.js +110 -0
- package/lib/esm/store/migrations/20250616-replace.d.ts +6 -0
- package/lib/esm/store/migrations/20250616-replace.js +36 -0
- package/lib/esm/store/snapshot.d.ts +2 -0
- package/lib/esm/store/snapshot.js +7 -0
- package/package.json +3 -2
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*
 * Helper that drives a generator function as an asynchronous routine.
 * An `__awaiter` already present on `this` is reused; otherwise the function
 * below is used. It returns a new `P` (falling back to the global Promise when
 * `P` is falsy). Each yielded value is wrapped by `adopt` and, once it settles,
 * fed back into the generator via `next` (fulfilled) or `throw` (rejected).
 * The returned promise resolves with the generator's final value and rejects
 * when `next`/`throw` raises.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.up = up;
|
|
13
|
+
exports.down = down;
|
|
14
|
+
/* eslint-disable no-console */
|
|
15
|
+
const core_1 = require("@sequelize/core");
|
|
16
|
+
function up(_a) {
|
|
17
|
+
return __awaiter(this, arguments, void 0, function* ({ context }) {
|
|
18
|
+
console.log('[20250616-replace:up] Migrating...');
|
|
19
|
+
yield context.addColumn('snap', 'replace', {
|
|
20
|
+
type: core_1.DataTypes.BOOLEAN,
|
|
21
|
+
allowNull: false,
|
|
22
|
+
defaultValue: false,
|
|
23
|
+
index: true,
|
|
24
|
+
});
|
|
25
|
+
yield context.addIndex('snap', ['createdAt']);
|
|
26
|
+
yield context.addIndex('snap', ['updatedAt']);
|
|
27
|
+
yield context.addIndex('snap', ['status']);
|
|
28
|
+
console.log('[20250616-replace:up] Migrated successfully!');
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
function down(_a) {
|
|
32
|
+
return __awaiter(this, arguments, void 0, function* ({ context }) {
|
|
33
|
+
console.log('[20250616-replace:down] Migrating...');
|
|
34
|
+
yield context.removeColumn('snap', 'replace');
|
|
35
|
+
yield context.removeIndex('snap', ['createdAt']);
|
|
36
|
+
yield context.removeIndex('snap', ['updatedAt']);
|
|
37
|
+
yield context.removeIndex('snap', ['status']);
|
|
38
|
+
console.log('[20250616-replace:down] Migrated successfully!');
|
|
39
|
+
});
|
|
40
|
+
}
|
|
@@ -8,6 +8,7 @@ export interface SnapshotModel {
|
|
|
8
8
|
screenshot?: string | null;
|
|
9
9
|
error?: string;
|
|
10
10
|
lastModified?: string;
|
|
11
|
+
replace?: boolean;
|
|
11
12
|
meta?: {
|
|
12
13
|
title?: string;
|
|
13
14
|
description?: string;
|
|
@@ -35,6 +36,7 @@ export declare class Snapshot extends Model<SnapshotModel> implements SnapshotMo
|
|
|
35
36
|
screenshot?: SnapshotModel['screenshot'];
|
|
36
37
|
error?: SnapshotModel['error'];
|
|
37
38
|
lastModified?: SnapshotModel['lastModified'];
|
|
39
|
+
replace?: SnapshotModel['replace'];
|
|
38
40
|
meta?: SnapshotModel['meta'];
|
|
39
41
|
options: SnapshotModel['options'];
|
|
40
42
|
static initModel(sequelize: Sequelize): typeof Snapshot;
|
|
@@ -27,6 +27,7 @@ class Snapshot extends core_1.Model {
|
|
|
27
27
|
status: {
|
|
28
28
|
type: core_1.DataTypes.ENUM('success', 'failed', 'pending'),
|
|
29
29
|
allowNull: false,
|
|
30
|
+
index: true,
|
|
30
31
|
},
|
|
31
32
|
html: {
|
|
32
33
|
type: core_1.DataTypes.TEXT,
|
|
@@ -44,6 +45,12 @@ class Snapshot extends core_1.Model {
|
|
|
44
45
|
type: core_1.DataTypes.STRING,
|
|
45
46
|
allowNull: true,
|
|
46
47
|
},
|
|
48
|
+
replace: {
|
|
49
|
+
type: core_1.DataTypes.BOOLEAN,
|
|
50
|
+
allowNull: false,
|
|
51
|
+
defaultValue: false,
|
|
52
|
+
index: true,
|
|
53
|
+
},
|
|
47
54
|
meta: {
|
|
48
55
|
type: core_1.DataTypes.JSON,
|
|
49
56
|
allowNull: true,
|
package/lib/esm/crawler.d.ts
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
import { JobState } from './store
|
|
2
|
-
|
|
3
|
-
export declare function createCrawlQueue(): void;
|
|
1
|
+
import { JobState, SnapshotModel } from './store';
|
|
2
|
+
export declare function createCrawlQueue(queue: string): any;
|
|
4
3
|
export declare function getDataDir(): Promise<{
|
|
5
4
|
htmlDir: string;
|
|
6
5
|
screenshotDir: string;
|
|
7
6
|
}>;
|
|
8
|
-
export declare const getPageContent: ({ url, includeScreenshot, includeHtml, width, height, quality, timeout, fullPage, headers, cookies, localStorage, }: JobState) => Promise<{
|
|
7
|
+
export declare const getPageContent: ({ url, includeScreenshot, includeHtml, width, height, quality, timeout, waitTime, fullPage, headers, cookies, localStorage, }: JobState) => Promise<{
|
|
9
8
|
html: string | null;
|
|
10
9
|
screenshot: Uint8Array<ArrayBufferLike> | null;
|
|
11
10
|
meta: {
|
package/lib/esm/crawler.js
CHANGED
|
@@ -14,16 +14,16 @@ import fs from 'fs-extra';
|
|
|
14
14
|
import path from 'path';
|
|
15
15
|
import { config, logger } from './config';
|
|
16
16
|
import { initPage } from './puppeteer';
|
|
17
|
-
import { convertJobToSnapshot, formatSnapshot } from './services/snapshot';
|
|
18
|
-
import { Job } from './store
|
|
19
|
-
import {
|
|
20
|
-
import { findMaxScrollHeight, formatUrl, isAcceptCrawler, md5 } from './utils';
|
|
17
|
+
import { convertJobToSnapshot, deleteSnapshots, formatSnapshot } from './services/snapshot';
|
|
18
|
+
import { Job, Snapshot, sequelize } from './store';
|
|
19
|
+
import { findMaxScrollHeight, formatUrl, isAcceptCrawler, md5, sleep } from './utils';
|
|
21
20
|
const { BaseState } = require('@abtnode/models');
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
// eslint-disable-next-line import/no-mutable-exports
|
|
22
|
+
const crawlQueue = createCrawlQueue('urlCrawler');
|
|
23
|
+
export function createCrawlQueue(queue) {
|
|
24
24
|
const db = new BaseState(Job);
|
|
25
|
-
|
|
26
|
-
store: new SequelizeStore(db,
|
|
25
|
+
return createQueue({
|
|
26
|
+
store: new SequelizeStore(db, queue),
|
|
27
27
|
concurrency: config.concurrency,
|
|
28
28
|
onJob: (job) => __awaiter(this, void 0, void 0, function* () {
|
|
29
29
|
logger.info('Starting to execute crawl job', job);
|
|
@@ -64,22 +64,39 @@ export function createCrawlQueue() {
|
|
|
64
64
|
yield Snapshot.upsert(snapshot);
|
|
65
65
|
return snapshot;
|
|
66
66
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
67
|
+
const snapshot = yield sequelize.transaction((txn) => __awaiter(this, void 0, void 0, function* () {
|
|
68
|
+
// delete old snapshot
|
|
69
|
+
if (formattedJob.replace) {
|
|
70
|
+
try {
|
|
71
|
+
const deletedJobIds = yield deleteSnapshots({
|
|
72
|
+
url: formattedJob.url,
|
|
73
|
+
replace: true,
|
|
74
|
+
}, { txn });
|
|
75
|
+
if (deletedJobIds) {
|
|
76
|
+
logger.info('Deleted old snapshot', { deletedJobIds });
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
catch (error) {
|
|
80
|
+
logger.error('Failed to delete old snapshot', { error, formattedJob });
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
// save html and screenshot to data dir
|
|
84
|
+
const { screenshotPath, htmlPath } = yield saveSnapshotToLocal({
|
|
85
|
+
screenshot: result.screenshot,
|
|
86
|
+
html: result.html,
|
|
87
|
+
});
|
|
88
|
+
const snapshot = convertJobToSnapshot({
|
|
89
|
+
job: formattedJob,
|
|
90
|
+
snapshot: {
|
|
91
|
+
status: 'success',
|
|
92
|
+
screenshot: screenshotPath === null || screenshotPath === void 0 ? void 0 : screenshotPath.replace(config.dataDir, ''),
|
|
93
|
+
html: htmlPath === null || htmlPath === void 0 ? void 0 : htmlPath.replace(config.dataDir, ''),
|
|
94
|
+
meta: result.meta,
|
|
95
|
+
},
|
|
96
|
+
});
|
|
97
|
+
yield Snapshot.upsert(snapshot, { transaction: txn });
|
|
98
|
+
return snapshot;
|
|
99
|
+
}));
|
|
83
100
|
return snapshot;
|
|
84
101
|
}
|
|
85
102
|
catch (error) {
|
|
@@ -129,7 +146,7 @@ function saveSnapshotToLocal(_a) {
|
|
|
129
146
|
};
|
|
130
147
|
});
|
|
131
148
|
}
|
|
132
|
-
export const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, includeScreenshot = true, includeHtml = true, width = 1440, height = 900, quality = 80, timeout = 90 * 1000, fullPage = false, headers, cookies, localStorage, }) {
|
|
149
|
+
export const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, includeScreenshot = true, includeHtml = true, width = 1440, height = 900, quality = 80, timeout = 90 * 1000, waitTime = 0, fullPage = false, headers, cookies, localStorage, }) {
|
|
133
150
|
const page = yield initPage();
|
|
134
151
|
if (width && height) {
|
|
135
152
|
yield page.setViewport({ width, height, deviceScaleFactor: 2 });
|
|
@@ -166,9 +183,18 @@ export const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function*
|
|
|
166
183
|
}
|
|
167
184
|
// await for networkidle0
|
|
168
185
|
// https://pptr.dev/api/puppeteer.page.waitfornetworkidle
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
186
|
+
try {
|
|
187
|
+
yield Promise.all([
|
|
188
|
+
page.waitForNetworkIdle({
|
|
189
|
+
idleTime: 1.5 * 1000,
|
|
190
|
+
timeout,
|
|
191
|
+
}),
|
|
192
|
+
sleep(waitTime),
|
|
193
|
+
]);
|
|
194
|
+
}
|
|
195
|
+
catch (err) {
|
|
196
|
+
logger.warn(`Failed to wait for network idle in ${url}:`, err);
|
|
197
|
+
}
|
|
172
198
|
// get screenshot
|
|
173
199
|
if (includeScreenshot) {
|
|
174
200
|
// Try to find the tallest element and set the browser to the same height
|
|
@@ -212,7 +238,7 @@ export const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function*
|
|
|
212
238
|
// check if the page is an error page
|
|
213
239
|
const isErrorPage = ['<h2>Unexpected Application Error!</h2>', 'Current route occurred an error'].some((errorHtml) => data.html.includes(errorHtml));
|
|
214
240
|
if (isErrorPage) {
|
|
215
|
-
throw new Error(
|
|
241
|
+
throw new Error(`${url} is an error page`);
|
|
216
242
|
}
|
|
217
243
|
meta.title = data.title;
|
|
218
244
|
meta.description = data.description;
|
package/lib/esm/index.d.ts
CHANGED
|
@@ -3,4 +3,5 @@ export * from './crawler';
|
|
|
3
3
|
export * from './site';
|
|
4
4
|
export * from './services/snapshot';
|
|
5
5
|
export * as utils from './utils';
|
|
6
|
+
export { migrate } from './store/migrate';
|
|
6
7
|
export declare function initCrawler(params: Pick<Config, 'puppeteerPath' | 'siteCron' | 'cookies' | 'localStorage' | 'concurrency'>): Promise<void>;
|
package/lib/esm/index.js
CHANGED
|
@@ -10,23 +10,20 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
10
10
|
/* eslint-disable @typescript-eslint/indent */
|
|
11
11
|
import merge from 'lodash/merge';
|
|
12
12
|
import { config, logger } from './config';
|
|
13
|
-
import { createCrawlQueue } from './crawler';
|
|
14
13
|
import { initCron } from './cron';
|
|
15
14
|
import { ensureBrowser } from './puppeteer';
|
|
16
|
-
import { initDatabase } from './store';
|
|
17
15
|
export * from './crawler';
|
|
18
16
|
export * from './site';
|
|
19
17
|
export * from './services/snapshot';
|
|
20
18
|
export * as utils from './utils';
|
|
19
|
+
export { migrate } from './store/migrate';
|
|
21
20
|
export function initCrawler(params) {
|
|
22
21
|
return __awaiter(this, void 0, void 0, function* () {
|
|
23
22
|
var _a;
|
|
24
23
|
merge(config, params);
|
|
25
24
|
logger.info('Init crawler', { params, config });
|
|
26
25
|
try {
|
|
27
|
-
yield initDatabase();
|
|
28
26
|
yield ensureBrowser();
|
|
29
|
-
yield createCrawlQueue();
|
|
30
27
|
if ((_a = config.siteCron) === null || _a === void 0 ? void 0 : _a.enabled) {
|
|
31
28
|
yield initCron();
|
|
32
29
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { SnapshotModel } from '../store
|
|
1
|
+
import { Transaction, WhereOptions } from '@sequelize/core';
|
|
2
|
+
import { JobState, SnapshotModel } from '../store';
|
|
3
3
|
export declare function convertJobToSnapshot({ job, snapshot }: {
|
|
4
4
|
job: JobState;
|
|
5
5
|
snapshot?: Partial<SnapshotModel>;
|
|
@@ -10,3 +10,6 @@ export declare function formatSnapshot(snapshot: SnapshotModel, columns?: Array<
|
|
|
10
10
|
*/
|
|
11
11
|
export declare function getSnapshot(jobId: string): Promise<SnapshotModel | null>;
|
|
12
12
|
export declare function getLatestSnapshot(url: string): Promise<SnapshotModel | null>;
|
|
13
|
+
export declare function deleteSnapshots(where: WhereOptions<SnapshotModel>, { txn }?: {
|
|
14
|
+
txn?: Transaction;
|
|
15
|
+
}): Promise<string[]>;
|
|
@@ -12,12 +12,11 @@ import pick from 'lodash/pick';
|
|
|
12
12
|
import fs from 'node:fs/promises';
|
|
13
13
|
import path from 'node:path';
|
|
14
14
|
import { joinURL } from 'ufo';
|
|
15
|
-
import { config } from '../config';
|
|
16
|
-
import { Job } from '../store
|
|
17
|
-
import { Snapshot } from '../store/snapshot';
|
|
15
|
+
import { config, logger } from '../config';
|
|
16
|
+
import { Job, Snapshot } from '../store';
|
|
18
17
|
import { formatUrl } from '../utils';
|
|
19
18
|
export function convertJobToSnapshot({ job, snapshot }) {
|
|
20
|
-
return Object.assign({ jobId: job.jobId || job.id, url: job.url, lastModified: job.lastModified || new Date().toISOString(), options: {
|
|
19
|
+
return Object.assign({ jobId: job.jobId || job.id, url: job.url, lastModified: job.lastModified || new Date().toISOString(), replace: job.replace, options: {
|
|
21
20
|
width: job.width,
|
|
22
21
|
height: job.height,
|
|
23
22
|
includeScreenshot: job.includeScreenshot,
|
|
@@ -76,7 +75,37 @@ export function getLatestSnapshot(url) {
|
|
|
76
75
|
url: formatUrl(url),
|
|
77
76
|
status: 'success',
|
|
78
77
|
},
|
|
78
|
+
order: [
|
|
79
|
+
['lastModified', 'DESC'],
|
|
80
|
+
['updatedAt', 'DESC'],
|
|
81
|
+
],
|
|
79
82
|
});
|
|
80
83
|
return snapshot ? formatSnapshot(snapshot) : null;
|
|
81
84
|
});
|
|
82
85
|
}
|
|
86
|
+
export function deleteSnapshots(where_1) {
|
|
87
|
+
return __awaiter(this, arguments, void 0, function* (where, { txn } = {}) {
|
|
88
|
+
const snapshots = yield Snapshot.findAll({
|
|
89
|
+
where,
|
|
90
|
+
order: [
|
|
91
|
+
['lastModified', 'DESC'],
|
|
92
|
+
['updatedAt', 'DESC'],
|
|
93
|
+
],
|
|
94
|
+
});
|
|
95
|
+
const jobIds = yield Promise.all(snapshots.map((snapshot) => __awaiter(this, void 0, void 0, function* () {
|
|
96
|
+
try {
|
|
97
|
+
yield Promise.all([
|
|
98
|
+
snapshot.html && fs.unlink(path.join(config.dataDir, snapshot.html)),
|
|
99
|
+
snapshot.screenshot && fs.unlink(path.join(config.dataDir, snapshot.screenshot)),
|
|
100
|
+
]);
|
|
101
|
+
yield snapshot.destroy({ transaction: txn });
|
|
102
|
+
return snapshot.jobId;
|
|
103
|
+
}
|
|
104
|
+
catch (error) {
|
|
105
|
+
logger.error('Failed to delete snapshot', { error, snapshot });
|
|
106
|
+
throw error;
|
|
107
|
+
}
|
|
108
|
+
})));
|
|
109
|
+
return jobIds.filter(Boolean);
|
|
110
|
+
});
|
|
111
|
+
}
|
package/lib/esm/site.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import { Site } from './config';
|
|
2
|
-
export declare const crawlSite: ({ url, pathname, interval }: Site) => Promise<(string | null)[]>;
|
|
2
|
+
export declare const crawlSite: ({ url, pathname, interval }: Site) => Promise<(`${string}-${string}-${string}-${string}-${string}` | null)[]>;
|
package/lib/esm/site.js
CHANGED
|
@@ -8,12 +8,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
10
|
import uniq from 'lodash/uniq';
|
|
11
|
+
import { randomUUID } from 'node:crypto';
|
|
11
12
|
import pMap from 'p-map';
|
|
12
13
|
import { config, logger } from './config';
|
|
13
|
-
import {
|
|
14
|
-
import { Snapshot } from './store
|
|
14
|
+
import { createCrawlQueue } from './crawler';
|
|
15
|
+
import { Snapshot } from './store';
|
|
15
16
|
import { formatUrl, getSitemapList } from './utils';
|
|
16
17
|
const crawlBlockletRunningMap = new Map();
|
|
18
|
+
const crawlQueue = createCrawlQueue('cronJobs');
|
|
17
19
|
function parseSitemapUrl(sitemapItem) {
|
|
18
20
|
var _a;
|
|
19
21
|
const links = ((_a = sitemapItem.links) === null || _a === void 0 ? void 0 : _a.map((item) => item.url)) || [];
|
|
@@ -61,12 +63,16 @@ export const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ ur
|
|
|
61
63
|
url,
|
|
62
64
|
});
|
|
63
65
|
crawlCount++;
|
|
64
|
-
|
|
66
|
+
const jobId = randomUUID();
|
|
67
|
+
crawlQueue.push({
|
|
68
|
+
id: jobId,
|
|
65
69
|
url,
|
|
66
70
|
lastModified: sitemapItem.lastmod,
|
|
67
71
|
includeScreenshot: false,
|
|
68
72
|
includeHtml: true,
|
|
73
|
+
replace: true,
|
|
69
74
|
});
|
|
75
|
+
return jobId;
|
|
70
76
|
}), { concurrency: ((_b = config.siteCron) === null || _b === void 0 ? void 0 : _b.concurrency) || 30 });
|
|
71
77
|
logger.info('Enqueued jobs from sitemap finished', {
|
|
72
78
|
url,
|
package/lib/esm/store/index.d.ts
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
1
|
import { Sequelize } from '@sequelize/core';
|
|
2
2
|
import { SqliteDialect } from '@sequelize/sqlite3';
|
|
3
|
-
|
|
3
|
+
declare const sequelize: Sequelize<SqliteDialect>;
|
|
4
|
+
export { sequelize };
|
|
5
|
+
export * from './job';
|
|
6
|
+
export * from './snapshot';
|
package/lib/esm/store/index.js
CHANGED
|
@@ -1,51 +1,29 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
1
|
import { Sequelize } from '@sequelize/core';
|
|
11
2
|
import { SqliteDialect } from '@sequelize/sqlite3';
|
|
12
3
|
import path from 'path';
|
|
13
4
|
import { config, logger } from '../config';
|
|
14
5
|
import { Job } from './job';
|
|
15
6
|
import { Snapshot } from './snapshot';
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
sequelize.query('pragma journal_size_limit = 67108864;'),
|
|
40
|
-
]);
|
|
41
|
-
yield sequelize.authenticate();
|
|
42
|
-
yield sequelize.sync({ alter: process.env.ALTER_SQLITE === 'true' });
|
|
43
|
-
logger.info('Successfully connected to database');
|
|
44
|
-
}
|
|
45
|
-
catch (error) {
|
|
46
|
-
logger.error('Failed to connect to database:', error);
|
|
47
|
-
throw error;
|
|
48
|
-
}
|
|
49
|
-
return sequelize;
|
|
50
|
-
});
|
|
51
|
-
}
|
|
7
|
+
/**
 * Module-wide Sequelize instance backed by the SQLite file `snap-kit.db`
 * inside `config.dataDir`.
 *
 * SQL statements are forwarded to `logger.debug` only when the `SQLITE_LOG`
 * environment variable is set. The `retry` settings target errors matching
 * `SQLITE_BUSY` (with `max: 10`).
 */
const sequelize = new Sequelize({
    dialect: SqliteDialect,
    storage: path.join(config.dataDir, 'snap-kit.db'),
    logging: (msg) => process.env.SQLITE_LOG && logger.debug(msg),
    pool: {
        min: 0,
        max: 10,
        idle: 10000,
    },
    retry: {
        match: [/SQLITE_BUSY/],
        name: 'query',
        max: 10,
    },
});
// The pragmas are issued at module load without being awaited. Each query gets
// its own rejection handler so that a failure is logged instead of surfacing
// as an unhandled promise rejection.
for (const pragma of [
    'pragma journal_mode = WAL;',
    'pragma synchronous = normal;',
    'pragma journal_size_limit = 67108864;',
]) {
    sequelize.query(pragma).catch((error) => {
        logger.error('Failed to apply SQLite pragma', { pragma, error });
    });
}
// Register both models on the shared instance.
Job.initModel(sequelize);
Snapshot.initModel(sequelize);
|
|
27
|
+
export { sequelize };
|
|
28
|
+
export * from './job';
|
|
29
|
+
export * from './snapshot';
|
package/lib/esm/store/job.d.ts
CHANGED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/* eslint-disable global-require */
|
|
2
|
+
import { SequelizeStorage, Umzug } from 'umzug';
|
|
3
|
+
import { sequelize } from './index';
|
|
4
|
+
import * as migration20250615 from './migrations/20250615-genesis';
|
|
5
|
+
import * as migration20250616Replace from './migrations/20250616-replace';
|
|
6
|
+
/**
 * Umzug runner for the crawler database.
 *
 * Registers the two migrations (`20250615-genesis`, then `20250616-replace`)
 * in that order. Each `up`/`down` forwards only `{ context }` to the matching
 * migration module. The context is the query interface of the shared
 * `sequelize` instance, applied migrations are tracked through
 * `SequelizeStorage`, and Umzug logs to `console`.
 */
const umzug = new Umzug({
    migrations: [
        ['20250615-genesis', migration20250615],
        ['20250616-replace', migration20250616Replace],
    ].map(([name, steps]) => ({
        name,
        // `up`/`down` are looked up on the module when the migration actually runs.
        up: ({ context }) => steps.up({ context }),
        down: ({ context }) => steps.down({ context }),
    })),
    context: sequelize.getQueryInterface(),
    storage: new SequelizeStorage({ sequelize }),
    logger: console,
});
/**
 * Runs `umzug.up()` and hands back whatever it returns.
 */
export function migrate() {
    const pending = umzug.up();
    return pending;
}
|
|
26
|
+
export { umzug };
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/*
 * Helper that drives a generator function as an asynchronous routine.
 * An `__awaiter` already present on `this` is reused; otherwise the function
 * below is used. It returns a new `P` (falling back to the global Promise when
 * `P` is falsy). Each yielded value is wrapped by `adopt` and, once it settles,
 * fed back into the generator via `next` (fulfilled) or `throw` (rejected).
 * The returned promise resolves with the generator's final value and rejects
 * when `next`/`throw` raises.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
/* eslint-disable no-console */
|
|
11
|
+
import { DataTypes } from '@sequelize/core';
|
|
12
|
+
export function up(_a) {
|
|
13
|
+
return __awaiter(this, arguments, void 0, function* ({ context }) {
|
|
14
|
+
console.log('[20250615-genesis:up] Migrating...');
|
|
15
|
+
yield context.createTable('snap', {
|
|
16
|
+
jobId: {
|
|
17
|
+
type: DataTypes.STRING,
|
|
18
|
+
primaryKey: true,
|
|
19
|
+
allowNull: false,
|
|
20
|
+
},
|
|
21
|
+
url: {
|
|
22
|
+
type: DataTypes.STRING,
|
|
23
|
+
allowNull: false,
|
|
24
|
+
index: true,
|
|
25
|
+
},
|
|
26
|
+
status: {
|
|
27
|
+
type: DataTypes.ENUM('success', 'failed', 'pending'),
|
|
28
|
+
allowNull: false,
|
|
29
|
+
},
|
|
30
|
+
html: {
|
|
31
|
+
type: DataTypes.TEXT,
|
|
32
|
+
allowNull: true,
|
|
33
|
+
},
|
|
34
|
+
screenshot: {
|
|
35
|
+
type: DataTypes.STRING,
|
|
36
|
+
allowNull: true,
|
|
37
|
+
},
|
|
38
|
+
error: {
|
|
39
|
+
type: DataTypes.STRING,
|
|
40
|
+
allowNull: true,
|
|
41
|
+
},
|
|
42
|
+
lastModified: {
|
|
43
|
+
type: DataTypes.STRING,
|
|
44
|
+
allowNull: true,
|
|
45
|
+
},
|
|
46
|
+
meta: {
|
|
47
|
+
type: DataTypes.JSON,
|
|
48
|
+
allowNull: true,
|
|
49
|
+
},
|
|
50
|
+
options: {
|
|
51
|
+
type: DataTypes.JSON,
|
|
52
|
+
allowNull: true,
|
|
53
|
+
},
|
|
54
|
+
createdAt: {
|
|
55
|
+
type: DataTypes.DATE,
|
|
56
|
+
defaultValue: DataTypes.NOW,
|
|
57
|
+
},
|
|
58
|
+
updatedAt: {
|
|
59
|
+
type: DataTypes.DATE,
|
|
60
|
+
defaultValue: DataTypes.NOW,
|
|
61
|
+
},
|
|
62
|
+
});
|
|
63
|
+
yield context.createTable('jobs', {
|
|
64
|
+
id: {
|
|
65
|
+
type: DataTypes.STRING(40),
|
|
66
|
+
primaryKey: true,
|
|
67
|
+
},
|
|
68
|
+
queue: {
|
|
69
|
+
type: DataTypes.STRING(32),
|
|
70
|
+
allowNull: false,
|
|
71
|
+
},
|
|
72
|
+
job: {
|
|
73
|
+
type: DataTypes.JSON,
|
|
74
|
+
allowNull: false,
|
|
75
|
+
},
|
|
76
|
+
retryCount: {
|
|
77
|
+
type: DataTypes.INTEGER,
|
|
78
|
+
},
|
|
79
|
+
delay: {
|
|
80
|
+
type: DataTypes.INTEGER,
|
|
81
|
+
},
|
|
82
|
+
willRunAt: {
|
|
83
|
+
type: DataTypes.INTEGER,
|
|
84
|
+
},
|
|
85
|
+
cancelled: {
|
|
86
|
+
type: DataTypes.BOOLEAN,
|
|
87
|
+
defaultValue: false,
|
|
88
|
+
},
|
|
89
|
+
createdAt: {
|
|
90
|
+
type: DataTypes.DATE,
|
|
91
|
+
defaultValue: DataTypes.NOW,
|
|
92
|
+
index: true,
|
|
93
|
+
},
|
|
94
|
+
updatedAt: {
|
|
95
|
+
type: DataTypes.DATE,
|
|
96
|
+
defaultValue: DataTypes.NOW,
|
|
97
|
+
index: true,
|
|
98
|
+
},
|
|
99
|
+
});
|
|
100
|
+
console.log('[20250615-genesis:up] Migrated successfully!');
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
export function down(_a) {
|
|
104
|
+
return __awaiter(this, arguments, void 0, function* ({ context }) {
|
|
105
|
+
console.log('[20250615-genesis:down] Migrating...');
|
|
106
|
+
yield context.dropTable('snap');
|
|
107
|
+
yield context.dropTable('jobs');
|
|
108
|
+
console.log('[20250615-genesis:down] Migrated successfully!');
|
|
109
|
+
});
|
|
110
|
+
}
|