@arcblock/crawler 1.1.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. package/lib/cjs/crawler.d.ts +3 -4
  2. package/lib/cjs/crawler.js +58 -32
  3. package/lib/cjs/index.d.ts +1 -0
  4. package/lib/cjs/index.js +3 -5
  5. package/lib/cjs/services/snapshot.d.ts +5 -2
  6. package/lib/cjs/services/snapshot.js +36 -6
  7. package/lib/cjs/site.d.ts +1 -1
  8. package/lib/cjs/site.js +9 -3
  9. package/lib/cjs/store/index.d.ts +4 -1
  10. package/lib/cjs/store/index.js +37 -45
  11. package/lib/cjs/store/job.d.ts +2 -0
  12. package/lib/cjs/store/migrate.d.ts +4 -0
  13. package/lib/cjs/store/migrate.js +63 -0
  14. package/lib/cjs/store/migrations/20250615-genesis.d.ts +6 -0
  15. package/lib/cjs/store/migrations/20250615-genesis.js +114 -0
  16. package/lib/cjs/store/migrations/20250616-replace.d.ts +6 -0
  17. package/lib/cjs/store/migrations/20250616-replace.js +40 -0
  18. package/lib/cjs/store/snapshot.d.ts +2 -0
  19. package/lib/cjs/store/snapshot.js +7 -0
  20. package/lib/esm/crawler.d.ts +3 -4
  21. package/lib/esm/crawler.js +55 -29
  22. package/lib/esm/index.d.ts +1 -0
  23. package/lib/esm/index.js +1 -4
  24. package/lib/esm/services/snapshot.d.ts +5 -2
  25. package/lib/esm/services/snapshot.js +33 -4
  26. package/lib/esm/site.d.ts +1 -1
  27. package/lib/esm/site.js +9 -3
  28. package/lib/esm/store/index.d.ts +4 -1
  29. package/lib/esm/store/index.js +23 -45
  30. package/lib/esm/store/job.d.ts +2 -0
  31. package/lib/esm/store/migrate.d.ts +4 -0
  32. package/lib/esm/store/migrate.js +26 -0
  33. package/lib/esm/store/migrations/20250615-genesis.d.ts +6 -0
  34. package/lib/esm/store/migrations/20250615-genesis.js +110 -0
  35. package/lib/esm/store/migrations/20250616-replace.d.ts +6 -0
  36. package/lib/esm/store/migrations/20250616-replace.js +36 -0
  37. package/lib/esm/store/snapshot.d.ts +2 -0
  38. package/lib/esm/store/snapshot.js +7 -0
  39. package/package.json +3 -2
@@ -1,11 +1,10 @@
1
- import { JobState } from './store/job';
2
- import { SnapshotModel } from './store/snapshot';
3
- export declare function createCrawlQueue(): void;
1
+ import { JobState, SnapshotModel } from './store';
2
+ export declare function createCrawlQueue(queue: string): any;
4
3
  export declare function getDataDir(): Promise<{
5
4
  htmlDir: string;
6
5
  screenshotDir: string;
7
6
  }>;
8
- export declare const getPageContent: ({ url, includeScreenshot, includeHtml, width, height, quality, timeout, fullPage, headers, cookies, localStorage, }: JobState) => Promise<{
7
+ export declare const getPageContent: ({ url, includeScreenshot, includeHtml, width, height, quality, timeout, waitTime, fullPage, headers, cookies, localStorage, }: JobState) => Promise<{
9
8
  html: string | null;
10
9
  screenshot: Uint8Array<ArrayBufferLike> | null;
11
10
  meta: {
@@ -24,15 +24,15 @@ const path_1 = __importDefault(require("path"));
24
24
  const config_1 = require("./config");
25
25
  const puppeteer_1 = require("./puppeteer");
26
26
  const snapshot_1 = require("./services/snapshot");
27
- const job_1 = require("./store/job");
28
- const snapshot_2 = require("./store/snapshot");
27
+ const store_1 = require("./store");
29
28
  const utils_1 = require("./utils");
30
29
  const { BaseState } = require('@abtnode/models');
31
- let crawlQueue;
32
- function createCrawlQueue() {
33
- const db = new BaseState(job_1.Job);
34
- crawlQueue = (0, queue_1.default)({
35
- store: new sequelize_1.default(db, 'crawler'),
30
+ // eslint-disable-next-line import/no-mutable-exports
31
+ const crawlQueue = createCrawlQueue('urlCrawler');
32
+ function createCrawlQueue(queue) {
33
+ const db = new BaseState(store_1.Job);
34
+ return (0, queue_1.default)({
35
+ store: new sequelize_1.default(db, queue),
36
36
  concurrency: config_1.config.concurrency,
37
37
  onJob: (job) => __awaiter(this, void 0, void 0, function* () {
38
38
  config_1.logger.info('Starting to execute crawl job', job);
@@ -46,7 +46,7 @@ function createCrawlQueue() {
46
46
  error: 'Denied by robots.txt',
47
47
  },
48
48
  });
49
- yield snapshot_2.Snapshot.upsert(snapshot);
49
+ yield store_1.Snapshot.upsert(snapshot);
50
50
  return snapshot;
51
51
  }
52
52
  // if index reach autoCloseBrowserCount, close browser
@@ -70,25 +70,42 @@ function createCrawlQueue() {
70
70
  error: 'Failed to crawl content',
71
71
  },
72
72
  });
73
- yield snapshot_2.Snapshot.upsert(snapshot);
73
+ yield store_1.Snapshot.upsert(snapshot);
74
74
  return snapshot;
75
75
  }
76
- // save html and screenshot to data dir
77
- const { screenshotPath, htmlPath } = yield saveSnapshotToLocal({
78
- screenshot: result.screenshot,
79
- html: result.html,
80
- });
81
- // const lastModified = job.lastmodMap?.get(url) || new Date().toISOString();
82
- const snapshot = (0, snapshot_1.convertJobToSnapshot)({
83
- job: formattedJob,
84
- snapshot: {
85
- status: 'success',
86
- screenshot: screenshotPath === null || screenshotPath === void 0 ? void 0 : screenshotPath.replace(config_1.config.dataDir, ''),
87
- html: htmlPath === null || htmlPath === void 0 ? void 0 : htmlPath.replace(config_1.config.dataDir, ''),
88
- meta: result.meta,
89
- },
90
- });
91
- yield snapshot_2.Snapshot.upsert(snapshot);
76
+ const snapshot = yield store_1.sequelize.transaction((txn) => __awaiter(this, void 0, void 0, function* () {
77
+ // delete old snapshot
78
+ if (formattedJob.replace) {
79
+ try {
80
+ const deletedJobIds = yield (0, snapshot_1.deleteSnapshots)({
81
+ url: formattedJob.url,
82
+ replace: true,
83
+ }, { txn });
84
+ if (deletedJobIds) {
85
+ config_1.logger.info('Deleted old snapshot', { deletedJobIds });
86
+ }
87
+ }
88
+ catch (error) {
89
+ config_1.logger.error('Failed to delete old snapshot', { error, formattedJob });
90
+ }
91
+ }
92
+ // save html and screenshot to data dir
93
+ const { screenshotPath, htmlPath } = yield saveSnapshotToLocal({
94
+ screenshot: result.screenshot,
95
+ html: result.html,
96
+ });
97
+ const snapshot = (0, snapshot_1.convertJobToSnapshot)({
98
+ job: formattedJob,
99
+ snapshot: {
100
+ status: 'success',
101
+ screenshot: screenshotPath === null || screenshotPath === void 0 ? void 0 : screenshotPath.replace(config_1.config.dataDir, ''),
102
+ html: htmlPath === null || htmlPath === void 0 ? void 0 : htmlPath.replace(config_1.config.dataDir, ''),
103
+ meta: result.meta,
104
+ },
105
+ });
106
+ yield store_1.Snapshot.upsert(snapshot, { transaction: txn });
107
+ return snapshot;
108
+ }));
92
109
  return snapshot;
93
110
  }
94
111
  catch (error) {
@@ -100,7 +117,7 @@ function createCrawlQueue() {
100
117
  error: 'Internal error',
101
118
  },
102
119
  });
103
- yield snapshot_2.Snapshot.upsert(snapshot);
120
+ yield store_1.Snapshot.upsert(snapshot);
104
121
  return snapshot;
105
122
  }
106
123
  }),
@@ -138,7 +155,7 @@ function saveSnapshotToLocal(_a) {
138
155
  };
139
156
  });
140
157
  }
141
- const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, includeScreenshot = true, includeHtml = true, width = 1440, height = 900, quality = 80, timeout = 90 * 1000, fullPage = false, headers, cookies, localStorage, }) {
158
+ const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, includeScreenshot = true, includeHtml = true, width = 1440, height = 900, quality = 80, timeout = 90 * 1000, waitTime = 0, fullPage = false, headers, cookies, localStorage, }) {
142
159
  const page = yield (0, puppeteer_1.initPage)();
143
160
  if (width && height) {
144
161
  yield page.setViewport({ width, height, deviceScaleFactor: 2 });
@@ -175,9 +192,18 @@ const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url,
175
192
  }
176
193
  // await for networkidle0
177
194
  // https://pptr.dev/api/puppeteer.page.waitfornetworkidle
178
- yield page.waitForNetworkIdle({
179
- idleTime: 1.5 * 1000,
180
- });
195
+ try {
196
+ yield Promise.all([
197
+ page.waitForNetworkIdle({
198
+ idleTime: 1.5 * 1000,
199
+ timeout,
200
+ }),
201
+ (0, utils_1.sleep)(waitTime),
202
+ ]);
203
+ }
204
+ catch (err) {
205
+ config_1.logger.warn(`Failed to wait for network idle in ${url}:`, err);
206
+ }
181
207
  // get screenshot
182
208
  if (includeScreenshot) {
183
209
  // Try to find the tallest element and set the browser to the same height
@@ -221,7 +247,7 @@ const getPageContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url,
221
247
  // check if the page is an error page
222
248
  const isErrorPage = ['<h2>Unexpected Application Error!</h2>', 'Current route occurred an error'].some((errorHtml) => data.html.includes(errorHtml));
223
249
  if (isErrorPage) {
224
- throw new Error('Page is an error page');
250
+ throw new Error(`${url} is an error page`);
225
251
  }
226
252
  meta.title = data.title;
227
253
  meta.description = data.description;
@@ -257,7 +283,7 @@ exports.getPageContent = getPageContent;
257
283
  function crawlUrl(params, callback) {
258
284
  return __awaiter(this, void 0, void 0, function* () {
259
285
  // skip duplicate job
260
- const existsJob = yield job_1.Job.isExists(params);
286
+ const existsJob = yield store_1.Job.isExists(params);
261
287
  if (existsJob) {
262
288
  config_1.logger.info(`Crawl job already exists for ${params.url}, skip`);
263
289
  return existsJob.id;
@@ -3,4 +3,5 @@ export * from './crawler';
3
3
  export * from './site';
4
4
  export * from './services/snapshot';
5
5
  export * as utils from './utils';
6
+ export { migrate } from './store/migrate';
6
7
  export declare function initCrawler(params: Pick<Config, 'puppeteerPath' | 'siteCron' | 'cookies' | 'localStorage' | 'concurrency'>): Promise<void>;
package/lib/cjs/index.js CHANGED
@@ -48,28 +48,26 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
48
48
  return (mod && mod.__esModule) ? mod : { "default": mod };
49
49
  };
50
50
  Object.defineProperty(exports, "__esModule", { value: true });
51
- exports.utils = void 0;
51
+ exports.migrate = exports.utils = void 0;
52
52
  exports.initCrawler = initCrawler;
53
53
  /* eslint-disable @typescript-eslint/indent */
54
54
  const merge_1 = __importDefault(require("lodash/merge"));
55
55
  const config_1 = require("./config");
56
- const crawler_1 = require("./crawler");
57
56
  const cron_1 = require("./cron");
58
57
  const puppeteer_1 = require("./puppeteer");
59
- const store_1 = require("./store");
60
58
  __exportStar(require("./crawler"), exports);
61
59
  __exportStar(require("./site"), exports);
62
60
  __exportStar(require("./services/snapshot"), exports);
63
61
  exports.utils = __importStar(require("./utils"));
62
+ var migrate_1 = require("./store/migrate");
63
+ Object.defineProperty(exports, "migrate", { enumerable: true, get: function () { return migrate_1.migrate; } });
64
64
  function initCrawler(params) {
65
65
  return __awaiter(this, void 0, void 0, function* () {
66
66
  var _a;
67
67
  (0, merge_1.default)(config_1.config, params);
68
68
  config_1.logger.info('Init crawler', { params, config: config_1.config });
69
69
  try {
70
- yield (0, store_1.initDatabase)();
71
70
  yield (0, puppeteer_1.ensureBrowser)();
72
- yield (0, crawler_1.createCrawlQueue)();
73
71
  if ((_a = config_1.config.siteCron) === null || _a === void 0 ? void 0 : _a.enabled) {
74
72
  yield (0, cron_1.initCron)();
75
73
  }
@@ -1,5 +1,5 @@
1
- import { JobState } from '../store/job';
2
- import { SnapshotModel } from '../store/snapshot';
1
+ import { Transaction, WhereOptions } from '@sequelize/core';
2
+ import { JobState, SnapshotModel } from '../store';
3
3
  export declare function convertJobToSnapshot({ job, snapshot }: {
4
4
  job: JobState;
5
5
  snapshot?: Partial<SnapshotModel>;
@@ -10,3 +10,6 @@ export declare function formatSnapshot(snapshot: SnapshotModel, columns?: Array<
10
10
  */
11
11
  export declare function getSnapshot(jobId: string): Promise<SnapshotModel | null>;
12
12
  export declare function getLatestSnapshot(url: string): Promise<SnapshotModel | null>;
13
+ export declare function deleteSnapshots(where: WhereOptions<SnapshotModel>, { txn }?: {
14
+ txn?: Transaction;
15
+ }): Promise<string[]>;
@@ -16,17 +16,17 @@ exports.convertJobToSnapshot = convertJobToSnapshot;
16
16
  exports.formatSnapshot = formatSnapshot;
17
17
  exports.getSnapshot = getSnapshot;
18
18
  exports.getLatestSnapshot = getLatestSnapshot;
19
+ exports.deleteSnapshots = deleteSnapshots;
19
20
  const cloneDeep_1 = __importDefault(require("lodash/cloneDeep"));
20
21
  const pick_1 = __importDefault(require("lodash/pick"));
21
22
  const promises_1 = __importDefault(require("node:fs/promises"));
22
23
  const node_path_1 = __importDefault(require("node:path"));
23
24
  const ufo_1 = require("ufo");
24
25
  const config_1 = require("../config");
25
- const job_1 = require("../store/job");
26
- const snapshot_1 = require("../store/snapshot");
26
+ const store_1 = require("../store");
27
27
  const utils_1 = require("../utils");
28
28
  function convertJobToSnapshot({ job, snapshot }) {
29
- return Object.assign({ jobId: job.jobId || job.id, url: job.url, lastModified: job.lastModified || new Date().toISOString(), options: {
29
+ return Object.assign({ jobId: job.jobId || job.id, url: job.url, lastModified: job.lastModified || new Date().toISOString(), replace: job.replace, options: {
30
30
  width: job.width,
31
31
  height: job.height,
32
32
  includeScreenshot: job.includeScreenshot,
@@ -64,11 +64,11 @@ function formatSnapshot(snapshot, columns) {
64
64
  */
65
65
  function getSnapshot(jobId) {
66
66
  return __awaiter(this, void 0, void 0, function* () {
67
- const snapshot = yield snapshot_1.Snapshot.findSnapshot({ where: { jobId } });
67
+ const snapshot = yield store_1.Snapshot.findSnapshot({ where: { jobId } });
68
68
  if (snapshot) {
69
69
  return formatSnapshot(snapshot);
70
70
  }
71
- const job = yield job_1.Job.findJob({ id: jobId });
71
+ const job = yield store_1.Job.findJob({ id: jobId });
72
72
  if (job) {
73
73
  return {
74
74
  jobId,
@@ -80,12 +80,42 @@ function getSnapshot(jobId) {
80
80
  }
81
81
  function getLatestSnapshot(url) {
82
82
  return __awaiter(this, void 0, void 0, function* () {
83
- const snapshot = yield snapshot_1.Snapshot.findSnapshot({
83
+ const snapshot = yield store_1.Snapshot.findSnapshot({
84
84
  where: {
85
85
  url: (0, utils_1.formatUrl)(url),
86
86
  status: 'success',
87
87
  },
88
+ order: [
89
+ ['lastModified', 'DESC'],
90
+ ['updatedAt', 'DESC'],
91
+ ],
88
92
  });
89
93
  return snapshot ? formatSnapshot(snapshot) : null;
90
94
  });
91
95
  }
96
+ function deleteSnapshots(where_1) {
97
+ return __awaiter(this, arguments, void 0, function* (where, { txn } = {}) {
98
+ const snapshots = yield store_1.Snapshot.findAll({
99
+ where,
100
+ order: [
101
+ ['lastModified', 'DESC'],
102
+ ['updatedAt', 'DESC'],
103
+ ],
104
+ });
105
+ const jobIds = yield Promise.all(snapshots.map((snapshot) => __awaiter(this, void 0, void 0, function* () {
106
+ try {
107
+ yield Promise.all([
108
+ snapshot.html && promises_1.default.unlink(node_path_1.default.join(config_1.config.dataDir, snapshot.html)),
109
+ snapshot.screenshot && promises_1.default.unlink(node_path_1.default.join(config_1.config.dataDir, snapshot.screenshot)),
110
+ ]);
111
+ yield snapshot.destroy({ transaction: txn });
112
+ return snapshot.jobId;
113
+ }
114
+ catch (error) {
115
+ config_1.logger.error('Failed to delete snapshot', { error, snapshot });
116
+ throw error;
117
+ }
118
+ })));
119
+ return jobIds.filter(Boolean);
120
+ });
121
+ }
package/lib/cjs/site.d.ts CHANGED
@@ -1,2 +1,2 @@
1
1
  import { Site } from './config';
2
- export declare const crawlSite: ({ url, pathname, interval }: Site) => Promise<(string | null)[]>;
2
+ export declare const crawlSite: ({ url, pathname, interval }: Site) => Promise<(`${string}-${string}-${string}-${string}-${string}` | null)[]>;
package/lib/cjs/site.js CHANGED
@@ -14,12 +14,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
15
  exports.crawlSite = void 0;
16
16
  const uniq_1 = __importDefault(require("lodash/uniq"));
17
+ const node_crypto_1 = require("node:crypto");
17
18
  const p_map_1 = __importDefault(require("p-map"));
18
19
  const config_1 = require("./config");
19
20
  const crawler_1 = require("./crawler");
20
- const snapshot_1 = require("./store/snapshot");
21
+ const store_1 = require("./store");
21
22
  const utils_1 = require("./utils");
22
23
  const crawlBlockletRunningMap = new Map();
24
+ const crawlQueue = (0, crawler_1.createCrawlQueue)('cronJobs');
23
25
  function parseSitemapUrl(sitemapItem) {
24
26
  var _a;
25
27
  const links = ((_a = sitemapItem.links) === null || _a === void 0 ? void 0 : _a.map((item) => item.url)) || [];
@@ -48,7 +50,7 @@ const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, path
48
50
  try {
49
51
  const jobIds = yield (0, p_map_1.default)(sitemapItems, (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, sitemapItem }) {
50
52
  processCount++;
51
- const snapshot = yield snapshot_1.Snapshot.findOne({ where: { url: (0, utils_1.formatUrl)(url) } });
53
+ const snapshot = yield store_1.Snapshot.findOne({ where: { url: (0, utils_1.formatUrl)(url) } });
52
54
  if (snapshot === null || snapshot === void 0 ? void 0 : snapshot.lastModified) {
53
55
  const lastModified = new Date(snapshot.lastModified);
54
56
  // skip if snapshot lastModified is greater than sitemap lastmod
@@ -67,12 +69,16 @@ const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, path
67
69
  url,
68
70
  });
69
71
  crawlCount++;
70
- return (0, crawler_1.crawlUrl)({
72
+ const jobId = (0, node_crypto_1.randomUUID)();
73
+ crawlQueue.push({
74
+ id: jobId,
71
75
  url,
72
76
  lastModified: sitemapItem.lastmod,
73
77
  includeScreenshot: false,
74
78
  includeHtml: true,
79
+ replace: true,
75
80
  });
81
+ return jobId;
76
82
  }), { concurrency: ((_b = config_1.config.siteCron) === null || _b === void 0 ? void 0 : _b.concurrency) || 30 });
77
83
  config_1.logger.info('Enqueued jobs from sitemap finished', {
78
84
  url,
@@ -1,3 +1,6 @@
1
1
  import { Sequelize } from '@sequelize/core';
2
2
  import { SqliteDialect } from '@sequelize/sqlite3';
3
- export declare function initDatabase(): Promise<Sequelize<SqliteDialect>>;
3
+ declare const sequelize: Sequelize<SqliteDialect>;
4
+ export { sequelize };
5
+ export * from './job';
6
+ export * from './snapshot';
@@ -1,57 +1,49 @@
1
1
  "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
10
15
  };
11
16
  var __importDefault = (this && this.__importDefault) || function (mod) {
12
17
  return (mod && mod.__esModule) ? mod : { "default": mod };
13
18
  };
14
19
  Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.initDatabase = initDatabase;
20
+ exports.sequelize = void 0;
16
21
  const core_1 = require("@sequelize/core");
17
22
  const sqlite3_1 = require("@sequelize/sqlite3");
18
23
  const path_1 = __importDefault(require("path"));
19
24
  const config_1 = require("../config");
20
25
  const job_1 = require("./job");
21
26
  const snapshot_1 = require("./snapshot");
22
- function initDatabase() {
23
- return __awaiter(this, void 0, void 0, function* () {
24
- const sequelize = new core_1.Sequelize({
25
- dialect: sqlite3_1.SqliteDialect,
26
- storage: path_1.default.join(config_1.config.dataDir, 'snap-kit.db'),
27
- logging: (msg) => process.env.SQLITE_LOG && config_1.logger.debug(msg),
28
- pool: {
29
- min: 0,
30
- max: 10,
31
- idle: 10000,
32
- },
33
- retry: {
34
- match: [/SQLITE_BUSY/],
35
- name: 'query',
36
- max: 10,
37
- },
38
- });
39
- job_1.Job.initModel(sequelize);
40
- snapshot_1.Snapshot.initModel(sequelize);
41
- try {
42
- yield Promise.all([
43
- sequelize.query('pragma journal_mode = WAL;'),
44
- sequelize.query('pragma synchronous = normal;'),
45
- sequelize.query('pragma journal_size_limit = 67108864;'),
46
- ]);
47
- yield sequelize.authenticate();
48
- yield sequelize.sync({ alter: process.env.ALTER_SQLITE === 'true' });
49
- config_1.logger.info('Successfully connected to database');
50
- }
51
- catch (error) {
52
- config_1.logger.error('Failed to connect to database:', error);
53
- throw error;
54
- }
55
- return sequelize;
56
- });
57
- }
27
+ const sequelize = new core_1.Sequelize({
28
+ dialect: sqlite3_1.SqliteDialect,
29
+ storage: path_1.default.join(config_1.config.dataDir, 'snap-kit.db'),
30
+ logging: (msg) => process.env.SQLITE_LOG && config_1.logger.debug(msg),
31
+ pool: {
32
+ min: 0,
33
+ max: 10,
34
+ idle: 10000,
35
+ },
36
+ retry: {
37
+ match: [/SQLITE_BUSY/],
38
+ name: 'query',
39
+ max: 10,
40
+ },
41
+ });
42
+ exports.sequelize = sequelize;
43
+ sequelize.query('pragma journal_mode = WAL;');
44
+ sequelize.query('pragma synchronous = normal;');
45
+ sequelize.query('pragma journal_size_limit = 67108864;');
46
+ job_1.Job.initModel(sequelize);
47
+ snapshot_1.Snapshot.initModel(sequelize);
48
+ __exportStar(require("./job"), exports);
49
+ __exportStar(require("./snapshot"), exports);
@@ -12,6 +12,8 @@ export interface JobState {
12
12
  timeout?: number;
13
13
  fullPage?: boolean;
14
14
  lastModified?: string;
15
+ waitTime?: number;
16
+ replace?: boolean;
15
17
  headers?: Record<string, string>;
16
18
  cookies?: CookieParam[];
17
19
  localStorage?: {
@@ -0,0 +1,4 @@
1
+ import { Umzug } from 'umzug';
2
+ declare const umzug: Umzug<import("@sequelize/sqlite3").SqliteQueryInterface<import("@sequelize/sqlite3").SqliteDialect>>;
3
+ export declare function migrate(): Promise<import("umzug").MigrationMeta[]>;
4
+ export { umzug };
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.umzug = void 0;
37
+ exports.migrate = migrate;
38
+ /* eslint-disable global-require */
39
+ const umzug_1 = require("umzug");
40
+ const index_1 = require("./index");
41
+ const migration20250615 = __importStar(require("./migrations/20250615-genesis"));
42
+ const migration20250616Replace = __importStar(require("./migrations/20250616-replace"));
43
+ const umzug = new umzug_1.Umzug({
44
+ migrations: [
45
+ {
46
+ name: '20250615-genesis',
47
+ up: ({ context }) => migration20250615.up({ context }),
48
+ down: ({ context }) => migration20250615.down({ context }),
49
+ },
50
+ {
51
+ name: '20250616-replace',
52
+ up: ({ context }) => migration20250616Replace.up({ context }),
53
+ down: ({ context }) => migration20250616Replace.down({ context }),
54
+ },
55
+ ],
56
+ context: index_1.sequelize.getQueryInterface(),
57
+ storage: new umzug_1.SequelizeStorage({ sequelize: index_1.sequelize }),
58
+ logger: console,
59
+ });
60
+ exports.umzug = umzug;
61
+ function migrate() {
62
+ return umzug.up();
63
+ }
@@ -0,0 +1,6 @@
1
+ export declare function up({ context }: {
2
+ context: any;
3
+ }): Promise<void>;
4
+ export declare function down({ context }: {
5
+ context: any;
6
+ }): Promise<void>;
@@ -0,0 +1,114 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.up = up;
13
+ exports.down = down;
14
+ /* eslint-disable no-console */
15
+ const core_1 = require("@sequelize/core");
16
+ function up(_a) {
17
+ return __awaiter(this, arguments, void 0, function* ({ context }) {
18
+ console.log('[20250615-genesis:up] Migrating...');
19
+ yield context.createTable('snap', {
20
+ jobId: {
21
+ type: core_1.DataTypes.STRING,
22
+ primaryKey: true,
23
+ allowNull: false,
24
+ },
25
+ url: {
26
+ type: core_1.DataTypes.STRING,
27
+ allowNull: false,
28
+ index: true,
29
+ },
30
+ status: {
31
+ type: core_1.DataTypes.ENUM('success', 'failed', 'pending'),
32
+ allowNull: false,
33
+ },
34
+ html: {
35
+ type: core_1.DataTypes.TEXT,
36
+ allowNull: true,
37
+ },
38
+ screenshot: {
39
+ type: core_1.DataTypes.STRING,
40
+ allowNull: true,
41
+ },
42
+ error: {
43
+ type: core_1.DataTypes.STRING,
44
+ allowNull: true,
45
+ },
46
+ lastModified: {
47
+ type: core_1.DataTypes.STRING,
48
+ allowNull: true,
49
+ },
50
+ meta: {
51
+ type: core_1.DataTypes.JSON,
52
+ allowNull: true,
53
+ },
54
+ options: {
55
+ type: core_1.DataTypes.JSON,
56
+ allowNull: true,
57
+ },
58
+ createdAt: {
59
+ type: core_1.DataTypes.DATE,
60
+ defaultValue: core_1.DataTypes.NOW,
61
+ },
62
+ updatedAt: {
63
+ type: core_1.DataTypes.DATE,
64
+ defaultValue: core_1.DataTypes.NOW,
65
+ },
66
+ });
67
+ yield context.createTable('jobs', {
68
+ id: {
69
+ type: core_1.DataTypes.STRING(40),
70
+ primaryKey: true,
71
+ },
72
+ queue: {
73
+ type: core_1.DataTypes.STRING(32),
74
+ allowNull: false,
75
+ },
76
+ job: {
77
+ type: core_1.DataTypes.JSON,
78
+ allowNull: false,
79
+ },
80
+ retryCount: {
81
+ type: core_1.DataTypes.INTEGER,
82
+ },
83
+ delay: {
84
+ type: core_1.DataTypes.INTEGER,
85
+ },
86
+ willRunAt: {
87
+ type: core_1.DataTypes.INTEGER,
88
+ },
89
+ cancelled: {
90
+ type: core_1.DataTypes.BOOLEAN,
91
+ defaultValue: false,
92
+ },
93
+ createdAt: {
94
+ type: core_1.DataTypes.DATE,
95
+ defaultValue: core_1.DataTypes.NOW,
96
+ index: true,
97
+ },
98
+ updatedAt: {
99
+ type: core_1.DataTypes.DATE,
100
+ defaultValue: core_1.DataTypes.NOW,
101
+ index: true,
102
+ },
103
+ });
104
+ console.log('[20250615-genesis:up] Migrated successfully!');
105
+ });
106
+ }
107
+ function down(_a) {
108
+ return __awaiter(this, arguments, void 0, function* ({ context }) {
109
+ console.log('[20250615-genesis:down] Migrating...');
110
+ yield context.dropTable('snap');
111
+ yield context.dropTable('jobs');
112
+ console.log('[20250615-genesis:down] Migrated successfully!');
113
+ });
114
+ }