@arcblock/crawler 1.4.7 → 1.5.0

This diff shows the published contents of @arcblock/crawler 1.4.7 and 1.5.0 as released to the public npm registry. It is provided for informational purposes only.
@@ -1,10 +1,11 @@
 import { Page } from '@blocklet/puppeteer';
 import { JobState, SnapshotModel } from './store';
-declare let crawlQueue: any;
-declare let syncQueue: any;
-declare let codeQueue: any;
-declare let cronQueue: any;
-export { crawlQueue, syncQueue, codeQueue, cronQueue };
+export declare const queueMap: {
+    urlCrawler: any;
+    syncCrawler: any;
+    codeCrawler: any;
+    cronJobs: any;
+};
 export declare function initQueue(): void;
 type PageHandler = {
     handleScreenshot?: (page: Page, params?: JobState) => Promise<Buffer | null>;
@@ -28,6 +29,7 @@ export declare const getPageContent: ({ url, includeScreenshot, includeHtml, wid
  * @param params
  * @param callback callback when job finished
  */
-export declare function enqueue(queue: any, params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
+export declare function enqueue(queueName: keyof typeof queueMap, params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
 export declare function crawlUrl(params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
 export declare function crawlCode(params: Omit<JobState, 'jobId'>, callback?: (snapshot: SnapshotModel | null) => void): Promise<string>;
+export {};
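The headline API change: the four named queue exports (crawlQueue, syncQueue, codeQueue, cronQueue) are replaced by a single queueMap keyed by queue name, and enqueue now takes that key instead of a queue object (unknown keys throw, per the implementation below). A minimal consumer-side sketch of the migration — the root import and the url-only params object are assumptions, not documented usage:

```ts
import { enqueue, initQueue, queueMap } from '@arcblock/crawler';

async function main() {
  initQueue(); // queueMap entries stay null until this runs

  // 1.4.7: enqueue(crawlQueue, params, cb) — the queue object itself
  // 1.5.0: enqueue('urlCrawler' | 'syncCrawler' | 'codeCrawler' | 'cronJobs', params, cb)
  const jobId = await enqueue('urlCrawler', { url: 'https://example.com' }, (snapshot) => {
    console.log(snapshot ? snapshot.status : 'job failed'); // SnapshotModel | null
  });
  console.log('enqueued', jobId, 'queues:', Object.keys(queueMap));
}

main().catch(console.error);
```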
@@ -12,7 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.getPageContent = exports.cronQueue = exports.codeQueue = exports.syncQueue = exports.crawlQueue = void 0;
+exports.getPageContent = exports.queueMap = void 0;
 exports.initQueue = initQueue;
 exports.createCrawlQueue = createCrawlQueue;
 exports.getDataDir = getDataDir;
@@ -26,49 +26,57 @@ const crypto_1 = require("crypto");
 const fs_extra_1 = __importDefault(require("fs-extra"));
 const path_1 = __importDefault(require("path"));
 const config_1 = require("./config");
+const metrics_1 = require("./metrics");
 const puppeteer_1 = require("./puppeteer");
 const carbon_1 = require("./services/carbon");
 const snapshot_1 = require("./services/snapshot");
 const store_1 = require("./store");
 const utils_1 = require("./utils");
 const { BaseState } = require('@abtnode/models');
-let crawlQueue;
-let syncQueue;
-let codeQueue;
-let cronQueue;
+exports.queueMap = {
+    urlCrawler: null,
+    syncCrawler: null,
+    codeCrawler: null,
+    cronJobs: null,
+};
 function initQueue() {
-    exports.crawlQueue = crawlQueue = createCrawlQueue('urlCrawler');
-    exports.syncQueue = syncQueue = createCrawlQueue('syncCrawler');
-    exports.codeQueue = codeQueue = createCrawlQueue('codeCrawler', {
+    exports.queueMap.urlCrawler = createCrawlQueue('urlCrawler');
+    exports.queueMap.syncCrawler = createCrawlQueue('syncCrawler');
+    exports.queueMap.codeCrawler = createCrawlQueue('codeCrawler', {
        handleScreenshot: carbon_1.createCarbonImage,
    });
-    exports.cronQueue = cronQueue = createCrawlQueue('cronJobs');
+    exports.queueMap.cronJobs = createCrawlQueue('cronJobs');
 }
 function createCrawlQueue(queue, handler) {
     const db = new BaseState(store_1.Job);
     return (0, queue_1.default)({
         store: new sequelize_1.default(db, queue),
-        concurrency: config_1.config.concurrency,
+        options: {
+            concurrency: config_1.config.concurrency,
+            enableScheduledJob: true,
+        },
         onJob: (job) => __awaiter(this, void 0, void 0, function* () {
-            config_1.logger.info('Starting to execute crawl job', job);
-            // check robots.txt
-            if (!job.ignoreRobots) {
-                const canCrawl = yield (0, utils_1.isAcceptCrawler)(job.url);
-                if (!canCrawl) {
-                    config_1.logger.error(`failed to crawl ${job.url}, denied by robots.txt`, job);
-                    const snapshot = (0, snapshot_1.convertJobToSnapshot)({
-                        job,
-                        snapshot: {
-                            status: 'failed',
-                            error: 'Denied by robots.txt',
-                        },
-                    });
-                    yield store_1.Snapshot.upsert(snapshot);
-                    return snapshot;
-                }
-            }
+            const startTime = Date.now();
+            let status = 'failed';
             const formattedJob = Object.assign(Object.assign({}, job), { cookies: (config_1.config.cookies || []).concat(job.cookies || []), localStorage: (config_1.config.localStorage || []).concat(job.localStorage || []), url: (0, utils_1.formatUrl)(job.url) });
             try {
+                config_1.logger.info(`Starting to execute ${queue} job`, Object.assign(Object.assign({}, job), { queueSize: yield store_1.Job.count() }));
+                // check robots.txt
+                if (!job.ignoreRobots) {
+                    const canCrawl = yield (0, utils_1.isAcceptCrawler)(job.url);
+                    if (!canCrawl) {
+                        config_1.logger.error(`failed to crawl ${job.url}, denied by robots.txt`, job);
+                        const snapshot = (0, snapshot_1.convertJobToSnapshot)({
+                            job,
+                            snapshot: {
+                                status: 'failed',
+                                error: 'Denied by robots.txt',
+                            },
+                        });
+                        yield store_1.Snapshot.upsert(snapshot);
+                        return snapshot;
+                    }
+                }
                 // get page content later
                 const result = yield (0, exports.getPageContent)(formattedJob, handler);
                 if (!result || (!result.html && !result.screenshot)) {
@@ -86,18 +94,13 @@ function createCrawlQueue(queue, handler) {
                 const snapshot = yield store_1.sequelize.transaction((txn) => __awaiter(this, void 0, void 0, function* () {
                     // delete old snapshot
                     if (formattedJob.replace) {
-                        try {
-                            const deletedJobIds = yield (0, snapshot_1.deleteSnapshots)({
-                                url: formattedJob.url,
-                                replace: true,
-                            }, { txn });
-                            if (deletedJobIds) {
-                                config_1.logger.info('Deleted old snapshot', { deletedJobIds });
-                            }
-                        }
-                        catch (error) {
+                        const deletedJobIds = yield (0, snapshot_1.deleteSnapshots)({
+                            url: formattedJob.url,
+                            replace: true,
+                        }, { txn }).catch((error) => {
                             config_1.logger.error('Failed to delete old snapshot', { error, formattedJob });
-                        }
+                        });
+                        config_1.logger.info('Deleted old snapshot', { deletedJobIds });
                     }
                     // save html and screenshot to data dir
                     const { screenshotPath, htmlPath } = yield saveSnapshotToLocal({
@@ -117,10 +120,12 @@ function createCrawlQueue(queue, handler) {
                     yield store_1.Snapshot.upsert(snapshot, { transaction: txn });
                     return snapshot;
                 }));
+                status = 'success';
                 return snapshot;
             }
             catch (error) {
                 config_1.logger.error(`Failed to crawl ${formattedJob.url}`, { error, formattedJob });
+                status = 'failed';
                 const snapshot = (0, snapshot_1.convertJobToSnapshot)({
                     job: formattedJob,
                     snapshot: {
@@ -131,6 +136,14 @@ function createCrawlQueue(queue, handler) {
                 yield store_1.Snapshot.upsert(snapshot);
                 return snapshot;
             }
+            finally {
+                const now = Date.now();
+                metrics_1.jobsTotal.inc({ queue, status });
+                metrics_1.jobDurationSeconds.observe({ queue, status }, (now - startTime) / 1000);
+                if (job.enqueuedAt) {
+                    metrics_1.jobTotalLatencySeconds.observe({ queue, status }, (now - job.enqueuedAt) / 1000);
+                }
+            }
         }),
     });
 }
@@ -166,7 +179,7 @@ function saveSnapshotToLocal(_a) {
         };
     });
 }
-const getPageContent = (_a, handler_1) => __awaiter(void 0, [_a, handler_1], void 0, function* ({ url, includeScreenshot = true, includeHtml = true, width = 1440, height = 900, quality = 80, format = 'webp', timeout = 90 * 1000, waitTime = 0, fullPage = false, headers, cookies, localStorage, }, handler) {
+const getPageContent = (_a, handler_1) => __awaiter(void 0, [_a, handler_1], void 0, function* ({ url, includeScreenshot = true, includeHtml = true, width = 1440, height = 900, quality = 80, format = 'webp', timeout = 60 * 1000, waitTime = 0, fullPage = false, headers, cookies, localStorage, }, handler) {
     const page = yield (0, puppeteer_1.initPage)();
     if (width && height) {
         yield page.setViewport({ width, height, deviceScaleFactor: 2 });
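Buried in this hunk is a behavioral change: the default getPageContent timeout drops from 90 s to 60 s. Callers that relied on the longer window can restore it per job — assuming timeout is accepted through JobState, as the destructuring above suggests:

```ts
// restore the pre-1.5.0 navigation window for a hypothetical slow target
await crawlUrl({ url: 'https://slow.example.com', timeout: 90 * 1000 });
```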
@@ -299,31 +312,59 @@ exports.getPageContent = getPageContent;
  * @param callback callback when job finished
  */
 // eslint-disable-next-line require-await
-function enqueue(queue, params, callback) {
+function enqueue(queueName, params, callback) {
     return __awaiter(this, void 0, void 0, function* () {
+        const queue = exports.queueMap[queueName];
+        if (!queue) {
+            throw new Error(`Queue ${queueName} not found`);
+        }
         // skip duplicate job
         const existsJob = yield store_1.Job.isExists(params);
         if (existsJob && !params.sync) {
             config_1.logger.info(`Crawl job already exists for ${params.url}, skip`);
             return existsJob.id;
         }
-        config_1.logger.info('enqueue crawl job', params);
         const jobId = (0, crypto_1.randomUUID)();
-        const job = queue.push(Object.assign(Object.assign({}, params), { id: jobId }));
+        const enqueuedAt = Date.now();
+        const job = queue.push({ job: Object.assign(Object.assign({}, params), { id: jobId, enqueuedAt }), jobId });
+        metrics_1.jobsEnqueuedTotal.inc({ queue: queueName });
+        // Get current queue size for logging
+        const queueSize = yield store_1.Job.count();
+        config_1.logger.info('enqueue crawl job', Object.assign(Object.assign({}, params), { jobId, queueSize }));
         job.on('finished', (_a) => __awaiter(this, [_a], void 0, function* ({ result }) {
-            config_1.logger.info(`Crawl completed ${params.url}, status: ${result ? 'success' : 'failed'}`, { job: params, result });
-            callback === null || callback === void 0 ? void 0 : callback(result ? yield (0, snapshot_1.formatSnapshot)(result) : null);
+            try {
+                const isSuccess = (result === null || result === void 0 ? void 0 : result.status) === 'success';
+                const queueSize = yield store_1.Job.count();
+                config_1.logger.info(`Crawl completed ${params.url}, status: ${isSuccess ? 'success' : 'failed'}`, {
+                    job: params,
+                    result,
+                    queueSize,
+                });
+                callback === null || callback === void 0 ? void 0 : callback(result ? yield (0, snapshot_1.formatSnapshot)(result) : null);
+            }
+            catch (error) {
+                config_1.logger.error(`Error in finished event handler for ${params.url}`, { error });
+                callback === null || callback === void 0 ? void 0 : callback(null);
+            }
+        }));
+        job.on('failed', (_a) => __awaiter(this, [_a], void 0, function* ({ error }) {
+            try {
+                const queueSize = yield store_1.Job.count();
+                config_1.logger.error(`Failed to execute job for ${params.url}`, { error, job: params, queueSize });
+            }
+            catch (err) {
+                config_1.logger.error(`Error in failed event handler for ${params.url}`, { error: err });
+            }
+            finally {
+                callback === null || callback === void 0 ? void 0 : callback(null);
+            }
         }));
-        job.on('failed', ({ error }) => {
-            config_1.logger.error(`Failed to execute job for ${params.url}`, { error, job: params });
-            callback === null || callback === void 0 ? void 0 : callback(null);
-        });
         return jobId;
     });
 }
 function crawlUrl(params, callback) {
-    return enqueue(params.sync ? syncQueue : crawlQueue, params, callback);
+    return enqueue(params.sync ? 'syncCrawler' : 'urlCrawler', params, callback);
 }
 function crawlCode(params, callback) {
-    return enqueue(codeQueue, Object.assign({ ignoreRobots: true, includeHtml: false, includeScreenshot: true }, params), callback);
+    return enqueue('codeCrawler', Object.assign({ ignoreRobots: true, includeHtml: false, includeScreenshot: true }, params), callback);
 }
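With both event handlers now wrapped in try/catch (and a finally for 'failed'), the callback contract tightens: it receives null when the queue job fails or a handler itself throws, and a snapshot otherwise — where that snapshot may still carry status: 'failed', e.g. after a robots.txt denial. A defensive consumer sketch:

```ts
import { crawlUrl } from '@arcblock/crawler';

await crawlUrl({ url: 'https://example.com' }, (snapshot) => {
  if (!snapshot) return; // queue-level failure, or a handler threw
  if (snapshot.status === 'failed') return; // e.g. 'Denied by robots.txt'
  // snapshot holds the stored crawl result from here on
});
```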
@@ -1,5 +1,7 @@
 import { Config } from './config';
 export * from './crawler';
 export * from './services/snapshot';
+export * from './store/job';
 export * as utils from './utils';
+export * from './metrics';
 export declare function initCrawler(params: Pick<Config, 'puppeteerPath' | 'siteCron' | 'cookies' | 'localStorage' | 'concurrency'>): Promise<void>;
package/lib/cjs/index.js CHANGED
@@ -59,7 +59,9 @@ const puppeteer_1 = require("./puppeteer");
 const migrate_1 = require("./store/migrate");
 __exportStar(require("./crawler"), exports);
 __exportStar(require("./services/snapshot"), exports);
+__exportStar(require("./store/job"), exports);
 exports.utils = __importStar(require("./utils"));
+__exportStar(require("./metrics"), exports);
 function initCrawler(params) {
     return __awaiter(this, void 0, void 0, function* () {
         var _a;
@@ -0,0 +1,18 @@
+import { Counter, Gauge, Histogram } from 'prom-client';
+export declare const jobsTotal: Counter<"queue" | "status">;
+export declare const jobsEnqueuedTotal: Counter<"queue">;
+export declare const jobDurationSeconds: Histogram<"queue" | "status">;
+export declare const jobTotalLatencySeconds: Histogram<"queue" | "status">;
+export declare const queueSize: Gauge<"queue">;
+/**
+ * Collect all metrics by querying database
+ */
+export declare function collectMetrics(): Promise<void>;
+/**
+ * Get metrics in Prometheus format
+ */
+export declare function getMetrics(): Promise<string>;
+/**
+ * Get content type for metrics endpoint
+ */
+export declare function getContentType(): "text/plain; version=0.0.4; charset=utf-8";
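The new metrics module is shaped for a Prometheus scrape endpoint: getMetrics() refreshes the DB-backed gauges via collectMetrics() and renders the registry, while getContentType() returns the Prometheus text exposition content type. A minimal wiring sketch, assuming an Express app (Express is not a dependency of this package):

```ts
import express from 'express';
import { getContentType, getMetrics } from '@arcblock/crawler';

const app = express();

app.get('/metrics', async (_req, res) => {
  res.set('Content-Type', getContentType());
  res.send(await getMetrics());
});

app.listen(3000);
```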
@@ -0,0 +1,88 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.queueSize = exports.jobTotalLatencySeconds = exports.jobDurationSeconds = exports.jobsEnqueuedTotal = exports.jobsTotal = void 0;
+exports.collectMetrics = collectMetrics;
+exports.getMetrics = getMetrics;
+exports.getContentType = getContentType;
+const prom_client_1 = require("prom-client");
+const store_1 = require("./store");
+// Create a new registry
+const register = new prom_client_1.Registry();
+// ========== Counter - crawl job count ==========
+exports.jobsTotal = new prom_client_1.Counter({
+    name: 'crawler_jobs_total',
+    help: 'Total number of crawl jobs processed',
+    labelNames: ['queue', 'status'],
+    registers: [register],
+});
+// ========== Counter - enqueued job count ==========
+exports.jobsEnqueuedTotal = new prom_client_1.Counter({
+    name: 'crawler_jobs_enqueued_total',
+    help: 'Total number of crawl jobs enqueued',
+    labelNames: ['queue'],
+    registers: [register],
+});
+// ========== Histogram - job execution duration ==========
+exports.jobDurationSeconds = new prom_client_1.Histogram({
+    name: 'crawler_job_duration_seconds',
+    help: 'Duration of crawl job execution in seconds',
+    labelNames: ['queue', 'status'],
+    buckets: [10, 30, 60, 120, 300, 600, 900, 1800, 3600],
+    registers: [register],
+});
+// ========== Histogram - total latency from enqueue to completion ==========
+exports.jobTotalLatencySeconds = new prom_client_1.Histogram({
+    name: 'crawler_job_total_latency_seconds',
+    help: 'Total latency from enqueue to completion in seconds',
+    labelNames: ['queue', 'status'],
+    buckets: [10, 30, 60, 120, 300, 600, 900, 1800, 3600],
+    registers: [register],
+});
+// ========== Gauge - queue size ==========
+exports.queueSize = new prom_client_1.Gauge({
+    name: 'crawler_queue_size',
+    help: 'Current number of jobs in queue',
+    labelNames: ['queue'],
+    registers: [register],
+});
+/**
+ * Collect all metrics by querying database
+ */
+function collectMetrics() {
+    return __awaiter(this, void 0, void 0, function* () {
+        try {
+            // collect queue sizes
+            const jobStats = yield store_1.Job.stats();
+            jobStats.queues.forEach((q) => {
+                exports.queueSize.set({ queue: q.queue }, q.count);
+            });
+        }
+        catch (error) {
+            console.error('Error collecting metrics:', error);
+        }
+    });
+}
+/**
+ * Get metrics in Prometheus format
+ */
+function getMetrics() {
+    return __awaiter(this, void 0, void 0, function* () {
+        yield collectMetrics();
+        return register.metrics();
+    });
+}
+/**
+ * Get content type for metrics endpoint
+ */
+function getContentType() {
+    return register.contentType;
+}
@@ -24,6 +24,7 @@ const fs_extra_1 = __importDefault(require("fs-extra"));
 const path_1 = __importDefault(require("path"));
 const timers_1 = require("timers");
 const config_1 = require("./config");
+const store_1 = require("./store");
 const utils_1 = require("./utils");
 const BrowserStatus = {
     None: 'None',
@@ -124,21 +125,25 @@ function launchBrowser() {
             '--no-sandbox',
             '--no-zygote',
             '--disable-setuid-sandbox',
-            '--disable-gpu',
             '--disable-dev-shm-usage',
             '--disable-site-isolation-trials',
-            '--disable-accelerated-2d-canvas',
             '--disable-extensions',
-            '--js-flags=--max_old_space_size=512', // limit V8 memory
+            '--js-flags=--max_old_space_size=768', // limit V8 memory
             '--disable-background-networking',
             '--disable-default-apps',
             // '--disable-web-security', // allow cross-origin requests
-            '--disable-software-rasterizer',
             '--disable-crash-reporter',
             '--disable-service-workers',
             '--disable-notifications',
             '--disable-infobars',
             '--font-render-hinting=none',
+            // WebGL: use software GL fallback for servers without GPU
+            '--enable-webgl',
+            '--ignore-gpu-blocklist',
+            '--use-gl=swiftshader',
+            '--use-angle=swiftshader',
+            '--enable-unsafe-swiftshader',
+            '--disable-gpu-sandbox',
         ],
     });
     config_1.logger.info('Launch browser');
@@ -162,12 +167,18 @@ function checkBrowserActivated() {
     var _a;
     if (browser) {
         const pages = yield browser.pages().catch(() => []);
-        if (pages.length === 1 && ((_a = pages[0]) === null || _a === void 0 ? void 0 : _a.url()) === 'about:blank') {
+        const jobCount = yield store_1.Job.count().catch(() => 0);
+        // Check if browser is inactive: only blank page AND no pending jobs
+        const isInactive = pages.length === 1 && ((_a = pages[0]) === null || _a === void 0 ? void 0 : _a.url()) === 'about:blank' && jobCount === 0;
+        if (isInactive) {
             count++;
             config_1.logger.debug(`Browser inactive count: ${count}/3`);
         }
         else {
-            count = 0; // reset the counter!
+            count = 0;
+            if (jobCount > 0) {
+                config_1.logger.debug(`Browser has ${jobCount} pending jobs, keeping active`);
+            }
         }
         if (count >= 3) {
             config_1.logger.info('Browser inactive for 3 minutes, closing...');
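The launch-flag change drops --disable-gpu and --disable-software-rasterizer in favor of a SwiftShader software GL stack, so WebGL-dependent pages can render on GPU-less servers, and the idle check now keeps the browser alive while jobs are pending. A quick way to confirm the software WebGL fallback is active, assuming an already-initialized page (hasWebGL is a hypothetical helper, not part of the package):

```ts
import type { Page } from '@blocklet/puppeteer';

async function hasWebGL(page: Page): Promise<boolean> {
  return page.evaluate(() => {
    // with --use-gl=swiftshader this should yield a software-backed context
    const canvas = document.createElement('canvas');
    return canvas.getContext('webgl') !== null;
  });
}
```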
package/lib/cjs/site.js CHANGED
@@ -17,6 +17,7 @@ const uniq_1 = __importDefault(require("lodash/uniq"));
 const node_crypto_1 = require("node:crypto");
 const config_1 = require("./config");
 const crawler_1 = require("./crawler");
+const metrics_1 = require("./metrics");
 const store_1 = require("./store");
 const utils_1 = require("./utils");
 const crawlBlockletRunningMap = new Map();
@@ -69,21 +70,30 @@ const crawlSite = (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, path
         });
         crawlCount++;
         const jobId = (0, node_crypto_1.randomUUID)();
-        crawler_1.cronQueue.push({
-            id: jobId,
-            url,
-            lastModified: sitemapItem.lastmod,
-            includeScreenshot: false,
-            includeHtml: true,
-            replace: true,
+        crawler_1.queueMap.cronJobs.push({
+            job: {
+                id: jobId,
+                url,
+                lastModified: sitemapItem.lastmod,
+                includeScreenshot: false,
+                includeHtml: true,
+                replace: true,
+                enqueuedAt: Date.now(),
+            },
+            jobId,
+            delay: 5,
         });
+        metrics_1.jobsEnqueuedTotal.inc({ queue: 'cronJobs' });
         return jobId;
     }), { concurrency: ((_b = config_1.config.siteCron) === null || _b === void 0 ? void 0 : _b.concurrency) || 30 });
+    // Get current queue size for logging
+    const queueSize = yield store_1.Job.count();
     config_1.logger.info('Enqueued jobs from sitemap finished', {
         url,
         pathname,
         processCount,
         crawlCount,
+        queueSize,
     });
     return jobIds;
 }
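Note the new payload shape queue.push expects in 1.5.0: the job fields move under a job key, with jobId (and an optional delay) alongside — the same shape enqueue now builds internally, with enqueuedAt feeding crawler_job_total_latency_seconds. A direct-push sketch mirroring the sitemap cron above (the URL is illustrative):

```ts
import { randomUUID } from 'node:crypto';
import { queueMap } from '@arcblock/crawler';

const jobId = randomUUID();
queueMap.cronJobs.push({
  job: {
    id: jobId,
    url: 'https://example.com/page',
    includeScreenshot: false,
    includeHtml: true,
    replace: true,
    enqueuedAt: Date.now(), // measured against completion time for the latency histogram
  },
  jobId,
  delay: 5, // as used by the sitemap cron
});
```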
@@ -17,6 +17,7 @@ export interface JobState {
     replace?: boolean;
     sync?: boolean;
     ignoreRobots?: boolean;
+    enqueuedAt?: number;
     headers?: Record<string, string>;
     cookies?: CookieParam[];
     localStorage?: {
@@ -32,18 +33,46 @@ export interface JobModel {
     willRunAt: number;
     delay: number;
     cancelled: boolean;
+    processingBy: string | null;
+    processingAt: number | null;
 }
 export declare class Job extends Model<JobModel> implements JobModel {
     id: JobModel['id'];
     queue: JobModel['queue'];
     job: JobModel['job'];
     retryCount: JobModel['retryCount'];
-    willRunAt: JobModel['willRunAt'];
     delay: JobModel['delay'];
+    willRunAt: JobModel['willRunAt'];
     cancelled: JobModel['cancelled'];
+    processingBy: JobModel['processingBy'];
+    processingAt: JobModel['processingAt'];
     static initModel(sequelize: Sequelize): typeof Job;
     static findJob(condition: Partial<JobState>): Promise<JobModel | null>;
     static isExists(condition: Partial<JobState> & {
         url: string;
     }): Promise<JobModel | null | undefined>;
+    static paginate({ page, pageSize, queue, }?: {
+        page?: number;
+        pageSize?: number;
+        queue?: string;
+    }): Promise<{
+        total: number;
+        page: number;
+        pageSize: number;
+        totalPages: number;
+        data: JobModel[];
+    }>;
+    static stats(): Promise<{
+        total: number;
+        queues: {
+            queue: string;
+            count: number;
+        }[];
+    }>;
+    static deleteByQueue(queue: string): Promise<{
+        deleted: number;
+    }>;
+    static deleteByIds(ids: string[]): Promise<{
+        deleted: number;
+    }>;
 }
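Because ./store/job is now re-exported from the package root, these admin-style statics are directly consumable. A short sketch using only the signatures declared above (the root import path is an assumption based on the re-exports):

```ts
import { Job } from '@arcblock/crawler';

// per-queue backlog — the same numbers collectMetrics() feeds into crawler_queue_size
const { total, queues } = await Job.stats();
console.log(total, queues);

// page through one queue's jobs, newest first
const { data, totalPages } = await Job.paginate({ queue: 'urlCrawler', page: 1, pageSize: 50 });
console.log(totalPages);

// bulk cleanup
await Job.deleteByIds(data.map((j) => j.id));
await Job.deleteByQueue('cronJobs');
```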
@@ -76,6 +76,14 @@ class Job extends core_1.Model {
             type: core_1.DataTypes.BOOLEAN,
             defaultValue: false,
         },
+        processingBy: {
+            type: core_1.DataTypes.STRING(32),
+            allowNull: true,
+        },
+        processingAt: {
+            type: core_1.DataTypes.INTEGER,
+            allowNull: true,
+        },
         createdAt: {
             type: core_1.DataTypes.DATE,
             defaultValue: core_1.DataTypes.NOW,
@@ -127,5 +135,53 @@ class Job extends core_1.Model {
             return existsJob === null || existsJob === void 0 ? void 0 : existsJob.get();
         });
     }
+    static paginate() {
+        return __awaiter(this, arguments, void 0, function* ({ page = 1, pageSize = 20, queue, } = {}) {
+            const where = queue ? { queue } : {};
+            const offset = (page - 1) * pageSize;
+            const { count, rows } = yield Job.findAndCountAll({
+                where,
+                order: [['createdAt', 'DESC']],
+                limit: pageSize,
+                offset,
+            });
+            return {
+                total: count,
+                page,
+                pageSize,
+                totalPages: Math.ceil(count / pageSize),
+                data: rows.map((row) => row.toJSON()),
+            };
+        });
+    }
+    static stats() {
+        return __awaiter(this, void 0, void 0, function* () {
+            const results = (yield Job.findAll({
+                attributes: ['queue', [core_1.default.fn('COUNT', core_1.default.col('id')), 'count']],
+                group: ['queue'],
+                raw: true,
+            }));
+            const total = results.reduce((sum, item) => sum + Number(item.count), 0);
+            return {
+                total,
+                queues: results.map((item) => ({
+                    queue: item.queue,
+                    count: Number(item.count),
+                })),
+            };
+        });
+    }
+    static deleteByQueue(queue) {
+        return __awaiter(this, void 0, void 0, function* () {
+            const count = yield Job.destroy({ where: { queue } });
+            return { deleted: count };
+        });
+    }
+    static deleteByIds(ids) {
+        return __awaiter(this, void 0, void 0, function* () {
+            const count = yield Job.destroy({ where: { id: ids } });
+            return { deleted: count };
+        });
+    }
 }
 exports.Job = Job;
@@ -40,6 +40,7 @@ const umzug_1 = require("umzug");
 const index_1 = require("./index");
 const migration20250615 = __importStar(require("./migrations/20250615-genesis"));
 const migration20250616Replace = __importStar(require("./migrations/20250616-replace"));
+const migration20251226JobProcessing = __importStar(require("./migrations/20251226-job-processing"));
 const umzug = new umzug_1.Umzug({
     migrations: [
         {
@@ -52,6 +53,11 @@ const umzug = new umzug_1.Umzug({
             up: ({ context }) => migration20250616Replace.up({ context }),
             down: ({ context }) => migration20250616Replace.down({ context }),
         },
+        {
+            name: '20251226-job-processing',
+            up: ({ context }) => migration20251226JobProcessing.up({ context }),
+            down: ({ context }) => migration20251226JobProcessing.down({ context }),
+        },
     ],
     context: index_1.sequelize.getQueryInterface(),
     storage: new umzug_1.SequelizeStorage({ sequelize: index_1.sequelize }),
@@ -0,0 +1,6 @@
+export declare function up({ context }: {
+    context: any;
+}): Promise<void>;
+export declare function down({ context }: {
+    context: any;
+}): Promise<void>;
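Only the declaration file for the new migration ships in this hunk; the compiled body is not part of the diff. Given the processingBy/processingAt columns added to the Job model and the query-interface context passed by migrate.js, a plausible reconstruction — purely illustrative, with the table name 'Jobs' and the sequelize import as assumptions — would be:

```ts
import { DataTypes } from 'sequelize';

// Hypothetical sketch: the actual compiled migration is not shown in this diff.
// context is the QueryInterface that migrate.js passes in.
export async function up({ context }: { context: any }): Promise<void> {
  await context.addColumn('Jobs', 'processingBy', { type: DataTypes.STRING(32), allowNull: true });
  await context.addColumn('Jobs', 'processingAt', { type: DataTypes.INTEGER, allowNull: true });
}

export async function down({ context }: { context: any }): Promise<void> {
  await context.removeColumn('Jobs', 'processingBy');
  await context.removeColumn('Jobs', 'processingAt');
}
```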