crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,687 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JobManager - Async job management with persistence, tracking, and cancellation
|
|
3
|
+
* Supports job creation, status tracking, persistence, expiration, and cancellation
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { promises as fs } from 'fs';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
import crypto from 'crypto';
|
|
9
|
+
import { EventEmitter } from 'events';
|
|
10
|
+
|
|
11
|
+
/**
 * In-memory job registry with optional JSON-file persistence.
 *
 * Jobs are plain objects kept in `this.jobs` (id -> job) and indexed by status
 * in `this.jobsByStatus`. Executors are registered per job type and invoked by
 * `executeJob`. Lifecycle changes are announced through EventEmitter events:
 * `jobCreated`, `jobUpdated`, `jobCancelled`, `jobLog`, `jobProgress`,
 * `jobExpired`, `jobRemoved`, `executorRegistered`, `executorUnregistered`,
 * `monitoring` and `destroyed`.
 */
export class JobManager extends EventEmitter {
  /**
   * @param {Object} [options]
   * @param {string} [options.storageDir='./jobs'] - Directory for `<jobId>.json` files
   * @param {number} [options.defaultTtl] - Default job lifetime in ms (24 hours)
   * @param {number} [options.cleanupInterval] - Expiry sweep period in ms (1 hour)
   * @param {boolean} [options.enablePersistence=true] - Write jobs to `storageDir`
   * @param {number} [options.maxJobs=1000] - Stored on the instance; not enforced by this class
   * @param {boolean} [options.enableMonitoring=true] - Emit periodic `monitoring` events
   * @param {number} [options.monitoringInterval=30000] - Monitoring period in ms
   */
  constructor(options = {}) {
    super();

    const {
      storageDir = './jobs',
      defaultTtl = 24 * 60 * 60 * 1000, // 24 hours
      cleanupInterval = 60 * 60 * 1000, // 1 hour
      enablePersistence = true,
      maxJobs = 1000,
      enableMonitoring = true,
      monitoringInterval = 30000 // 30 seconds
    } = options;

    this.storageDir = storageDir;
    this.defaultTtl = defaultTtl;
    this.enablePersistence = enablePersistence;
    this.maxJobs = maxJobs;
    this.enableMonitoring = enableMonitoring;

    // In-memory job storage
    this.jobs = new Map();
    this.jobsByStatus = new Map([
      ['pending', new Set()],
      ['running', new Set()],
      ['completed', new Set()],
      ['failed', new Set()],
      ['cancelled', new Set()]
    ]);

    // Job execution callbacks
    this.executors = new Map();

    // Pending retry timers, tracked so destroy() can cancel them
    this.retryTimers = new Set();

    // Statistics
    this.stats = {
      totalJobs: 0,
      activeJobs: 0,
      completedJobs: 0,
      failedJobs: 0,
      cancelledJobs: 0,
      averageExecutionTime: 0,
      lastUpdated: Date.now()
    };

    // Job states
    this.JOB_STATES = {
      PENDING: 'pending',
      RUNNING: 'running',
      COMPLETED: 'completed',
      FAILED: 'failed',
      CANCELLED: 'cancelled'
    };

    // Storage initialisation is asynchronous; `ready` lets callers wait until
    // previously persisted jobs have been loaded. initStorage() never rejects.
    this.ready = this.enablePersistence ? this.initStorage() : Promise.resolve();

    // Start cleanup interval
    this.cleanupTimer = setInterval(() => {
      // A listener throwing inside the sweep must not become an unhandled rejection
      this.cleanupExpiredJobs().catch((error) => {
        console.error('Failed to clean up expired jobs:', error);
      });
    }, cleanupInterval);

    // Start monitoring if enabled
    if (this.enableMonitoring) {
      this.startMonitoring(monitoringInterval);
    }
  }

  /**
   * Initialize persistent storage: create the storage directory and load any
   * jobs already stored there. On failure persistence is switched off.
   * @returns {Promise<void>}
   */
  async initStorage() {
    try {
      await fs.mkdir(this.storageDir, { recursive: true });
      // Load existing jobs on startup
      await this.loadPersistedJobs();
    } catch (error) {
      console.error('Failed to initialize job storage:', error);
      this.enablePersistence = false;
    }
  }

  /**
   * Create a new job in the pending state.
   * @param {string} type - Job type identifier (selects the executor)
   * @param {Object} data - Job data/payload
   * @param {Object} options - Job options (priority, ttl, maxRetries, retryDelay,
   *   webhooks, tags, dependencies, metadata)
   * @returns {Promise<Object>} Job object
   */
  async createJob(type, data = {}, options = {}) {
    const {
      priority = 0,
      ttl = this.defaultTtl,
      maxRetries = 0,
      retryDelay = 5000,
      webhooks = [],
      tags = [],
      dependencies = [],
      metadata = {}
    } = options;

    // Generate unique job ID
    const jobId = this.generateJobId();
    const now = Date.now();

    const job = {
      id: jobId,
      type,
      data,
      status: this.JOB_STATES.PENDING,
      priority,
      ttl,
      maxRetries,
      currentRetries: 0,
      retryDelay,
      webhooks,
      tags,
      dependencies,
      metadata,
      createdAt: now,
      updatedAt: now,
      expiresAt: now + ttl,
      startedAt: null,
      completedAt: null,
      result: null,
      error: null,
      progress: 0,
      logs: []
    };

    // Store job
    this.jobs.set(jobId, job);
    this.jobsByStatus.get(this.JOB_STATES.PENDING).add(jobId);

    // Persist if enabled
    if (this.enablePersistence) {
      await this.persistJob(job);
    }

    // Update statistics
    this.stats.totalJobs++;
    this.updateStats();

    this.emit('jobCreated', job);
    return job;
  }

  /**
   * Get job by ID
   * @param {string} jobId - Job identifier
   * @returns {Object|null} Job object or null if not found
   */
  getJob(jobId) {
    return this.jobs.get(jobId) || null;
  }

  /**
   * Get jobs by status
   * @param {string} status - Job status
   * @returns {Array} Array of job objects (empty for an unknown status)
   */
  getJobsByStatus(status) {
    const jobIds = this.jobsByStatus.get(status);
    if (!jobIds) return [];

    return Array.from(jobIds)
      .map(id => this.jobs.get(id))
      .filter(Boolean);
  }

  /**
   * Get jobs by type
   * @param {string} type - Job type
   * @returns {Array} Array of job objects
   */
  getJobsByType(type) {
    return Array.from(this.jobs.values())
      .filter(job => job.type === type);
  }

  /**
   * Get jobs by tag
   * @param {string} tag - Job tag
   * @returns {Array} Array of job objects
   */
  getJobsByTag(tag) {
    // Jobs loaded from disk are only validated for id/type/status, so `tags`
    // may be missing; treat that as "no tags" instead of throwing.
    return Array.from(this.jobs.values())
      .filter(job => Array.isArray(job.tags) && job.tags.includes(tag));
  }

  /**
   * Update job status
   * @param {string} jobId - Job identifier
   * @param {string} status - New status (one of JOB_STATES)
   * @param {Object} updates - Additional fields merged into the job
   *   (`id` and `status` keys are ignored to keep the indexes consistent)
   * @returns {Promise<Object>} Updated job object
   * @throws {Error} If the job does not exist or the status is unknown
   */
  async updateJobStatus(jobId, status, updates = {}) {
    const job = this.jobs.get(jobId);
    if (!job) {
      throw new Error(`Job ${jobId} not found`);
    }

    // Validate before touching anything so a bad status cannot leave the job
    // removed from its old status set but absent from every other one.
    const targetSet = this.jobsByStatus.get(status);
    if (!targetSet) {
      throw new Error(`Invalid job status: ${status}`);
    }

    // Remove from old status set
    this.jobsByStatus.get(job.status)?.delete(jobId);

    // Update job
    const now = Date.now();
    job.status = status;
    job.updatedAt = now;

    // Add status-specific updates
    switch (status) {
      case this.JOB_STATES.RUNNING:
        job.startedAt = now;
        break;
      case this.JOB_STATES.COMPLETED:
        job.completedAt = now;
        job.result = updates?.result ?? null;
        this.stats.completedJobs++;
        break;
      case this.JOB_STATES.FAILED:
        job.completedAt = now;
        job.error = updates?.error ?? null;
        this.stats.failedJobs++;
        break;
      case this.JOB_STATES.CANCELLED:
        job.completedAt = now;
        this.stats.cancelledJobs++;
        break;
    }

    // Apply additional updates, never letting them overwrite identity/status
    const extraFields = { ...updates };
    delete extraFields.id;
    delete extraFields.status;
    Object.assign(job, extraFields);

    // Add to new status set and refresh the derived active-job count
    targetSet.add(jobId);
    this.updateStats();

    // Persist if enabled
    if (this.enablePersistence) {
      await this.persistJob(job);
    }

    this.emit('jobUpdated', job, status);

    return job;
  }

  /**
   * Execute a pending job with the executor registered for its type.
   *
   * On executor failure the job is either re-queued (pending) and retried after
   * `job.retryDelay` ms, or marked failed once `job.maxRetries` is exhausted;
   * the original error is rethrown in both cases. If the job is cancelled or
   * removed while the executor is running, its status is left untouched.
   *
   * @param {string} jobId - Job identifier
   * @returns {Promise<Object>} Job result
   * @throws {Error} If the job is missing, not pending, has unmet dependencies,
   *   has no executor, or the executor throws
   */
  async executeJob(jobId) {
    const job = this.jobs.get(jobId);
    if (!job) {
      throw new Error(`Job ${jobId} not found`);
    }

    if (job.status !== this.JOB_STATES.PENDING) {
      throw new Error(`Job ${jobId} is not in pending status (current: ${job.status})`);
    }

    // Check dependencies
    const unmetDependencies = await this.checkDependencies(job);
    if (unmetDependencies.length > 0) {
      const depList = unmetDependencies.join(', ');
      throw new Error(`Job ${jobId} has unmet dependencies: ${depList}`);
    }

    // The await above yields; a concurrent executeJob() call for the same job
    // may have claimed it in the meantime, so check again before starting.
    if (job.status !== this.JOB_STATES.PENDING) {
      throw new Error(`Job ${jobId} is not in pending status (current: ${job.status})`);
    }

    const executor = this.executors.get(job.type);
    if (!executor) {
      await this.updateJobStatus(jobId, this.JOB_STATES.FAILED, {
        error: `No executor registered for job type: ${job.type}`
      });
      throw new Error(`No executor registered for job type: ${job.type}`);
    }

    await this.updateJobStatus(jobId, this.JOB_STATES.RUNNING);

    // True while this run still owns the job (not cancelled, not removed)
    const stillRunning = () =>
      this.jobs.get(jobId) === job && job.status === this.JOB_STATES.RUNNING;

    let result;
    try {
      result = await executor(job);
    } catch (error) {
      // Cancelled/removed during execution: keep that outcome, just report the error
      if (!stillRunning()) {
        throw error;
      }

      // Executors may throw non-Error values; still record something useful
      const message = error?.message ?? String(error);

      if (job.currentRetries < job.maxRetries) {
        job.currentRetries++;
        await this.updateJobStatus(jobId, this.JOB_STATES.PENDING, {
          error: message,
          currentRetries: job.currentRetries
        });

        // Schedule retry
        const retryTimer = setTimeout(() => {
          this.retryTimers.delete(retryTimer);
          this.executeJob(jobId).catch(() => {
            // The retry's outcome is already recorded on the job itself
            // (failed / re-queued), or the job was cancelled or removed
            // while waiting; nothing further to do here.
          });
        }, job.retryDelay);
        this.retryTimers.add(retryTimer);
      } else {
        await this.updateJobStatus(jobId, this.JOB_STATES.FAILED, {
          error: message
        });
      }

      throw error;
    }

    // Do not overwrite a cancellation (or resurrect a removed job)
    if (!stillRunning()) {
      return result;
    }

    await this.updateJobStatus(jobId, this.JOB_STATES.COMPLETED, { result });

    // Calculate execution time
    this.updateExecutionTime(job.completedAt - job.startedAt);

    return result;
  }

  /**
   * Cancel a job that has not yet reached a terminal state.
   * @param {string} jobId - Job identifier
   * @returns {Promise<Object>} Cancelled job object
   * @throws {Error} If the job is missing or already completed/failed/cancelled
   */
  async cancelJob(jobId) {
    const job = this.jobs.get(jobId);
    if (!job) {
      throw new Error(`Job ${jobId} not found`);
    }

    if ([this.JOB_STATES.COMPLETED, this.JOB_STATES.FAILED, this.JOB_STATES.CANCELLED].includes(job.status)) {
      throw new Error(`Job ${jobId} cannot be cancelled (current status: ${job.status})`);
    }

    await this.updateJobStatus(jobId, this.JOB_STATES.CANCELLED);
    this.emit('jobCancelled', job);

    return job;
  }

  /**
   * Register job executor
   * @param {string} type - Job type
   * @param {Function} executor - Executor function, called with the job object
   */
  registerExecutor(type, executor) {
    this.executors.set(type, executor);
    this.emit('executorRegistered', type);
  }

  /**
   * Unregister job executor
   * @param {string} type - Job type
   */
  unregisterExecutor(type) {
    this.executors.delete(type);
    this.emit('executorUnregistered', type);
  }

  /**
   * Add log entry to job. Unknown job IDs are ignored.
   * @param {string} jobId - Job identifier
   * @param {string} level - Log level
   * @param {string} message - Log message
   * @param {Object} data - Additional log data
   */
  async addJobLog(jobId, level, message, data = {}) {
    const job = this.jobs.get(jobId);
    if (!job) return;

    const logEntry = {
      timestamp: Date.now(),
      level,
      message,
      data
    };

    // Jobs loaded from disk may lack a logs array
    if (!Array.isArray(job.logs)) {
      job.logs = [];
    }

    job.logs.push(logEntry);
    job.updatedAt = Date.now();

    // Keep only last 100 log entries
    if (job.logs.length > 100) {
      job.logs = job.logs.slice(-100);
    }

    // Persist if enabled
    if (this.enablePersistence) {
      await this.persistJob(job);
    }

    this.emit('jobLog', job, logEntry);
  }

  /**
   * Update job progress. Unknown job IDs are ignored.
   * @param {string} jobId - Job identifier
   * @param {number} progress - Progress percentage (clamped to 0-100 on the job)
   * @param {string} message - Progress message; when given it is also logged
   */
  async updateJobProgress(jobId, progress, message = '') {
    const job = this.jobs.get(jobId);
    if (!job) return;

    job.progress = Math.max(0, Math.min(100, progress));
    job.updatedAt = Date.now();

    if (message) {
      // addJobLog persists the job (including the new progress value)
      await this.addJobLog(jobId, 'info', message, { progress });
    } else if (this.enablePersistence) {
      await this.persistJob(job);
    }

    this.emit('jobProgress', job, progress, message);
  }

  /**
   * Check job dependencies. A dependency is met only when the referenced job
   * is present in memory and completed.
   * @param {Object} job - Job object
   * @returns {Promise<Array>} Array of unmet dependency IDs
   */
  async checkDependencies(job) {
    if (!Array.isArray(job.dependencies) || job.dependencies.length === 0) {
      return [];
    }

    return job.dependencies.filter((depId) => {
      const depJob = this.jobs.get(depId);
      return !depJob || depJob.status !== this.JOB_STATES.COMPLETED;
    });
  }

  /**
   * Cleanup expired jobs: remove every job whose `expiresAt` is in the past and
   * emit `jobExpired` for each.
   */
  async cleanupExpiredJobs() {
    const now = Date.now();
    const expiredJobs = [];

    for (const [jobId, job] of this.jobs) {
      if (job.expiresAt && now > job.expiresAt) {
        expiredJobs.push(jobId);
      }
    }

    for (const jobId of expiredJobs) {
      await this.removeJob(jobId);
      this.emit('jobExpired', jobId);
    }

    if (expiredJobs.length > 0) {
      this.updateStats();
    }
  }

  /**
   * Remove job from memory and, when persistence is on, from disk.
   * Unknown job IDs are ignored.
   * @param {string} jobId - Job identifier
   */
  async removeJob(jobId) {
    const job = this.jobs.get(jobId);
    if (!job) return;

    // Remove from status set
    this.jobsByStatus.get(job.status)?.delete(jobId);

    // Remove from memory
    this.jobs.delete(jobId);

    // Remove from persistent storage
    if (this.enablePersistence) {
      await this.removePersistedJob(jobId);
    }

    this.emit('jobRemoved', jobId);
  }

  /**
   * Persist job to disk as `<storageDir>/<job.id>.json`. Best effort: write
   * failures are logged, not thrown.
   * @param {Object} job - Job object
   */
  async persistJob(job) {
    if (!this.enablePersistence) return;

    try {
      const filePath = path.join(this.storageDir, `${job.id}.json`);
      const data = JSON.stringify(job, null, 2);
      await fs.writeFile(filePath, data, 'utf8');
    } catch (error) {
      console.error(`Failed to persist job ${job.id}:`, error);
    }
  }

  /**
   * Remove persisted job file. A missing file is not an error.
   * @param {string} jobId - Job identifier
   */
  async removePersistedJob(jobId) {
    if (!this.enablePersistence) return;

    try {
      const filePath = path.join(this.storageDir, `${jobId}.json`);
      await fs.unlink(filePath);
    } catch (error) {
      // Ignore if file doesn't exist
      if (error.code !== 'ENOENT') {
        console.error(`Failed to remove persisted job ${jobId}:`, error);
      }
    }
  }

  /**
   * Load persisted jobs on startup. Files that cannot be read or parsed are
   * logged and skipped; jobs that fail validateJob() are skipped silently.
   */
  async loadPersistedJobs() {
    if (!this.enablePersistence) return;

    try {
      const files = await fs.readdir(this.storageDir);
      const jsonFiles = files.filter(file => file.endsWith('.json'));

      for (const file of jsonFiles) {
        try {
          const filePath = path.join(this.storageDir, file);
          const data = await fs.readFile(filePath, 'utf8');
          const job = JSON.parse(data);

          // Validate job structure
          if (this.validateJob(job)) {
            this.jobs.set(job.id, job);
            this.jobsByStatus.get(job.status).add(job.id);

            // Update running jobs to failed on restart (they were interrupted)
            if (job.status === this.JOB_STATES.RUNNING) {
              await this.updateJobStatus(job.id, this.JOB_STATES.FAILED, {
                error: 'Job was interrupted by system restart'
              });
            }
          }
        } catch (error) {
          console.error(`Failed to load job from ${file}:`, error);
        }
      }

      this.updateStats();
    } catch (error) {
      console.error('Failed to load persisted jobs:', error);
    }
  }

  /**
   * Validate job object structure: string `id` and `type`, and a `status`
   * that is one of JOB_STATES.
   * @param {Object} job - Job object to validate
   * @returns {boolean} Whether job is valid
   */
  validateJob(job) {
    return Boolean(job) &&
      typeof job.id === 'string' &&
      typeof job.type === 'string' &&
      typeof job.status === 'string' &&
      Object.values(this.JOB_STATES).includes(job.status);
  }

  /**
   * Generate unique job ID
   * @returns {string} Unique job identifier
   */
  generateJobId() {
    return crypto.randomUUID();
  }

  /**
   * Fold one execution time into the running average, weighted by
   * `stats.completedJobs` (which must already include the job being recorded).
   * @param {number} executionTime - Execution time in milliseconds
   */
  updateExecutionTime(executionTime) {
    const currentAverage = this.stats.averageExecutionTime;
    const completedJobs = this.stats.completedJobs;

    // First sample, or called before any completion was counted: avoid
    // dividing by zero and simply take the sample as the average.
    if (completedJobs <= 1) {
      this.stats.averageExecutionTime = executionTime;
      return;
    }

    this.stats.averageExecutionTime =
      ((currentAverage * (completedJobs - 1)) + executionTime) / completedJobs;
  }

  /**
   * Update statistics: recompute the active-job count (pending + running)
   * from the status index and stamp `lastUpdated`.
   */
  updateStats() {
    this.stats.activeJobs =
      this.jobsByStatus.get(this.JOB_STATES.PENDING).size +
      this.jobsByStatus.get(this.JOB_STATES.RUNNING).size;

    this.stats.lastUpdated = Date.now();
  }

  /**
   * Start monitoring, replacing any monitoring timer already running.
   * @param {number} interval - Monitoring interval in milliseconds
   */
  startMonitoring(interval) {
    if (this.monitoringTimer) {
      clearInterval(this.monitoringTimer);
    }

    this.monitoringTimer = setInterval(() => {
      this.updateStats();
      this.emit('monitoring', this.getStats());
    }, interval);
  }

  /**
   * Stop monitoring
   */
  stopMonitoring() {
    if (this.monitoringTimer) {
      clearInterval(this.monitoringTimer);
      this.monitoringTimer = null;
    }
  }

  /**
   * Get comprehensive statistics
   * @returns {Object} Counters from `this.stats` plus per-status counts, the
   *   number of jobs in memory and the number of registered executors
   */
  getStats() {
    return {
      ...this.stats,
      jobCounts: {
        pending: this.jobsByStatus.get(this.JOB_STATES.PENDING).size,
        running: this.jobsByStatus.get(this.JOB_STATES.RUNNING).size,
        completed: this.jobsByStatus.get(this.JOB_STATES.COMPLETED).size,
        failed: this.jobsByStatus.get(this.JOB_STATES.FAILED).size,
        cancelled: this.jobsByStatus.get(this.JOB_STATES.CANCELLED).size
      },
      totalJobsInMemory: this.jobs.size,
      executorCount: this.executors.size
    };
  }

  /**
   * Cleanup resources: stop all timers, drop in-memory state, notify
   * listeners with `destroyed`, then detach them. Persisted files are kept.
   */
  destroy() {
    // Clear timers
    if (this.cleanupTimer) {
      clearInterval(this.cleanupTimer);
      this.cleanupTimer = null;
    }

    this.stopMonitoring();

    for (const retryTimer of this.retryTimers) {
      clearTimeout(retryTimer);
    }
    this.retryTimers.clear();

    // Clear data
    this.jobs.clear();
    this.jobsByStatus.forEach(set => set.clear());
    this.executors.clear();

    // Notify first: once listeners are removed nobody could receive the event
    this.emit('destroyed');

    // Remove event listeners
    this.removeAllListeners();
  }
}
|
|
686
|
+
|
|
687
|
+
export default JobManager;
|