figranium 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -674
- package/README.md +336 -336
- package/agent.js +1 -1
- package/bin/cli.js +149 -149
- package/common-utils.js +211 -211
- package/dist/assets/{favicon-DmUMR1rm.svg → favicon-DXDXzv5K.svg} +290 -290
- package/dist/assets/index-BaVlGc48.js +18 -0
- package/dist/assets/index-T2xxnq_A.css +1 -0
- package/dist/favicon.svg +290 -290
- package/dist/figranium_icon.svg +290 -290
- package/dist/figranium_logo.svg +60 -60
- package/dist/index.html +26 -26
- package/dist/novnc.html +108 -108
- package/dist/styles.css +86 -86
- package/extraction-worker.js +211 -207
- package/headful.js +584 -569
- package/html-utils.js +24 -24
- package/package.json +82 -82
- package/proxy-rotation.js +261 -261
- package/proxy-utils.js +84 -84
- package/public/favicon.svg +290 -290
- package/public/figranium_icon.svg +290 -290
- package/public/figranium_logo.svg +60 -60
- package/public/novnc.html +108 -108
- package/public/styles.css +86 -86
- package/scrape.js +389 -389
- package/scripts/postinstall.js +21 -21
- package/server.js +626 -626
- package/src/server/cron-parser.js +325 -316
- package/src/server/routes/schedules.js +171 -171
- package/src/server/scheduler.js +379 -379
- package/url-utils.js +339 -323
- package/user-agent-settings.js +76 -76
- package/dist/assets/index-C2rVEs3q.css +0 -1
- package/dist/assets/index-CvaIUcTv.js +0 -18
package/server.js
CHANGED
|
@@ -1,626 +1,626 @@
|
|
|
1
|
-
const express = require('express');
|
|
2
|
-
const session = require('express-session');
|
|
3
|
-
const FileStore = require('session-file-store')(session);
|
|
4
|
-
const fs = require('fs');
|
|
5
|
-
const path = require('path');
|
|
6
|
-
const crypto = require('crypto');
|
|
7
|
-
|
|
8
|
-
// Catch unhandled promise rejections from playwright-extra stealth plugin.
|
|
9
|
-
// When pages close before the plugin finishes async CDP initialization,
|
|
10
|
-
// benign rejections bubble up and would otherwise crash the process.
|
|
11
|
-
process.on('unhandledRejection', (reason) => {
|
|
12
|
-
const msg = reason && reason.message ? reason.message : String(reason);
|
|
13
|
-
if (/Target page, context or browser has been closed/i.test(msg)) {
|
|
14
|
-
console.warn('[STEALTH] Suppressed benign rejection:', msg);
|
|
15
|
-
return;
|
|
16
|
-
}
|
|
17
|
-
console.error('Unhandled rejection:', reason);
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
// Constants
|
|
21
|
-
const {
|
|
22
|
-
DEFAULT_PORT,
|
|
23
|
-
DIST_DIR,
|
|
24
|
-
DATA_DIR,
|
|
25
|
-
SESSIONS_DIR,
|
|
26
|
-
SESSION_SECRET_FILE,
|
|
27
|
-
SESSION_TTL_SECONDS,
|
|
28
|
-
NOVNC_PORT,
|
|
29
|
-
WEBSOCKIFY_PATH
|
|
30
|
-
} = require('./src/server/constants');
|
|
31
|
-
|
|
32
|
-
const {
|
|
33
|
-
loadTasks,
|
|
34
|
-
getTaskById
|
|
35
|
-
} = require('./src/server/storage');
|
|
36
|
-
|
|
37
|
-
// Context & Utils
|
|
38
|
-
const {
|
|
39
|
-
executionStreams,
|
|
40
|
-
stopRequests,
|
|
41
|
-
sendExecutionUpdate
|
|
42
|
-
} = require('./src/server/state');
|
|
43
|
-
const {
|
|
44
|
-
findAvailablePort,
|
|
45
|
-
proxyWebsockify,
|
|
46
|
-
isPortAvailable
|
|
47
|
-
} = require('./src/server/utils');
|
|
48
|
-
const { isValidWebSocketOrigin, fetchWithRedirectValidation } = require('./url-utils');
|
|
49
|
-
|
|
50
|
-
// Middleware
|
|
51
|
-
const {
|
|
52
|
-
authRateLimiter,
|
|
53
|
-
dataRateLimiter,
|
|
54
|
-
csrfProtection,
|
|
55
|
-
requireIpAllowlist,
|
|
56
|
-
requireAuth,
|
|
57
|
-
isIpAllowed,
|
|
58
|
-
requireApiKey,
|
|
59
|
-
requireAuthOrApiKey
|
|
60
|
-
} = require('./src/server/middleware');
|
|
61
|
-
|
|
62
|
-
// Feature Modules (Legacy/Existing)
|
|
63
|
-
const { handleScrape } = require('./scrape');
|
|
64
|
-
const { handleAgent, setProgressReporter, setStopChecker } = require('./agent');
|
|
65
|
-
const { handleHeadful, stopHeadful, toggleInspectMode, headfulEventEmitter } = require('./headful');
|
|
66
|
-
|
|
67
|
-
// Routes
|
|
68
|
-
const authRoutes = require('./src/server/routes/auth');
|
|
69
|
-
const settingsRoutes = require('./src/server/routes/settings');
|
|
70
|
-
const taskRoutes = require('./src/server/routes/tasks');
|
|
71
|
-
const executionRoutes = require('./src/server/routes/executions');
|
|
72
|
-
const dataRoutes = require('./src/server/routes/data');
|
|
73
|
-
const viewRoutes = require('./src/server/routes/views');
|
|
74
|
-
const scheduleRoutes = require('./src/server/routes/schedules');
|
|
75
|
-
const credentialRoutes = require('./src/server/routes/credentials');
|
|
76
|
-
const healthRoutes = require('./src/server/routes/health');
|
|
77
|
-
const { pushOutput } = require('./src/server/outputProviders');
|
|
78
|
-
const { migrateStorageState } = require('./src/server/migrate-storage');
|
|
79
|
-
const { concurrencyGate } = require('./src/server/execution-queue');
|
|
80
|
-
const { validateUrl } = require('./url-utils');
|
|
81
|
-
|
|
82
|
-
const app = express();
|
|
83
|
-
app.disable('x-powered-by');
|
|
84
|
-
const port = Number(process.env.PORT) || DEFAULT_PORT;
|
|
85
|
-
|
|
86
|
-
// Session Secret Setup
|
|
87
|
-
let SESSION_SECRET = process.env.SESSION_SECRET;
|
|
88
|
-
if (!SESSION_SECRET) {
|
|
89
|
-
try {
|
|
90
|
-
if (fs.existsSync(SESSION_SECRET_FILE)) {
|
|
91
|
-
SESSION_SECRET = fs.readFileSync(SESSION_SECRET_FILE, 'utf8').trim();
|
|
92
|
-
} else {
|
|
93
|
-
// Generate secret using crypto.randomBytes
|
|
94
|
-
SESSION_SECRET = crypto.randomBytes(48).toString('hex');
|
|
95
|
-
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
96
|
-
fs.writeFileSync(SESSION_SECRET_FILE, SESSION_SECRET);
|
|
97
|
-
}
|
|
98
|
-
} catch (e) {
|
|
99
|
-
console.warn('Failed to load session secret from disk, falling back to process env only.');
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
if (!SESSION_SECRET) {
|
|
103
|
-
throw new Error('SESSION_SECRET environment variable is required');
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
// Ensure Directories
|
|
107
|
-
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
108
|
-
if (!fs.existsSync(SESSIONS_DIR)) fs.mkdirSync(SESSIONS_DIR, { recursive: true });
|
|
109
|
-
|
|
110
|
-
// Trust Proxy
|
|
111
|
-
const TRUST_PROXY = ['1', 'true', 'yes'].includes(String(process.env.TRUST_PROXY || '').toLowerCase());
|
|
112
|
-
if (TRUST_PROXY) {
|
|
113
|
-
app.set('trust proxy', true);
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
// Session Cookie Secure
|
|
117
|
-
const SESSION_COOKIE_SECURE = ['1', 'true', 'yes'].includes(String(process.env.SESSION_COOKIE_SECURE || '').toLowerCase());
|
|
118
|
-
if (!SESSION_COOKIE_SECURE && process.env.NODE_ENV === 'production') {
|
|
119
|
-
console.warn('[SECURITY] SESSION_COOKIE_SECURE is not enabled. Set SESSION_COOKIE_SECURE=1 when running behind HTTPS.');
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
// Wire up Agent Callbacks
|
|
123
|
-
setProgressReporter(sendExecutionUpdate);
|
|
124
|
-
setStopChecker((runId) => {
|
|
125
|
-
if (!runId) return false;
|
|
126
|
-
if (stopRequests.has(runId)) {
|
|
127
|
-
stopRequests.delete(runId);
|
|
128
|
-
return true;
|
|
129
|
-
}
|
|
130
|
-
return false;
|
|
131
|
-
});
|
|
132
|
-
|
|
133
|
-
// App Middleware
|
|
134
|
-
app.use(requireIpAllowlist);
|
|
135
|
-
|
|
136
|
-
// Security Headers
|
|
137
|
-
app.use((req, res, next) => {
|
|
138
|
-
res.setHeader('X-Content-Type-Options', 'nosniff');
|
|
139
|
-
res.setHeader('X-Frame-Options', 'SAMEORIGIN');
|
|
140
|
-
res.setHeader('X-XSS-Protection', '1; mode=block');
|
|
141
|
-
res.setHeader('Referrer-Policy', 'strict-origin-when-cross-origin');
|
|
142
|
-
|
|
143
|
-
// Content Security Policy
|
|
144
|
-
const csp = [
|
|
145
|
-
"default-src 'self'",
|
|
146
|
-
"script-src 'self' 'unsafe-inline' 'unsafe-eval'",
|
|
147
|
-
"style-src 'self' 'unsafe-inline' https://fonts.googleapis.com",
|
|
148
|
-
"font-src 'self' https://fonts.gstatic.com",
|
|
149
|
-
"img-src 'self' data: blob: https://www.google.com https://*.gstatic.com https://cdn.jsdelivr.net https://raw.githubusercontent.com",
|
|
150
|
-
"connect-src 'self' https://api.github.com https://generativelanguage.googleapis.com https://api.openai.com https://api.anthropic.com https://api.baserow.io",
|
|
151
|
-
"media-src 'self' blob:",
|
|
152
|
-
"frame-src 'self'"
|
|
153
|
-
].join('; ');
|
|
154
|
-
res.setHeader('Content-Security-Policy', csp);
|
|
155
|
-
|
|
156
|
-
if (SESSION_COOKIE_SECURE) {
|
|
157
|
-
res.setHeader('Strict-Transport-Security', 'max-age=31536000; includeSubDomains');
|
|
158
|
-
}
|
|
159
|
-
next();
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
app.use(express.json({ limit: '2mb' }));
|
|
163
|
-
|
|
164
|
-
const sessionStore = new FileStore({
|
|
165
|
-
path: SESSIONS_DIR,
|
|
166
|
-
ttl: SESSION_TTL_SECONDS,
|
|
167
|
-
retries: 5,
|
|
168
|
-
retryDelay: 100,
|
|
169
|
-
reapInterval: 3600,
|
|
170
|
-
logFn: () => { }
|
|
171
|
-
});
|
|
172
|
-
|
|
173
|
-
// Suppress session file store EPERM errors on Windows (antivirus/indexer file locking)
|
|
174
|
-
sessionStore.on('error', (err) => {
|
|
175
|
-
if (err && err.code === 'EPERM') return; // Silently ignore
|
|
176
|
-
if (err && err.code === 'ENOENT') return; // Session file deleted between read attempts
|
|
177
|
-
console.error('[SESSION] Store error:', err);
|
|
178
|
-
});
|
|
179
|
-
|
|
180
|
-
app.use(session({
|
|
181
|
-
store: sessionStore,
|
|
182
|
-
secret: SESSION_SECRET,
|
|
183
|
-
resave: true,
|
|
184
|
-
rolling: true,
|
|
185
|
-
saveUninitialized: false,
|
|
186
|
-
cookie: {
|
|
187
|
-
httpOnly: true,
|
|
188
|
-
secure: SESSION_COOKIE_SECURE,
|
|
189
|
-
sameSite: 'strict',
|
|
190
|
-
maxAge: SESSION_TTL_SECONDS * 1000
|
|
191
|
-
}
|
|
192
|
-
}));
|
|
193
|
-
|
|
194
|
-
app.use(csrfProtection);
|
|
195
|
-
|
|
196
|
-
// API Routes
|
|
197
|
-
app.use('/api/auth', authRoutes);
|
|
198
|
-
app.use('/api/settings', settingsRoutes);
|
|
199
|
-
app.use('/api/tasks', taskRoutes);
|
|
200
|
-
app.use('/api/executions', executionRoutes);
|
|
201
|
-
app.use('/api', dataRoutes);
|
|
202
|
-
app.use('/api/data', dataRoutes);
|
|
203
|
-
app.use('/api/schedules', scheduleRoutes);
|
|
204
|
-
app.use('/api/credentials', credentialRoutes);
|
|
205
|
-
app.use('/api/health', healthRoutes);
|
|
206
|
-
|
|
207
|
-
// View Routes & Static
|
|
208
|
-
app.use('/', viewRoutes);
|
|
209
|
-
|
|
210
|
-
// Execution Entry Points (Top-level routes kept for compatibility/simplicity)
|
|
211
|
-
const registerExecution = (req, res, baseMeta = {}) => {
|
|
212
|
-
// This is a simplified version of the one in server.js,
|
|
213
|
-
// relying on the fact that handleScrape/Agent/Headful will handle the response.
|
|
214
|
-
// However, the original registerExecution wrapped res.json to capture result
|
|
215
|
-
// and appended to execution log on finish.
|
|
216
|
-
// We need to restore that logic here or import it.
|
|
217
|
-
// Since it was local to server.js, I should probably implement it here or imports.
|
|
218
|
-
// It depends on `appendExecution`.
|
|
219
|
-
|
|
220
|
-
// For now, I will re-implement it here using imports.
|
|
221
|
-
const { appendExecution } = require('./src/server/storage');
|
|
222
|
-
|
|
223
|
-
const start = Date.now();
|
|
224
|
-
const requestId = 'exec_' + start + '_' + Math.floor(Math.random() * 1000);
|
|
225
|
-
res.locals.executionId = requestId;
|
|
226
|
-
const originalJson = res.json.bind(res);
|
|
227
|
-
res.json = (body) => {
|
|
228
|
-
res.locals.executionResult = body;
|
|
229
|
-
return originalJson(body);
|
|
230
|
-
};
|
|
231
|
-
res.on('finish', () => {
|
|
232
|
-
const durationMs = Date.now() - start;
|
|
233
|
-
const body = req.body || {};
|
|
234
|
-
const entry = {
|
|
235
|
-
id: requestId,
|
|
236
|
-
timestamp: start,
|
|
237
|
-
method: req.method,
|
|
238
|
-
path: req.path,
|
|
239
|
-
status: res.statusCode,
|
|
240
|
-
durationMs,
|
|
241
|
-
source: body.runSource || req.query.runSource || baseMeta.source || 'unknown',
|
|
242
|
-
mode: body.mode || baseMeta.mode || 'unknown',
|
|
243
|
-
taskId: body.taskId || baseMeta.taskId || null,
|
|
244
|
-
taskName: body.name || baseMeta.taskName || null,
|
|
245
|
-
url: body.url || req.query.url || null,
|
|
246
|
-
taskSnapshot: body.taskSnapshot || null,
|
|
247
|
-
result: res.locals.executionResult || null
|
|
248
|
-
};
|
|
249
|
-
appendExecution(entry).catch(err => console.error('Failed to append execution:', err));
|
|
250
|
-
|
|
251
|
-
const outputConfig = body.output || (body.taskSnapshot && body.taskSnapshot.output);
|
|
252
|
-
if (outputConfig && entry.result) {
|
|
253
|
-
pushOutput(outputConfig, entry.result.data, requestId)
|
|
254
|
-
.catch(err => console.error('[OUTPUT] Unexpected error:', err));
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
// Webhook callback: POST result to caller-provided URL
|
|
258
|
-
const webhookUrl = res.locals.webhookUrl;
|
|
259
|
-
if (webhookUrl && entry.result) {
|
|
260
|
-
const payload = JSON.stringify({
|
|
261
|
-
executionId: entry.id,
|
|
262
|
-
taskId: entry.taskId,
|
|
263
|
-
status: entry.status,
|
|
264
|
-
durationMs: entry.durationMs,
|
|
265
|
-
result: entry.result
|
|
266
|
-
});
|
|
267
|
-
fetchWithRedirectValidation(webhookUrl, {
|
|
268
|
-
method: 'POST',
|
|
269
|
-
headers: { 'Content-Type': 'application/json' },
|
|
270
|
-
body: payload,
|
|
271
|
-
signal: AbortSignal.timeout(10000)
|
|
272
|
-
}).catch(err => console.error('[WEBHOOK] Failed to deliver:', err.message));
|
|
273
|
-
}
|
|
274
|
-
});
|
|
275
|
-
};
|
|
276
|
-
|
|
277
|
-
const preprocessScrapeRequest = (req) => {
|
|
278
|
-
const vars = req.body?.taskVariables || req.body?.variables || req.query?.taskVariables || req.query?.variables || {};
|
|
279
|
-
let safeVars = vars;
|
|
280
|
-
if (typeof vars === 'string') {
|
|
281
|
-
try { safeVars = JSON.parse(vars); } catch { }
|
|
282
|
-
} else if (typeof vars !== 'object') {
|
|
283
|
-
safeVars = {};
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
const resolve = (str) => {
|
|
287
|
-
if (typeof str !== 'string') return str;
|
|
288
|
-
return str.replace(/\{\$([\w.]+)\}/g, (_match, name) => {
|
|
289
|
-
if (name === 'now') return new Date().toISOString();
|
|
290
|
-
const value = safeVars[name];
|
|
291
|
-
if (value === undefined || value === null) return '';
|
|
292
|
-
if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
|
|
293
|
-
return String(value);
|
|
294
|
-
}
|
|
295
|
-
try {
|
|
296
|
-
return JSON.stringify(value);
|
|
297
|
-
} catch {
|
|
298
|
-
return String(value);
|
|
299
|
-
}
|
|
300
|
-
});
|
|
301
|
-
};
|
|
302
|
-
|
|
303
|
-
if (req.body) {
|
|
304
|
-
if (req.body.url) req.body.url = resolve(req.body.url);
|
|
305
|
-
if (req.body.selector) req.body.selector = resolve(req.body.selector);
|
|
306
|
-
if (req.body.extractionScript) req.body.extractionScript = resolve(req.body.extractionScript);
|
|
307
|
-
}
|
|
308
|
-
if (req.query) {
|
|
309
|
-
if (req.query.url) req.query.url = resolve(req.query.url);
|
|
310
|
-
if (req.query.selector) req.query.selector = resolve(req.query.selector);
|
|
311
|
-
if (req.query.extractionScript) req.query.extractionScript = resolve(req.query.extractionScript);
|
|
312
|
-
}
|
|
313
|
-
};
|
|
314
|
-
|
|
315
|
-
const executeTaskById = async (req, res) => {
|
|
316
|
-
const taskId = req.params.id;
|
|
317
|
-
let task;
|
|
318
|
-
try {
|
|
319
|
-
await loadTasks();
|
|
320
|
-
task = getTaskById(taskId);
|
|
321
|
-
} catch (e) {
|
|
322
|
-
return res.status(500).json({ error: 'FAILED_TO_LOAD_TASK' });
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
if (!task) {
|
|
326
|
-
return res.status(404).json({ error: 'TASK_NOT_FOUND' });
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
// Webhook: validate and stash for post-execution delivery
|
|
330
|
-
const webhookUrl = req.body.webhookUrl;
|
|
331
|
-
if (webhookUrl) {
|
|
332
|
-
try {
|
|
333
|
-
await validateUrl(webhookUrl);
|
|
334
|
-
res.locals.webhookUrl = webhookUrl;
|
|
335
|
-
} catch (err) {
|
|
336
|
-
return res.status(400).json({ error: 'INVALID_WEBHOOK_URL', message: err.message });
|
|
337
|
-
}
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
registerExecution(req, res, { mode: task.mode || 'agent', taskId: task.id, taskName: task.name });
|
|
341
|
-
|
|
342
|
-
const clientVars = req.body.variables || req.body.taskVariables || {};
|
|
343
|
-
const taskVars = {};
|
|
344
|
-
if (task.variables) {
|
|
345
|
-
for (const [key, v] of Object.entries(task.variables)) {
|
|
346
|
-
taskVars[key] = v.value;
|
|
347
|
-
}
|
|
348
|
-
}
|
|
349
|
-
const runtimeVars = { ...taskVars, ...clientVars };
|
|
350
|
-
|
|
351
|
-
req.body = {
|
|
352
|
-
...req.body,
|
|
353
|
-
...task,
|
|
354
|
-
url: req.body.url || task.url,
|
|
355
|
-
taskId: task.id,
|
|
356
|
-
variables: runtimeVars,
|
|
357
|
-
taskVariables: runtimeVars,
|
|
358
|
-
actions: task.actions || [],
|
|
359
|
-
mode: task.mode || 'agent',
|
|
360
|
-
extractionScript: req.body.extractionScript || task.extractionScript
|
|
361
|
-
};
|
|
362
|
-
|
|
363
|
-
if (task.mode === 'scrape') {
|
|
364
|
-
preprocessScrapeRequest(req);
|
|
365
|
-
return handleScrape(req, res);
|
|
366
|
-
} else if (task.mode === 'headful') {
|
|
367
|
-
if (req.body && typeof req.body.url === 'string') {
|
|
368
|
-
req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
|
|
369
|
-
const value = runtimeVars[name];
|
|
370
|
-
if (value === undefined || value === null) return '';
|
|
371
|
-
return String(value);
|
|
372
|
-
});
|
|
373
|
-
}
|
|
374
|
-
return handleHeadful(req, res);
|
|
375
|
-
} else {
|
|
376
|
-
try {
|
|
377
|
-
const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
|
|
378
|
-
if (runId) {
|
|
379
|
-
sendExecutionUpdate(runId, { status: 'started' });
|
|
380
|
-
}
|
|
381
|
-
} catch {
|
|
382
|
-
// ignore
|
|
383
|
-
}
|
|
384
|
-
return handleAgent(req, res);
|
|
385
|
-
}
|
|
386
|
-
};
|
|
387
|
-
|
|
388
|
-
app.post('/tasks/:id/api', requireApiKey, dataRateLimiter, concurrencyGate, executeTaskById);
|
|
389
|
-
app.post('/api/tasks/:id/api', requireApiKey, dataRateLimiter, concurrencyGate, executeTaskById);
|
|
390
|
-
|
|
391
|
-
app.all('/scrape', requireAuth, dataRateLimiter, concurrencyGate, (req, res) => {
|
|
392
|
-
registerExecution(req, res, { mode: 'scrape' });
|
|
393
|
-
preprocessScrapeRequest(req);
|
|
394
|
-
return handleScrape(req, res);
|
|
395
|
-
});
|
|
396
|
-
app.all('/scraper', requireAuth, dataRateLimiter, concurrencyGate, (req, res) => {
|
|
397
|
-
registerExecution(req, res, { mode: 'scrape' });
|
|
398
|
-
preprocessScrapeRequest(req);
|
|
399
|
-
return handleScrape(req, res);
|
|
400
|
-
});
|
|
401
|
-
app.all('/agent', requireAuth, dataRateLimiter, concurrencyGate, (req, res) => {
|
|
402
|
-
registerExecution(req, res, { mode: 'agent' });
|
|
403
|
-
try {
|
|
404
|
-
const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
|
|
405
|
-
if (runId) {
|
|
406
|
-
sendExecutionUpdate(runId, { status: 'started' });
|
|
407
|
-
}
|
|
408
|
-
} catch {
|
|
409
|
-
// ignore
|
|
410
|
-
}
|
|
411
|
-
return handleAgent(req, res);
|
|
412
|
-
});
|
|
413
|
-
app.post('/headful', requireAuth, dataRateLimiter, concurrencyGate, (req, res) => {
|
|
414
|
-
registerExecution(req, res, { mode: 'headful' });
|
|
415
|
-
if (req.body) {
|
|
416
|
-
// Flatten variables from {type, value} objects to plain values
|
|
417
|
-
const rawVars = req.body.taskVariables || req.body.variables || {};
|
|
418
|
-
const vars = {};
|
|
419
|
-
for (const [key, v] of Object.entries(rawVars)) {
|
|
420
|
-
vars[key] = (v && typeof v === 'object' && 'value' in v) ? v.value : v;
|
|
421
|
-
}
|
|
422
|
-
if (req.body.variables) req.body.variables = vars;
|
|
423
|
-
if (req.body.taskVariables) req.body.taskVariables = vars;
|
|
424
|
-
if (typeof req.body.url === 'string') {
|
|
425
|
-
req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
|
|
426
|
-
const value = vars[name];
|
|
427
|
-
if (value === undefined || value === null) return '';
|
|
428
|
-
return String(value);
|
|
429
|
-
});
|
|
430
|
-
}
|
|
431
|
-
}
|
|
432
|
-
return handleHeadful(req, res);
|
|
433
|
-
});
|
|
434
|
-
app.post('/headful/stop', requireAuth, stopHeadful);
|
|
435
|
-
|
|
436
|
-
// Ensure public/captures directory exists
|
|
437
|
-
const capturesDir = path.join(__dirname, 'public', 'captures');
|
|
438
|
-
if (!fs.existsSync(capturesDir)) {
|
|
439
|
-
fs.mkdirSync(capturesDir, { recursive: true });
|
|
440
|
-
}
|
|
441
|
-
|
|
442
|
-
// NoVNC Setup
|
|
443
|
-
const novncDirCandidates = [
|
|
444
|
-
'/opt/novnc',
|
|
445
|
-
'/usr/share/novnc'
|
|
446
|
-
];
|
|
447
|
-
const novncDir = novncDirCandidates.find((candidate) => {
|
|
448
|
-
try {
|
|
449
|
-
return fs.existsSync(candidate);
|
|
450
|
-
} catch {
|
|
451
|
-
return false;
|
|
452
|
-
}
|
|
453
|
-
});
|
|
454
|
-
const novncEnabled = !!novncDir;
|
|
455
|
-
if (novncDir) {
|
|
456
|
-
app.use('/novnc', express.static(novncDir));
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
// Static Files
|
|
460
|
-
app.use('/captures', requireAuthOrApiKey, express.static(capturesDir));
|
|
461
|
-
app.use('/screenshots', requireAuthOrApiKey, express.static(capturesDir));
|
|
462
|
-
app.use(express.static(DIST_DIR));
|
|
463
|
-
|
|
464
|
-
// Headful Status Endpoint
|
|
465
|
-
app.get('/api/headful/status', requireAuth, async (req, res) => {
|
|
466
|
-
if (!novncEnabled) {
|
|
467
|
-
return res.json({ useNovnc: false });
|
|
468
|
-
}
|
|
469
|
-
// Check if the novnc port is actually in use
|
|
470
|
-
const portAvailable = await isPortAvailable(NOVNC_PORT);
|
|
471
|
-
// If the port is NOT available, something (websockify) is listening on it
|
|
472
|
-
res.json({ useNovnc: !portAvailable });
|
|
473
|
-
});
|
|
474
|
-
|
|
475
|
-
app.get('/api/headful/selector_stream', requireAuth, (req, res) => {
|
|
476
|
-
res.setHeader('Content-Type', 'text/event-stream');
|
|
477
|
-
res.setHeader('Cache-Control', 'no-cache');
|
|
478
|
-
res.setHeader('Connection', 'keep-alive');
|
|
479
|
-
if (typeof res.flushHeaders === 'function') res.flushHeaders();
|
|
480
|
-
res.write('event: ready\ndata: {}\n\n');
|
|
481
|
-
|
|
482
|
-
const onSelectorSelected = (selector) => {
|
|
483
|
-
try {
|
|
484
|
-
res.write(`data: ${JSON.stringify({ selector })}\n\n`);
|
|
485
|
-
} catch (err) {
|
|
486
|
-
// ignore
|
|
487
|
-
}
|
|
488
|
-
};
|
|
489
|
-
|
|
490
|
-
headfulEventEmitter.on('selectorSelected', onSelectorSelected);
|
|
491
|
-
|
|
492
|
-
const keepAlive = setInterval(() => {
|
|
493
|
-
try {
|
|
494
|
-
res.write(':keep-alive\n\n');
|
|
495
|
-
} catch {
|
|
496
|
-
// ignore
|
|
497
|
-
}
|
|
498
|
-
}, 20000);
|
|
499
|
-
|
|
500
|
-
req.on('close', () => {
|
|
501
|
-
clearInterval(keepAlive);
|
|
502
|
-
headfulEventEmitter.off('selectorSelected', onSelectorSelected);
|
|
503
|
-
});
|
|
504
|
-
});
|
|
505
|
-
|
|
506
|
-
app.get('/headful/selector_stream', requireAuth, (req, res) => {
|
|
507
|
-
res.setHeader('Content-Type', 'text/event-stream');
|
|
508
|
-
res.setHeader('Cache-Control', 'no-cache');
|
|
509
|
-
res.setHeader('Connection', 'keep-alive');
|
|
510
|
-
if (typeof res.flushHeaders === 'function') res.flushHeaders();
|
|
511
|
-
res.write('event: ready\ndata: {}\n\n');
|
|
512
|
-
|
|
513
|
-
const onSelectorSelected = (selector) => {
|
|
514
|
-
try {
|
|
515
|
-
res.write(`data: ${JSON.stringify({ selector })}\n\n`);
|
|
516
|
-
} catch (err) {
|
|
517
|
-
// ignore
|
|
518
|
-
}
|
|
519
|
-
};
|
|
520
|
-
|
|
521
|
-
headfulEventEmitter.on('selectorSelected', onSelectorSelected);
|
|
522
|
-
|
|
523
|
-
const keepAlive = setInterval(() => {
|
|
524
|
-
try {
|
|
525
|
-
res.write(':keep-alive\n\n');
|
|
526
|
-
} catch {
|
|
527
|
-
// ignore
|
|
528
|
-
}
|
|
529
|
-
}, 20000);
|
|
530
|
-
|
|
531
|
-
req.on('close', () => {
|
|
532
|
-
clearInterval(keepAlive);
|
|
533
|
-
headfulEventEmitter.off('selectorSelected', onSelectorSelected);
|
|
534
|
-
});
|
|
535
|
-
});
|
|
536
|
-
|
|
537
|
-
app.post('/api/headful/inspect', requireAuth, toggleInspectMode);
|
|
538
|
-
app.post('/headful/inspect', requireAuth, toggleInspectMode);
|
|
539
|
-
|
|
540
|
-
// Start Server
|
|
541
|
-
findAvailablePort(port, 20)
|
|
542
|
-
.then((availablePort) => {
|
|
543
|
-
if (availablePort !== port) {
|
|
544
|
-
console.log(`Port ${port} in use, switched to ${availablePort}.`);
|
|
545
|
-
}
|
|
546
|
-
const server = app.listen(availablePort, '0.0.0.0', () => {
|
|
547
|
-
const address = server.address();
|
|
548
|
-
const displayPort = typeof address === 'object' && address ? address.port : availablePort;
|
|
549
|
-
console.log(`Server running at http://localhost:${displayPort}`);
|
|
550
|
-
|
|
551
|
-
// One-time migration of storage_state.json cookies into persistent browser profiles
|
|
552
|
-
migrateStorageState().catch(err => console.error('[MIGRATION] Failed:', err.message));
|
|
553
|
-
|
|
554
|
-
// Start the cron scheduler
|
|
555
|
-
const { startScheduler } = require('./src/server/scheduler');
|
|
556
|
-
startScheduler().catch(err => console.error('[SCHEDULER] Failed to start:', err.message));
|
|
557
|
-
});
|
|
558
|
-
server.on('upgrade', async (req, socket, head) => {
|
|
559
|
-
if (!await isIpAllowed(req.socket?.remoteAddress)) {
|
|
560
|
-
try {
|
|
561
|
-
socket.destroy();
|
|
562
|
-
} catch {
|
|
563
|
-
// ignore
|
|
564
|
-
}
|
|
565
|
-
return;
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
// Cross-Site WebSocket Hijacking (CSWSH) protection: verify Origin header matches Host
|
|
569
|
-
if (!isValidWebSocketOrigin(req.headers.origin, req.headers.host)) {
|
|
570
|
-
console.warn(`[SECURITY] CSWSH attempt blocked: Origin ${req.headers.origin} mismatch with Host ${req.headers.host}`);
|
|
571
|
-
socket.destroy();
|
|
572
|
-
return;
|
|
573
|
-
}
|
|
574
|
-
|
|
575
|
-
const handled = proxyWebsockify(req, socket, head);
|
|
576
|
-
if (!handled) {
|
|
577
|
-
socket.destroy();
|
|
578
|
-
}
|
|
579
|
-
});
|
|
580
|
-
server.on('error', (err) => {
|
|
581
|
-
console.error('Server failed to start:', err.message || err);
|
|
582
|
-
process.exit(1);
|
|
583
|
-
});
|
|
584
|
-
|
|
585
|
-
// Graceful shutdown handler
|
|
586
|
-
let shutdownInProgress = false;
|
|
587
|
-
const gracefulShutdown = async (signal) => {
|
|
588
|
-
if (shutdownInProgress) return;
|
|
589
|
-
shutdownInProgress = true;
|
|
590
|
-
console.log(`[SHUTDOWN] Received ${signal}, shutting down gracefully...`);
|
|
591
|
-
|
|
592
|
-
// Stop accepting new connections
|
|
593
|
-
server.close(() => {
|
|
594
|
-
console.log('[SHUTDOWN] HTTP server closed.');
|
|
595
|
-
});
|
|
596
|
-
|
|
597
|
-
// Stop scheduler
|
|
598
|
-
try {
|
|
599
|
-
const { stopScheduler } = require('./src/server/scheduler');
|
|
600
|
-
stopScheduler();
|
|
601
|
-
} catch { }
|
|
602
|
-
|
|
603
|
-
// Flush pending execution writes
|
|
604
|
-
try {
|
|
605
|
-
const { flushExecutions } = require('./src/server/storage');
|
|
606
|
-
if (flushExecutions) await flushExecutions();
|
|
607
|
-
} catch { }
|
|
608
|
-
|
|
609
|
-
// Close database pool
|
|
610
|
-
try {
|
|
611
|
-
const { getPool } = require('./src/server/db');
|
|
612
|
-
const pool = getPool();
|
|
613
|
-
if (pool) await pool.end();
|
|
614
|
-
} catch { }
|
|
615
|
-
|
|
616
|
-
console.log('[SHUTDOWN] Cleanup complete.');
|
|
617
|
-
process.exit(0);
|
|
618
|
-
};
|
|
619
|
-
|
|
620
|
-
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
|
|
621
|
-
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
|
|
622
|
-
})
|
|
623
|
-
.catch((err) => {
|
|
624
|
-
console.error('Server failed to start:', err.message || err);
|
|
625
|
-
process.exit(1);
|
|
626
|
-
});
|
|
1
|
+
const express = require('express');
|
|
2
|
+
const session = require('express-session');
|
|
3
|
+
const FileStore = require('session-file-store')(session);
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const crypto = require('crypto');
|
|
7
|
+
|
|
8
|
+
// Catch unhandled promise rejections from playwright-extra stealth plugin.
|
|
9
|
+
// When pages close before the plugin finishes async CDP initialization,
|
|
10
|
+
// benign rejections bubble up and would otherwise crash the process.
|
|
11
|
+
process.on('unhandledRejection', (reason) => {
|
|
12
|
+
const msg = reason && reason.message ? reason.message : String(reason);
|
|
13
|
+
if (/Target page, context or browser has been closed/i.test(msg)) {
|
|
14
|
+
console.warn('[STEALTH] Suppressed benign rejection:', msg);
|
|
15
|
+
return;
|
|
16
|
+
}
|
|
17
|
+
console.error('Unhandled rejection:', reason);
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
// Constants
|
|
21
|
+
const {
|
|
22
|
+
DEFAULT_PORT,
|
|
23
|
+
DIST_DIR,
|
|
24
|
+
DATA_DIR,
|
|
25
|
+
SESSIONS_DIR,
|
|
26
|
+
SESSION_SECRET_FILE,
|
|
27
|
+
SESSION_TTL_SECONDS,
|
|
28
|
+
NOVNC_PORT,
|
|
29
|
+
WEBSOCKIFY_PATH
|
|
30
|
+
} = require('./src/server/constants');
|
|
31
|
+
|
|
32
|
+
const {
|
|
33
|
+
loadTasks,
|
|
34
|
+
getTaskById
|
|
35
|
+
} = require('./src/server/storage');
|
|
36
|
+
|
|
37
|
+
// Context & Utils
|
|
38
|
+
const {
|
|
39
|
+
executionStreams,
|
|
40
|
+
stopRequests,
|
|
41
|
+
sendExecutionUpdate
|
|
42
|
+
} = require('./src/server/state');
|
|
43
|
+
const {
|
|
44
|
+
findAvailablePort,
|
|
45
|
+
proxyWebsockify,
|
|
46
|
+
isPortAvailable
|
|
47
|
+
} = require('./src/server/utils');
|
|
48
|
+
const { isValidWebSocketOrigin, fetchWithRedirectValidation } = require('./url-utils');
|
|
49
|
+
|
|
50
|
+
// Middleware
|
|
51
|
+
const {
|
|
52
|
+
authRateLimiter,
|
|
53
|
+
dataRateLimiter,
|
|
54
|
+
csrfProtection,
|
|
55
|
+
requireIpAllowlist,
|
|
56
|
+
requireAuth,
|
|
57
|
+
isIpAllowed,
|
|
58
|
+
requireApiKey,
|
|
59
|
+
requireAuthOrApiKey
|
|
60
|
+
} = require('./src/server/middleware');
|
|
61
|
+
|
|
62
|
+
// Feature Modules (Legacy/Existing)
|
|
63
|
+
const { handleScrape } = require('./scrape');
|
|
64
|
+
const { handleAgent, setProgressReporter, setStopChecker } = require('./agent');
|
|
65
|
+
const { handleHeadful, stopHeadful, toggleInspectMode, headfulEventEmitter } = require('./headful');
|
|
66
|
+
|
|
67
|
+
// Routes
|
|
68
|
+
const authRoutes = require('./src/server/routes/auth');
|
|
69
|
+
const settingsRoutes = require('./src/server/routes/settings');
|
|
70
|
+
const taskRoutes = require('./src/server/routes/tasks');
|
|
71
|
+
const executionRoutes = require('./src/server/routes/executions');
|
|
72
|
+
const dataRoutes = require('./src/server/routes/data');
|
|
73
|
+
const viewRoutes = require('./src/server/routes/views');
|
|
74
|
+
const scheduleRoutes = require('./src/server/routes/schedules');
|
|
75
|
+
const credentialRoutes = require('./src/server/routes/credentials');
|
|
76
|
+
const healthRoutes = require('./src/server/routes/health');
|
|
77
|
+
const { pushOutput } = require('./src/server/outputProviders');
|
|
78
|
+
const { migrateStorageState } = require('./src/server/migrate-storage');
|
|
79
|
+
const { concurrencyGate } = require('./src/server/execution-queue');
|
|
80
|
+
const { validateUrl } = require('./url-utils');
|
|
81
|
+
|
|
82
|
+
const app = express();
|
|
83
|
+
app.disable('x-powered-by');
|
|
84
|
+
const port = Number(process.env.PORT) || DEFAULT_PORT;
|
|
85
|
+
|
|
86
|
+
// Session Secret Setup
// Resolution order:
//   1. the SESSION_SECRET environment variable,
//   2. the secret persisted in SESSION_SECRET_FILE,
//   3. a freshly generated secret, which is then persisted to SESSION_SECRET_FILE.
let SESSION_SECRET = process.env.SESSION_SECRET;
if (!SESSION_SECRET) {
    try {
        if (fs.existsSync(SESSION_SECRET_FILE)) {
            SESSION_SECRET = fs.readFileSync(SESSION_SECRET_FILE, 'utf8').trim();
        }
        if (!SESSION_SECRET) {
            // Either no file exists yet or it was empty/whitespace-only: generate a new secret
            // instead of failing later with a misleading "env variable required" error.
            SESSION_SECRET = crypto.randomBytes(48).toString('hex');
            if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
            // The secret signs session cookies, so create the file readable by the owner only.
            fs.writeFileSync(SESSION_SECRET_FILE, SESSION_SECRET, { mode: 0o600 });
        }
    } catch (e) {
        // Surface the underlying cause; a secret generated above is still used in memory
        // even when persisting it failed.
        console.warn(
            'Failed to load session secret from disk, falling back to process env only.',
            e && e.message ? e.message : e
        );
    }
}
if (!SESSION_SECRET) {
    throw new Error('SESSION_SECRET environment variable is required');
}
|
|
105
|
+
|
|
106
|
+
// Ensure Directories
for (const requiredDir of [DATA_DIR, SESSIONS_DIR]) {
    if (!fs.existsSync(requiredDir)) {
        fs.mkdirSync(requiredDir, { recursive: true });
    }
}

// Boolean environment flags are enabled by "1", "true" or "yes" (case-insensitive).
const envFlagEnabled = (rawValue) => ['1', 'true', 'yes'].includes(String(rawValue || '').toLowerCase());

// Trust Proxy
const TRUST_PROXY = envFlagEnabled(process.env.TRUST_PROXY);
if (TRUST_PROXY) {
    app.set('trust proxy', true);
}

// Session Cookie Secure
const SESSION_COOKIE_SECURE = envFlagEnabled(process.env.SESSION_COOKIE_SECURE);
const runningInProduction = process.env.NODE_ENV === 'production';
if (runningInProduction && !SESSION_COOKIE_SECURE) {
    console.warn('[SECURITY] SESSION_COOKIE_SECURE is not enabled. Set SESSION_COOKIE_SECURE=1 when running behind HTTPS.');
}
|
|
121
|
+
|
|
122
|
+
// Wire up Agent Callbacks
setProgressReporter(sendExecutionUpdate);

// A pending stop request is consumed by the first check that observes it:
// the run id is removed from stopRequests and the checker reports true once.
setStopChecker((runId) => {
    const stopRequested = Boolean(runId) && stopRequests.has(runId);
    if (stopRequested) {
        stopRequests.delete(runId);
    }
    return stopRequested;
});
|
|
132
|
+
|
|
133
|
+
// App Middleware
app.use(requireIpAllowlist);

// Security Headers
app.use((req, res, next) => {
    // Fixed hardening headers, applied in this order on every response.
    const fixedHeaders = [
        ['X-Content-Type-Options', 'nosniff'],
        ['X-Frame-Options', 'SAMEORIGIN'],
        ['X-XSS-Protection', '1; mode=block'],
        ['Referrer-Policy', 'strict-origin-when-cross-origin']
    ];
    for (const [headerName, headerValue] of fixedHeaders) {
        res.setHeader(headerName, headerValue);
    }

    // Content Security Policy: one directive per entry, joined with "; ".
    const cspDirectives = [
        "default-src 'self'",
        "script-src 'self' 'unsafe-inline' 'unsafe-eval'",
        "style-src 'self' 'unsafe-inline' https://fonts.googleapis.com",
        "font-src 'self' https://fonts.gstatic.com",
        "img-src 'self' data: blob: https://www.google.com https://*.gstatic.com https://cdn.jsdelivr.net https://raw.githubusercontent.com",
        "connect-src 'self' https://api.github.com https://generativelanguage.googleapis.com https://api.openai.com https://api.anthropic.com https://api.baserow.io",
        "media-src 'self' blob:",
        "frame-src 'self'"
    ];
    res.setHeader('Content-Security-Policy', cspDirectives.join('; '));

    // HSTS is only sent when secure session cookies are enabled.
    if (SESSION_COOKIE_SECURE) {
        res.setHeader('Strict-Transport-Security', 'max-age=31536000; includeSubDomains');
    }
    next();
});
|
|
161
|
+
|
|
162
|
+
// Parse JSON request bodies up to 2mb.
app.use(express.json({ limit: '2mb' }));

// File-backed session store living in SESSIONS_DIR; the store's own logging is silenced.
const sessionStore = new FileStore({
    path: SESSIONS_DIR,
    ttl: SESSION_TTL_SECONDS,
    retries: 5,
    retryDelay: 100,
    reapInterval: 3600,
    logFn: () => { }
});

// Suppress session file store EPERM errors on Windows (antivirus/indexer file locking)
// and ENOENT errors (session file deleted between read attempts); log everything else.
sessionStore.on('error', (err) => {
    const ignorable = err && (err.code === 'EPERM' || err.code === 'ENOENT');
    if (ignorable) {
        return;
    }
    console.error('[SESSION] Store error:', err);
});

// Cookie lifetime mirrors the store TTL (seconds -> milliseconds).
const sessionCookieOptions = {
    httpOnly: true,
    secure: SESSION_COOKIE_SECURE,
    sameSite: 'strict',
    maxAge: SESSION_TTL_SECONDS * 1000
};

app.use(session({
    store: sessionStore,
    secret: SESSION_SECRET,
    resave: true,
    rolling: true,
    saveUninitialized: false,
    cookie: sessionCookieOptions
}));

app.use(csrfProtection);
|
|
195
|
+
|
|
196
|
+
// API Routes
// Mounted in declaration order. dataRoutes is intentionally reachable under both
// /api and /api/data.
const apiRouteTable = [
    ['/api/auth', authRoutes],
    ['/api/settings', settingsRoutes],
    ['/api/tasks', taskRoutes],
    ['/api/executions', executionRoutes],
    ['/api', dataRoutes],
    ['/api/data', dataRoutes],
    ['/api/schedules', scheduleRoutes],
    ['/api/credentials', credentialRoutes],
    ['/api/health', healthRoutes]
];
for (const [mountPath, router] of apiRouteTable) {
    app.use(mountPath, router);
}

// View Routes & Static
app.use('/', viewRoutes);
|
|
209
|
+
|
|
210
|
+
// Execution Entry Points (Top-level routes kept for compatibility/simplicity)

/**
 * Instruments a request so that its outcome is recorded once the response finishes.
 *
 * - Assigns an execution id and exposes it as `res.locals.executionId`.
 * - Wraps `res.json` so the JSON body sent to the client is captured in
 *   `res.locals.executionResult`.
 * - On the response's `finish` event: appends an execution entry via `appendExecution`,
 *   forwards `result.data` to `pushOutput` when an output config is present, and POSTs
 *   the result to `res.locals.webhookUrl` when one was stashed by the caller.
 *
 * @param {object} req - Express request; `req.body` and `req.query` are read at finish time.
 * @param {object} res - Express response being instrumented.
 * @param {object} [baseMeta={}] - Fallback `source`, `mode`, `taskId` and `taskName` values.
 */
const registerExecution = (req, res, baseMeta = {}) => {
    const { appendExecution } = require('./src/server/storage');

    const startedAt = Date.now();
    const executionId = `exec_${startedAt}_${Math.floor(Math.random() * 1000)}`;
    res.locals.executionId = executionId;

    // Capture whatever JSON body the handler eventually sends.
    const sendJson = res.json.bind(res);
    res.json = (payload) => {
        res.locals.executionResult = payload;
        return sendJson(payload);
    };

    res.on('finish', () => {
        const elapsedMs = Date.now() - startedAt;
        const requestBody = req.body || {};
        const entry = {
            id: executionId,
            timestamp: startedAt,
            method: req.method,
            path: req.path,
            status: res.statusCode,
            durationMs: elapsedMs,
            source: requestBody.runSource || req.query.runSource || baseMeta.source || 'unknown',
            mode: requestBody.mode || baseMeta.mode || 'unknown',
            taskId: requestBody.taskId || baseMeta.taskId || null,
            taskName: requestBody.name || baseMeta.taskName || null,
            url: requestBody.url || req.query.url || null,
            taskSnapshot: requestBody.taskSnapshot || null,
            result: res.locals.executionResult || null
        };
        appendExecution(entry).catch(err => console.error('Failed to append execution:', err));

        // Output providers: the config may come from the request or from the task snapshot.
        const outputConfig = requestBody.output || (requestBody.taskSnapshot && requestBody.taskSnapshot.output);
        if (outputConfig && entry.result) {
            pushOutput(outputConfig, entry.result.data, executionId)
                .catch(err => console.error('[OUTPUT] Unexpected error:', err));
        }

        // Webhook callback: POST result to caller-provided URL
        const webhookUrl = res.locals.webhookUrl;
        if (!webhookUrl || !entry.result) {
            return;
        }
        const webhookBody = JSON.stringify({
            executionId: entry.id,
            taskId: entry.taskId,
            status: entry.status,
            durationMs: entry.durationMs,
            result: entry.result
        });
        const webhookRequest = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: webhookBody,
            signal: AbortSignal.timeout(10000)
        };
        fetchWithRedirectValidation(webhookUrl, webhookRequest)
            .catch(err => console.error('[WEBHOOK] Failed to deliver:', err.message));
    });
};
|
|
276
|
+
|
|
277
|
+
/**
 * Expands `{$name}` placeholders in the scrape request's `url`, `selector` and
 * `extractionScript` fields (both in `req.body` and `req.query`), mutating `req` in place.
 *
 * Variables are taken from the first truthy of `body.taskVariables`, `body.variables`,
 * `query.taskVariables`, `query.variables`. A string value is parsed as JSON.
 * Anything that does not end up as an object (malformed JSON, `null`, numbers, ...)
 * is treated as "no variables", so placeholders resolve to an empty string instead of
 * throwing or leaking string/prototype properties.
 *
 * Placeholder resolution:
 * - `{$now}` -> current time as an ISO-8601 string.
 * - unknown / null / undefined variables -> empty string.
 * - strings, numbers, booleans -> `String(value)`.
 * - other values -> `JSON.stringify(value)`.
 *
 * @param {object} req - Express-like request with optional `body` and `query` objects.
 * @returns {void}
 */
const preprocessScrapeRequest = (req) => {
    const rawVars = req.body?.taskVariables || req.body?.variables || req.query?.taskVariables || req.query?.variables || {};

    let candidateVars = rawVars;
    if (typeof rawVars === 'string') {
        try {
            candidateVars = JSON.parse(rawVars);
        } catch {
            // Malformed JSON: fall through to the "no variables" case below.
            candidateVars = null;
        }
    }
    // Only real objects can be indexed by variable name; JSON such as "null" or "5" cannot.
    const safeVars = (candidateVars !== null && typeof candidateVars === 'object') ? candidateVars : {};

    const resolve = (str) => {
        if (typeof str !== 'string') return str;
        return str.replace(/\{\$([\w.]+)\}/g, (_match, name) => {
            if (name === 'now') return new Date().toISOString();
            // Own properties only, so {$constructor} or {$__proto__} never resolve
            // to something inherited from Object.prototype.
            if (!Object.hasOwn(safeVars, name)) return '';
            const value = safeVars[name];
            if (value === undefined || value === null) return '';
            if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
                return String(value);
            }
            try {
                // JSON.stringify yields undefined for functions/symbols; emit nothing then.
                return JSON.stringify(value) ?? '';
            } catch {
                return String(value);
            }
        });
    };

    const templatedFields = ['url', 'selector', 'extractionScript'];
    for (const container of [req.body, req.query]) {
        if (!container) continue;
        for (const field of templatedFields) {
            if (container[field]) container[field] = resolve(container[field]);
        }
    }
};
|
|
314
|
+
|
|
315
|
+
/**
 * Runs a stored task by id (`req.params.id`) and dispatches it to the handler that
 * matches `task.mode`: `handleScrape`, `handleHeadful`, or `handleAgent` (the default).
 *
 * Responds with 500 `FAILED_TO_LOAD_TASK` when tasks cannot be loaded, 404
 * `TASK_NOT_FOUND` when the id is unknown, and 400 `INVALID_WEBHOOK_URL` when a
 * caller-supplied `webhookUrl` fails `validateUrl`. Otherwise the request body is
 * rebuilt from the client body merged with the task definition, and the execution is
 * registered through `registerExecution` before the mode handler runs.
 *
 * @param {object} req - Express request; `req.body` may be absent.
 * @param {object} res - Express response.
 * @returns {Promise<*>} Whatever the selected handler (or the error response) returns.
 */
const executeTaskById = async (req, res) => {
    const taskId = req.params.id;
    let task;
    try {
        await loadTasks();
        task = getTaskById(taskId);
    } catch (e) {
        return res.status(500).json({ error: 'FAILED_TO_LOAD_TASK' });
    }

    if (!task) {
        return res.status(404).json({ error: 'TASK_NOT_FOUND' });
    }

    // A POST without a parsed JSON body must not crash this async handler.
    const clientBody = (req.body && typeof req.body === 'object') ? req.body : {};

    // Webhook: validate and stash for post-execution delivery
    const webhookUrl = clientBody.webhookUrl;
    if (webhookUrl) {
        try {
            await validateUrl(webhookUrl);
            res.locals.webhookUrl = webhookUrl;
        } catch (err) {
            return res.status(400).json({ error: 'INVALID_WEBHOOK_URL', message: err.message });
        }
    }

    registerExecution(req, res, { mode: task.mode || 'agent', taskId: task.id, taskName: task.name });

    const clientVars = clientBody.variables || clientBody.taskVariables || {};
    const taskVars = {};
    if (task.variables) {
        for (const [key, v] of Object.entries(task.variables)) {
            // Stored variables are normally { type, value } records; tolerate null and
            // plain values the same way the /headful route does.
            taskVars[key] = (v && typeof v === 'object' && 'value' in v) ? v.value : v;
        }
    }
    // Client-supplied variables override the task's stored defaults.
    const runtimeVars = { ...taskVars, ...clientVars };

    req.body = {
        ...clientBody,
        ...task,
        url: clientBody.url || task.url,
        taskId: task.id,
        variables: runtimeVars,
        taskVariables: runtimeVars,
        actions: task.actions || [],
        mode: task.mode || 'agent',
        extractionScript: clientBody.extractionScript || task.extractionScript
    };

    if (task.mode === 'scrape') {
        preprocessScrapeRequest(req);
        return handleScrape(req, res);
    } else if (task.mode === 'headful') {
        // Headful mode only expands {$name} placeholders in the URL.
        if (typeof req.body.url === 'string') {
            req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
                const value = runtimeVars[name];
                if (value === undefined || value === null) return '';
                return String(value);
            });
        }
        return handleHeadful(req, res);
    } else {
        try {
            // Announce the run to progress listeners when the caller supplied a run id.
            const runId = String(req.body.runId || req.query.runId || '').trim();
            if (runId) {
                sendExecutionUpdate(runId, { status: 'started' });
            }
        } catch {
            // Progress notification is best-effort; never block the run on it.
        }
        return handleAgent(req, res);
    }
};
|
|
387
|
+
|
|
388
|
+
// Task execution by id, authenticated with an API key; available under two paths.
for (const taskApiPath of ['/tasks/:id/api', '/api/tasks/:id/api']) {
    app.post(taskApiPath, requireApiKey, dataRateLimiter, concurrencyGate, executeTaskById);
}

// /scrape and /scraper share one handler: register, expand placeholders, scrape.
const runScrapeRequest = (req, res) => {
    registerExecution(req, res, { mode: 'scrape' });
    preprocessScrapeRequest(req);
    return handleScrape(req, res);
};
app.all('/scrape', requireAuth, dataRateLimiter, concurrencyGate, runScrapeRequest);
app.all('/scraper', requireAuth, dataRateLimiter, concurrencyGate, runScrapeRequest);

app.all('/agent', requireAuth, dataRateLimiter, concurrencyGate, (req, res) => {
    registerExecution(req, res, { mode: 'agent' });
    try {
        // Announce the run to progress listeners when a run id was supplied.
        const runId = String(req.body?.runId || req.query.runId || '').trim();
        if (runId) sendExecutionUpdate(runId, { status: 'started' });
    } catch {
        // ignore
    }
    return handleAgent(req, res);
});

app.post('/headful', requireAuth, dataRateLimiter, concurrencyGate, (req, res) => {
    registerExecution(req, res, { mode: 'headful' });
    const payload = req.body;
    if (payload) {
        // Flatten variables from {type, value} objects to plain values
        const declaredVars = payload.taskVariables || payload.variables || {};
        const flatVars = {};
        for (const [varName, varDef] of Object.entries(declaredVars)) {
            const isRecord = varDef && typeof varDef === 'object' && 'value' in varDef;
            flatVars[varName] = isRecord ? varDef.value : varDef;
        }
        if (payload.variables) payload.variables = flatVars;
        if (payload.taskVariables) payload.taskVariables = flatVars;

        // Expand {$name} placeholders in the target URL; unknown names become ''.
        if (typeof payload.url === 'string') {
            payload.url = payload.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
                const replacement = flatVars[name];
                return (replacement === undefined || replacement === null) ? '' : String(replacement);
            });
        }
    }
    return handleHeadful(req, res);
});
app.post('/headful/stop', requireAuth, stopHeadful);
|
|
435
|
+
|
|
436
|
+
// Ensure public/captures directory exists
const capturesDir = path.join(__dirname, 'public', 'captures');
if (!fs.existsSync(capturesDir)) fs.mkdirSync(capturesDir, { recursive: true });

// NoVNC Setup
// The first candidate directory that exists wins; noVNC is disabled when none does.
const novncDirCandidates = ['/opt/novnc', '/usr/share/novnc'];
const novncDir = novncDirCandidates.find((candidate) => {
    let present = false;
    try {
        present = fs.existsSync(candidate);
    } catch {
        present = false;
    }
    return present;
});
const novncEnabled = Boolean(novncDir);
if (novncEnabled) {
    app.use('/novnc', express.static(novncDir));
}

// Static Files
// Captures are served under two paths, both behind session or API-key auth.
for (const capturesMount of ['/captures', '/screenshots']) {
    app.use(capturesMount, requireAuthOrApiKey, express.static(capturesDir));
}
app.use(express.static(DIST_DIR));
|
|
463
|
+
|
|
464
|
+
// Headful Status Endpoint
// Reports { useNovnc } so the client knows whether a noVNC session can be used.
app.get('/api/headful/status', requireAuth, async (req, res) => {
    if (novncEnabled) {
        // Check if the novnc port is actually in use: when the port is NOT available,
        // something (websockify) is listening on it.
        const portIsFree = await isPortAvailable(NOVNC_PORT);
        res.json({ useNovnc: !portIsFree });
        return;
    }
    return res.json({ useNovnc: false });
});
|
|
474
|
+
|
|
475
|
+
/**
 * Server-Sent Events stream of selectors picked in the headful inspector.
 *
 * Sends an initial `ready` event, then one `data: {"selector": ...}` message for every
 * `selectorSelected` event emitted by `headfulEventEmitter`, plus a comment line every
 * 20 seconds as a keep-alive. The listener and the timer are released when the request
 * closes.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response kept open as the event stream.
 */
const streamSelectedSelectors = (req, res) => {
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');
    if (typeof res.flushHeaders === 'function') res.flushHeaders();
    res.write('event: ready\ndata: {}\n\n');

    const onSelectorSelected = (selector) => {
        try {
            res.write(`data: ${JSON.stringify({ selector })}\n\n`);
        } catch (err) {
            // ignore: a failed write must not break the emitter's other listeners
        }
    };

    headfulEventEmitter.on('selectorSelected', onSelectorSelected);

    const keepAlive = setInterval(() => {
        try {
            res.write(':keep-alive\n\n');
        } catch {
            // ignore
        }
    }, 20000);

    req.on('close', () => {
        clearInterval(keepAlive);
        headfulEventEmitter.off('selectorSelected', onSelectorSelected);
    });
};

// The stream is exposed under both the /api prefix and the legacy top-level path.
app.get('/api/headful/selector_stream', requireAuth, streamSelectedSelectors);
app.get('/headful/selector_stream', requireAuth, streamSelectedSelectors);

app.post('/api/headful/inspect', requireAuth, toggleInspectMode);
app.post('/headful/inspect', requireAuth, toggleInspectMode);
|
|
539
|
+
|
|
540
|
+
// Start Server
// Picks a free port (starting at `port`, with 20 passed as the second argument to
// findAvailablePort), starts listening, wires WebSocket upgrade handling for the
// websockify proxy, and installs SIGTERM/SIGINT shutdown handlers.
findAvailablePort(port, 20)
    .then((availablePort) => {
        if (availablePort !== port) {
            console.log(`Port ${port} in use, switched to ${availablePort}.`);
        }
        const server = app.listen(availablePort, '0.0.0.0', () => {
            const address = server.address();
            const displayPort = typeof address === 'object' && address ? address.port : availablePort;
            console.log(`Server running at http://localhost:${displayPort}`);

            // One-time migration of storage_state.json cookies into persistent browser profiles
            migrateStorageState().catch(err => console.error('[MIGRATION] Failed:', err.message));

            // Start the cron scheduler
            const { startScheduler } = require('./src/server/scheduler');
            startScheduler().catch(err => console.error('[SCHEDULER] Failed to start:', err.message));
        });

        // Tear down a socket without letting a failing destroy() escape.
        const destroySocketQuietly = (socket) => {
            try {
                socket.destroy();
            } catch {
                // ignore
            }
        };

        server.on('upgrade', async (req, socket, head) => {
            // This listener is async: without the try/catch, a rejection from isIpAllowed
            // or a throw from the proxy would surface as an unhandled promise rejection.
            try {
                if (!await isIpAllowed(req.socket?.remoteAddress)) {
                    destroySocketQuietly(socket);
                    return;
                }

                // Cross-Site WebSocket Hijacking (CSWSH) protection: verify Origin header matches Host
                if (!isValidWebSocketOrigin(req.headers.origin, req.headers.host)) {
                    console.warn(`[SECURITY] CSWSH attempt blocked: Origin ${req.headers.origin} mismatch with Host ${req.headers.host}`);
                    socket.destroy();
                    return;
                }

                const handled = proxyWebsockify(req, socket, head);
                if (!handled) {
                    socket.destroy();
                }
            } catch (err) {
                console.error('[UPGRADE] Failed to handle upgrade request:', err && err.message ? err.message : err);
                destroySocketQuietly(socket);
            }
        });
        server.on('error', (err) => {
            console.error('Server failed to start:', err.message || err);
            process.exit(1);
        });

        // Graceful shutdown handler
        let shutdownInProgress = false;
        const gracefulShutdown = async (signal) => {
            // Repeated signals while a shutdown is already running are ignored.
            if (shutdownInProgress) return;
            shutdownInProgress = true;
            console.log(`[SHUTDOWN] Received ${signal}, shutting down gracefully...`);

            // Stop accepting new connections.
            // NOTE(review): the close callback is not awaited; process.exit(0) below runs
            // once the cleanup steps finish, whether or not open connections have drained.
            server.close(() => {
                console.log('[SHUTDOWN] HTTP server closed.');
            });

            // Each cleanup step below is best-effort: a failure in one must not prevent
            // the remaining steps or the final exit.

            // Stop scheduler
            try {
                const { stopScheduler } = require('./src/server/scheduler');
                stopScheduler();
            } catch { }

            // Flush pending execution writes
            try {
                const { flushExecutions } = require('./src/server/storage');
                if (flushExecutions) await flushExecutions();
            } catch { }

            // Close database pool
            try {
                const { getPool } = require('./src/server/db');
                const pool = getPool();
                if (pool) await pool.end();
            } catch { }

            console.log('[SHUTDOWN] Cleanup complete.');
            process.exit(0);
        };

        process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
        process.on('SIGINT', () => gracefulShutdown('SIGINT'));
    })
    .catch((err) => {
        console.error('Server failed to start:', err.message || err);
        process.exit(1);
    });
|