figranium 0.9.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -1,404 +1,517 @@
1
- const express = require('express');
2
- const session = require('express-session');
3
- const FileStore = require('session-file-store')(session);
4
- const fs = require('fs');
5
- const path = require('path');
6
- const crypto = require('crypto');
7
-
8
- // Constants
9
- const {
10
- DEFAULT_PORT,
11
- DIST_DIR,
12
- DATA_DIR,
13
- SESSIONS_DIR,
14
- SESSION_SECRET_FILE,
15
- SESSION_TTL_SECONDS,
16
- NOVNC_PORT,
17
- WEBSOCKIFY_PATH
18
- } = require('./src/server/constants');
19
-
20
- const {
21
- loadTasks,
22
- getTaskById
23
- } = require('./src/server/storage');
24
-
25
- // Context & Utils
26
- const {
27
- executionStreams,
28
- stopRequests,
29
- sendExecutionUpdate
30
- } = require('./src/server/state');
31
- const {
32
- findAvailablePort,
33
- proxyWebsockify
34
- } = require('./src/server/utils');
35
-
36
- // Middleware
37
- const {
38
- authRateLimiter,
39
- dataRateLimiter,
40
- csrfProtection,
41
- requireIpAllowlist,
42
- requireAuth,
43
- isIpAllowed,
44
- requireApiKey
45
- } = require('./src/server/middleware');
46
-
47
- // Feature Modules (Legacy/Existing)
48
- const { handleScrape } = require('./scrape');
49
- const { handleAgent, setProgressReporter, setStopChecker } = require('./agent');
50
- const { handleHeadful, stopHeadful } = require('./headful');
51
-
52
- // Routes
53
- const authRoutes = require('./src/server/routes/auth');
54
- const settingsRoutes = require('./src/server/routes/settings');
55
- const taskRoutes = require('./src/server/routes/tasks');
56
- const executionRoutes = require('./src/server/routes/executions');
57
- const dataRoutes = require('./src/server/routes/data');
58
- const viewRoutes = require('./src/server/routes/views');
59
-
60
- const app = express();
61
- const port = Number(process.env.PORT) || DEFAULT_PORT;
62
-
63
- // Session Secret Setup
64
- let SESSION_SECRET = process.env.SESSION_SECRET;
65
- if (!SESSION_SECRET) {
66
- try {
67
- if (fs.existsSync(SESSION_SECRET_FILE)) {
68
- SESSION_SECRET = fs.readFileSync(SESSION_SECRET_FILE, 'utf8').trim();
69
- } else {
70
- // Generate secret using crypto.randomBytes
71
- SESSION_SECRET = crypto.randomBytes(48).toString('hex');
72
- if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
73
- fs.writeFileSync(SESSION_SECRET_FILE, SESSION_SECRET);
74
- }
75
- } catch (e) {
76
- console.warn('Failed to load session secret from disk, falling back to process env only.');
77
- }
78
- }
79
- if (!SESSION_SECRET) {
80
- throw new Error('SESSION_SECRET environment variable is required');
81
- }
82
-
83
- // Ensure Directories
84
- if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
85
- if (!fs.existsSync(SESSIONS_DIR)) fs.mkdirSync(SESSIONS_DIR, { recursive: true });
86
-
87
- // Trust Proxy
88
- const TRUST_PROXY = ['1', 'true', 'yes'].includes(String(process.env.TRUST_PROXY || '').toLowerCase());
89
- if (TRUST_PROXY) {
90
- app.set('trust proxy', true);
91
- }
92
-
93
- // Session Cookie Secure
94
- const SESSION_COOKIE_SECURE = ['1', 'true', 'yes'].includes(String(process.env.SESSION_COOKIE_SECURE || '').toLowerCase());
95
- if (!SESSION_COOKIE_SECURE && process.env.NODE_ENV === 'production') {
96
- console.warn('[SECURITY] SESSION_COOKIE_SECURE is not enabled. Set SESSION_COOKIE_SECURE=1 when running behind HTTPS.');
97
- }
98
-
99
- // Wire up Agent Callbacks
100
- setProgressReporter(sendExecutionUpdate);
101
- setStopChecker((runId) => {
102
- if (!runId) return false;
103
- if (stopRequests.has(runId)) {
104
- stopRequests.delete(runId);
105
- return true;
106
- }
107
- return false;
108
- });
109
-
110
- // App Middleware
111
- app.use(requireIpAllowlist);
112
- app.use(express.json({ limit: '50mb' }));
113
-
114
- const sessionStore = new FileStore({
115
- path: SESSIONS_DIR,
116
- ttl: SESSION_TTL_SECONDS,
117
- retries: 5,
118
- retryDelay: 100,
119
- reapInterval: 3600,
120
- logFn: () => { }
121
- });
122
-
123
- // Suppress session file store EPERM errors on Windows (antivirus/indexer file locking)
124
- sessionStore.on('error', (err) => {
125
- if (err && err.code === 'EPERM') return; // Silently ignore
126
- if (err && err.code === 'ENOENT') return; // Session file deleted between read attempts
127
- console.error('[SESSION] Store error:', err);
128
- });
129
-
130
- app.use(session({
131
- store: sessionStore,
132
- secret: SESSION_SECRET,
133
- resave: false,
134
- saveUninitialized: false,
135
- cookie: {
136
- secure: SESSION_COOKIE_SECURE,
137
- sameSite: 'strict',
138
- maxAge: SESSION_TTL_SECONDS * 1000
139
- }
140
- }));
141
-
142
- app.use(csrfProtection);
143
-
144
- // API Routes
145
- app.use('/api/auth', authRoutes);
146
- app.use('/api/settings', settingsRoutes);
147
- app.use('/api/tasks', taskRoutes);
148
- app.use('/api/executions', executionRoutes);
149
- app.use('/api/data', dataRoutes);
150
-
151
- // View Routes & Static
152
- app.use('/', viewRoutes);
153
-
154
- // Execution Entry Points (Top-level routes kept for compatibility/simplicity)
155
- const registerExecution = (req, res, baseMeta = {}) => {
156
- // This is a simplified version of the one in server.js,
157
- // relying on the fact that handleScrape/Agent/Headful will handle the response.
158
- // However, the original registerExecution wrapped res.json to capture result
159
- // and appended to execution log on finish.
160
- // We need to restore that logic here or import it.
161
- // Since it was local to server.js, I should probably implement it here or imports.
162
- // It depends on `appendExecution`.
163
-
164
- // For now, I will re-implement it here using imports.
165
- const { appendExecution } = require('./src/server/storage');
166
-
167
- const start = Date.now();
168
- const requestId = 'exec_' + start + '_' + Math.floor(Math.random() * 1000);
169
- res.locals.executionId = requestId;
170
- const originalJson = res.json.bind(res);
171
- res.json = (body) => {
172
- res.locals.executionResult = body;
173
- return originalJson(body);
174
- };
175
- res.on('finish', () => {
176
- const durationMs = Date.now() - start;
177
- const body = req.body || {};
178
- const entry = {
179
- id: requestId,
180
- timestamp: start,
181
- method: req.method,
182
- path: req.path,
183
- status: res.statusCode,
184
- durationMs,
185
- source: body.runSource || req.query.runSource || baseMeta.source || 'unknown',
186
- mode: body.mode || baseMeta.mode || 'unknown',
187
- taskId: body.taskId || baseMeta.taskId || null,
188
- taskName: body.name || baseMeta.taskName || null,
189
- url: body.url || req.query.url || null,
190
- taskSnapshot: body.taskSnapshot || null,
191
- result: res.locals.executionResult || null
192
- };
193
- appendExecution(entry).catch(err => console.error('Failed to append execution:', err));
194
- });
195
- };
196
-
197
- const preprocessScrapeRequest = (req) => {
198
- const vars = req.body?.taskVariables || req.body?.variables || req.query?.taskVariables || req.query?.variables || {};
199
- let safeVars = vars;
200
- if (typeof vars === 'string') {
201
- try { safeVars = JSON.parse(vars); } catch { }
202
- } else if (typeof vars !== 'object') {
203
- safeVars = {};
204
- }
205
-
206
- const resolve = (str) => {
207
- if (typeof str !== 'string') return str;
208
- return str.replace(/\{\$([\w.]+)\}/g, (_match, name) => {
209
- if (name === 'now') return new Date().toISOString();
210
- const value = safeVars[name];
211
- if (value === undefined || value === null) return '';
212
- if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
213
- return String(value);
214
- }
215
- try {
216
- return JSON.stringify(value);
217
- } catch {
218
- return String(value);
219
- }
220
- });
221
- };
222
-
223
- if (req.body) {
224
- if (req.body.url) req.body.url = resolve(req.body.url);
225
- if (req.body.selector) req.body.selector = resolve(req.body.selector);
226
- if (req.body.extractionScript) req.body.extractionScript = resolve(req.body.extractionScript);
227
- }
228
- if (req.query) {
229
- if (req.query.url) req.query.url = resolve(req.query.url);
230
- if (req.query.selector) req.query.selector = resolve(req.query.selector);
231
- if (req.query.extractionScript) req.query.extractionScript = resolve(req.query.extractionScript);
232
- }
233
- };
234
-
235
- const executeTaskById = async (req, res) => {
236
- const taskId = req.params.id;
237
- let task;
238
- try {
239
- await loadTasks();
240
- task = getTaskById(taskId);
241
- } catch (e) {
242
- return res.status(500).json({ error: 'FAILED_TO_LOAD_TASK' });
243
- }
244
-
245
- if (!task) {
246
- return res.status(404).json({ error: 'TASK_NOT_FOUND' });
247
- }
248
-
249
- registerExecution(req, res, { mode: task.mode || 'agent', taskId: task.id, taskName: task.name });
250
-
251
- const clientVars = req.body.variables || req.body.taskVariables || {};
252
- const taskVars = {};
253
- if (task.variables) {
254
- for (const [key, v] of Object.entries(task.variables)) {
255
- taskVars[key] = v.value;
256
- }
257
- }
258
- const runtimeVars = { ...taskVars, ...clientVars };
259
-
260
- req.body = {
261
- ...req.body,
262
- ...task,
263
- url: req.body.url || task.url,
264
- taskId: task.id,
265
- variables: runtimeVars,
266
- taskVariables: runtimeVars,
267
- actions: task.actions || [],
268
- mode: task.mode || 'agent',
269
- extractionScript: req.body.extractionScript || task.extractionScript
270
- };
271
-
272
- if (task.mode === 'scrape') {
273
- preprocessScrapeRequest(req);
274
- return handleScrape(req, res);
275
- } else if (task.mode === 'headful') {
276
- if (req.body && typeof req.body.url === 'string') {
277
- req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
278
- const value = runtimeVars[name];
279
- if (value === undefined || value === null) return '';
280
- return String(value);
281
- });
282
- }
283
- return handleHeadful(req, res);
284
- } else {
285
- try {
286
- const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
287
- if (runId) {
288
- sendExecutionUpdate(runId, { status: 'started' });
289
- }
290
- } catch {
291
- // ignore
292
- }
293
- return handleAgent(req, res);
294
- }
295
- };
296
-
297
- app.post('/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);
298
- app.post('/api/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);
299
-
300
- app.all('/scrape', requireAuth, dataRateLimiter, (req, res) => {
301
- registerExecution(req, res, { mode: 'scrape' });
302
- preprocessScrapeRequest(req);
303
- return handleScrape(req, res);
304
- });
305
- app.all('/scraper', requireAuth, dataRateLimiter, (req, res) => {
306
- registerExecution(req, res, { mode: 'scrape' });
307
- preprocessScrapeRequest(req);
308
- return handleScrape(req, res);
309
- });
310
- app.all('/agent', requireAuth, dataRateLimiter, (req, res) => {
311
- registerExecution(req, res, { mode: 'agent' });
312
- try {
313
- const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
314
- if (runId) {
315
- sendExecutionUpdate(runId, { status: 'started' });
316
- }
317
- } catch {
318
- // ignore
319
- }
320
- return handleAgent(req, res);
321
- });
322
- app.post('/headful', requireAuth, dataRateLimiter, (req, res) => {
323
- registerExecution(req, res, { mode: 'headful' });
324
- if (req.body && typeof req.body.url === 'string') {
325
- const vars = req.body.taskVariables || req.body.variables || {};
326
- req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
327
- const value = vars[name];
328
- if (value === undefined || value === null) return '';
329
- return String(value);
330
- });
331
- }
332
- return handleHeadful(req, res);
333
- });
334
- app.post('/headful/stop', requireAuth, stopHeadful);
335
-
336
- // Ensure public/captures directory exists
337
- const capturesDir = path.join(__dirname, 'public', 'captures');
338
- if (!fs.existsSync(capturesDir)) {
339
- fs.mkdirSync(capturesDir, { recursive: true });
340
- }
341
-
342
- // NoVNC Setup
343
- const novncDirCandidates = [
344
- '/opt/novnc',
345
- '/usr/share/novnc'
346
- ];
347
- const novncDir = novncDirCandidates.find((candidate) => {
348
- try {
349
- return fs.existsSync(candidate);
350
- } catch {
351
- return false;
352
- }
353
- });
354
- const novncEnabled = !!novncDir;
355
- if (novncDir) {
356
- app.use('/novnc', express.static(novncDir));
357
- }
358
-
359
- // Static Files
360
- app.use('/captures', express.static(capturesDir));
361
- app.use('/screenshots', express.static(capturesDir));
362
- app.use(express.static(DIST_DIR));
363
-
364
- // Headful Status Endpoint
365
- app.get('/api/headful/status', (req, res) => {
366
- // If noVNC is enabled (found on disk), we consider the environment optimized.
367
- // This allows Docker and manual installations with proper deps to hide the disclaimer.
368
- res.json({ useNovnc: novncEnabled });
369
- });
370
-
371
- // Start Server
372
- findAvailablePort(port, 20)
373
- .then((availablePort) => {
374
- if (availablePort !== port) {
375
- console.log(`Port ${port} in use, switched to ${availablePort}.`);
376
- }
377
- const server = app.listen(availablePort, '0.0.0.0', () => {
378
- const address = server.address();
379
- const displayPort = typeof address === 'object' && address ? address.port : availablePort;
380
- console.log(`Server running at http://localhost:${displayPort}`);
381
- });
382
- server.on('upgrade', async (req, socket, head) => {
383
- if (!await isIpAllowed(req.socket?.remoteAddress)) {
384
- try {
385
- socket.destroy();
386
- } catch {
387
- // ignore
388
- }
389
- return;
390
- }
391
- const handled = proxyWebsockify(req, socket, head);
392
- if (!handled) {
393
- socket.destroy();
394
- }
395
- });
396
- server.on('error', (err) => {
397
- console.error('Server failed to start:', err.message || err);
398
- process.exit(1);
399
- });
400
- })
401
- .catch((err) => {
402
- console.error('Server failed to start:', err.message || err);
403
- process.exit(1);
404
- });
1
+ const express = require('express');
2
+ const session = require('express-session');
3
+ const FileStore = require('session-file-store')(session);
4
+ const fs = require('fs');
5
+ const path = require('path');
6
+ const crypto = require('crypto');
7
+
// Process-wide handler for promise rejections that nothing else handled.
// Per the original note, the playwright-extra stealth plugin can reject when a
// page closes before its async CDP initialization finishes; those rejections
// are treated as benign and only produce a warning. Every other rejection is
// logged as an error. Registering this handler is what keeps such rejections
// from taking the process down.
process.on('unhandledRejection', (reason) => {
    const description = (reason && reason.message) ? reason.message : String(reason);
    const isClosedTarget = /Target page, context or browser has been closed/i.test(description);
    if (!isClosedTarget) {
        console.error('Unhandled rejection:', reason);
        return;
    }
    console.warn('[STEALTH] Suppressed benign rejection:', description);
});
19
+
20
+ // Constants
21
+ const {
22
+ DEFAULT_PORT,
23
+ DIST_DIR,
24
+ DATA_DIR,
25
+ SESSIONS_DIR,
26
+ SESSION_SECRET_FILE,
27
+ SESSION_TTL_SECONDS,
28
+ NOVNC_PORT,
29
+ WEBSOCKIFY_PATH
30
+ } = require('./src/server/constants');
31
+
32
+ const {
33
+ loadTasks,
34
+ getTaskById
35
+ } = require('./src/server/storage');
36
+
37
+ // Context & Utils
38
+ const {
39
+ executionStreams,
40
+ stopRequests,
41
+ sendExecutionUpdate
42
+ } = require('./src/server/state');
43
+ const {
44
+ findAvailablePort,
45
+ proxyWebsockify,
46
+ isPortAvailable
47
+ } = require('./src/server/utils');
48
+
49
+ // Middleware
50
+ const {
51
+ authRateLimiter,
52
+ dataRateLimiter,
53
+ csrfProtection,
54
+ requireIpAllowlist,
55
+ requireAuth,
56
+ isIpAllowed,
57
+ requireApiKey,
58
+ requireAuthOrApiKey
59
+ } = require('./src/server/middleware');
60
+
61
+ // Feature Modules (Legacy/Existing)
62
+ const { handleScrape } = require('./scrape');
63
+ const { handleAgent, setProgressReporter, setStopChecker } = require('./agent');
64
+ const { handleHeadful, stopHeadful, toggleInspectMode, headfulEventEmitter } = require('./headful');
65
+
66
+ // Routes
67
+ const authRoutes = require('./src/server/routes/auth');
68
+ const settingsRoutes = require('./src/server/routes/settings');
69
+ const taskRoutes = require('./src/server/routes/tasks');
70
+ const executionRoutes = require('./src/server/routes/executions');
71
+ const dataRoutes = require('./src/server/routes/data');
72
+ const viewRoutes = require('./src/server/routes/views');
73
+ const scheduleRoutes = require('./src/server/routes/schedules');
74
+ const credentialRoutes = require('./src/server/routes/credentials');
75
+ const { pushOutput } = require('./src/server/outputProviders');
76
+ const { migrateStorageState } = require('./src/server/migrate-storage');
77
+
// Express application instance. The `X-Powered-By` response header is turned
// off so responses do not advertise the framework.
const app = express();
app.disable('x-powered-by');
// PORT wins when it converts to a non-zero number; otherwise DEFAULT_PORT.
const port = Number(process.env.PORT) || DEFAULT_PORT;

// Session Secret Setup
// Resolution order:
//   1. the SESSION_SECRET environment variable,
//   2. the non-empty contents of SESSION_SECRET_FILE,
//   3. a freshly generated random secret, persisted to SESSION_SECRET_FILE.
let SESSION_SECRET = process.env.SESSION_SECRET;
if (!SESSION_SECRET) {
    try {
        let storedSecret = '';
        if (fs.existsSync(SESSION_SECRET_FILE)) {
            storedSecret = fs.readFileSync(SESSION_SECRET_FILE, 'utf8').trim();
        }
        if (storedSecret) {
            SESSION_SECRET = storedSecret;
        } else {
            // Missing or empty secret file: generate a new secret instead of
            // failing later with a misleading "environment variable" error.
            SESSION_SECRET = crypto.randomBytes(48).toString('hex');
            if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
            // Owner-only permissions: the secret signs session cookies, so other
            // local users must not be able to read it. `mode` only takes effect
            // when the file is created by this call.
            fs.writeFileSync(SESSION_SECRET_FILE, SESSION_SECRET, { mode: 0o600 });
        }
    } catch (e) {
        // Best effort: if only the write failed, the generated secret above is
        // still used for this process lifetime.
        console.warn('Failed to load session secret from disk, falling back to process env only.');
    }
}
if (!SESSION_SECRET) {
    throw new Error('SESSION_SECRET environment variable is required');
}

// Ensure Directories
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
if (!fs.existsSync(SESSIONS_DIR)) fs.mkdirSync(SESSIONS_DIR, { recursive: true });
105
+
// Trust Proxy
// Enabled only when the TRUST_PROXY env var is one of "1", "true" or "yes"
// (case-insensitive).
const trustProxySetting = String(process.env.TRUST_PROXY || '').toLowerCase();
const TRUST_PROXY = trustProxySetting === '1' || trustProxySetting === 'true' || trustProxySetting === 'yes';
if (TRUST_PROXY) {
    app.set('trust proxy', true);
}

// Session Cookie Secure
// Same truthy spellings as above. In production a warning is printed when the
// flag is off.
const cookieSecureSetting = String(process.env.SESSION_COOKIE_SECURE || '').toLowerCase();
const SESSION_COOKIE_SECURE = cookieSecureSetting === '1' || cookieSecureSetting === 'true' || cookieSecureSetting === 'yes';
if (process.env.NODE_ENV === 'production' && !SESSION_COOKIE_SECURE) {
    console.warn('[SECURITY] SESSION_COOKIE_SECURE is not enabled. Set SESSION_COOKIE_SECURE=1 when running behind HTTPS.');
}

// Wire up Agent Callbacks
// Progress events from the agent go through sendExecutionUpdate. The stop
// checker reports a pending stop request for a run exactly once: the request
// is removed from stopRequests as soon as it has been observed.
setProgressReporter(sendExecutionUpdate);
setStopChecker((runId) => {
    const stopRequested = Boolean(runId) && stopRequests.has(runId);
    if (stopRequested) {
        stopRequests.delete(runId);
    }
    return stopRequested;
});
128
+
// App Middleware
// The IP allowlist runs first, before any other middleware.
app.use(requireIpAllowlist);

// Security Headers
// Applied to every response that passes the allowlist.
app.use((req, res, next) => {
    res.setHeader('X-Content-Type-Options', 'nosniff');
    res.setHeader('X-Frame-Options', 'SAMEORIGIN');
    // The legacy XSS auditor is deprecated and "1; mode=block" can itself be
    // abused in the browsers that still honour it; current guidance (MDN,
    // OWASP) is to switch the auditor off explicitly.
    res.setHeader('X-XSS-Protection', '0');
    res.setHeader('Referrer-Policy', 'strict-origin-when-cross-origin');
    next();
});

// JSON request bodies are parsed with a 2mb size limit.
app.use(express.json({ limit: '2mb' }));
142
+
// File-backed session store. Store-internal logging is silenced via a no-op logFn.
const sessionStoreOptions = {
    path: SESSIONS_DIR,
    ttl: SESSION_TTL_SECONDS,
    retries: 5,
    retryDelay: 100,
    reapInterval: 3600,
    logFn: () => { }
};
const sessionStore = new FileStore(sessionStoreOptions);

// Suppress session file store EPERM errors on Windows (antivirus/indexer file locking)
// and ENOENT errors (session file deleted between read attempts); anything
// else is logged.
sessionStore.on('error', (err) => {
    const code = err ? err.code : undefined;
    const isExpectedNoise = code === 'EPERM' || code === 'ENOENT';
    if (!isExpectedNoise) {
        console.error('[SESSION] Store error:', err);
    }
});

// Session cookie: strict same-site, lifetime matching the store TTL (converted
// from seconds to milliseconds), `secure` driven by SESSION_COOKIE_SECURE.
const sessionCookieOptions = {
    secure: SESSION_COOKIE_SECURE,
    sameSite: 'strict',
    maxAge: SESSION_TTL_SECONDS * 1000
};

app.use(session({
    store: sessionStore,
    secret: SESSION_SECRET,
    resave: false,
    saveUninitialized: false,
    cookie: sessionCookieOptions
}));

// CSRF protection is installed after the session middleware.
app.use(csrfProtection);
172
+
// API Routes
// Mounted in the listed order; each entry is [mount path, router].
const apiRouteTable = [
    ['/api/auth', authRoutes],
    ['/api/settings', settingsRoutes],
    ['/api/tasks', taskRoutes],
    ['/api/executions', executionRoutes],
    ['/api/data', dataRoutes],
    ['/api/schedules', scheduleRoutes],
    ['/api/credentials', credentialRoutes]
];
for (const [mountPath, router] of apiRouteTable) {
    app.use(mountPath, router);
}

// View Routes & Static
app.use('/', viewRoutes);
184
+
// Execution Entry Points (Top-level routes kept for compatibility/simplicity)

/**
 * Instruments a request/response pair so the run is recorded after the
 * response has been sent.
 *
 * Effects on `res`:
 *  - `res.locals.executionId` is set to the generated execution id;
 *  - `res.json` is wrapped so the JSON body is captured in
 *    `res.locals.executionResult` before being sent;
 *  - a `finish` listener appends an execution entry via `appendExecution`
 *    and, when an output configuration is present and a result was captured,
 *    forwards `result.data` to `pushOutput`.
 *
 * @param {object} req - Express request; `body` and `query` are read when the response finishes.
 * @param {object} res - Express response to instrument.
 * @param {object} [baseMeta] - Fallbacks (`source`, `mode`, `taskId`, `taskName`) used when the request body lacks them.
 * @returns {void}
 */
const registerExecution = (req, res, baseMeta = {}) => {
    // Resolved at call time, as before; require() caches the module.
    const { appendExecution } = require('./src/server/storage');

    const start = Date.now();
    // Same "exec_<timestamp>_<number>" shape as before, but the numeric suffix
    // is drawn from a much larger range so runs that start within the same
    // millisecond are very unlikely to share an id.
    const requestId = 'exec_' + start + '_' + crypto.randomInt(1e9);
    res.locals.executionId = requestId;

    const originalJson = res.json.bind(res);
    res.json = (body) => {
        res.locals.executionResult = body;
        return originalJson(body);
    };

    res.on('finish', () => {
        const durationMs = Date.now() - start;
        const body = req.body || {};
        const query = req.query || {};
        const entry = {
            id: requestId,
            timestamp: start,
            method: req.method,
            path: req.path,
            status: res.statusCode,
            durationMs,
            source: body.runSource || query.runSource || baseMeta.source || 'unknown',
            mode: body.mode || baseMeta.mode || 'unknown',
            taskId: body.taskId || baseMeta.taskId || null,
            taskName: body.name || baseMeta.taskName || null,
            url: body.url || query.url || null,
            taskSnapshot: body.taskSnapshot || null,
            result: res.locals.executionResult || null
        };

        // Starting each call inside a promise chain turns a synchronous throw
        // into a rejection, so a failure here is logged instead of escaping the
        // event listener as an uncaught exception.
        Promise.resolve()
            .then(() => appendExecution(entry))
            .catch(err => console.error('Failed to append execution:', err));

        const outputConfig = body.output || (body.taskSnapshot && body.taskSnapshot.output);
        if (outputConfig && entry.result) {
            Promise.resolve()
                .then(() => pushOutput(outputConfig, entry.result.data, requestId))
                .catch(err => console.error('[OUTPUT] Unexpected error:', err));
        }
    });
};
233
+
/**
 * Expands `{$name}` placeholders in the `url`, `selector` and
 * `extractionScript` fields of both `req.body` and `req.query`, in place.
 *
 * Variables come from the first truthy value among `body.taskVariables`,
 * `body.variables`, `query.taskVariables` and `query.variables`. A string
 * value is parsed as JSON. Anything that does not end up as an object
 * (unparseable string, JSON `null`, numbers, ...) is treated as "no variables".
 *
 * Placeholder rules:
 *  - `{$now}` becomes the current time as an ISO-8601 string;
 *  - a name that is not an own property of the variables, or whose value is
 *    `null`/`undefined`, becomes the empty string;
 *  - strings, numbers and booleans are inserted via `String()`;
 *  - other values are inserted as JSON, falling back to `String()`.
 *
 * @param {object} req - Request-like object with optional `body` and `query`.
 * @returns {void}
 */
const preprocessScrapeRequest = (req) => {
    const vars = req.body?.taskVariables || req.body?.variables || req.query?.taskVariables || req.query?.variables || {};
    let safeVars = vars;
    if (typeof vars === 'string') {
        try {
            safeVars = JSON.parse(vars);
        } catch {
            // Not JSON: there are no usable variables.
            safeVars = {};
        }
    }
    // JSON.parse can legitimately yield null or a primitive; indexing those
    // would throw or read string/number internals.
    if (safeVars === null || typeof safeVars !== 'object') {
        safeVars = {};
    }

    const resolve = (str) => {
        if (typeof str !== 'string') return str;
        return str.replace(/\{\$([\w.]+)\}/g, (_match, name) => {
            if (name === 'now') return new Date().toISOString();
            // Own properties only, so names such as "constructor" or
            // "toString" are not resolved through the prototype chain.
            if (!Object.prototype.hasOwnProperty.call(safeVars, name)) return '';
            const value = safeVars[name];
            if (value === undefined || value === null) return '';
            if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
                return String(value);
            }
            try {
                const json = JSON.stringify(value);
                // JSON.stringify returns undefined for unserializable values.
                return json === undefined ? String(value) : json;
            } catch {
                return String(value);
            }
        });
    };

    const templatedFields = ['url', 'selector', 'extractionScript'];
    for (const target of [req.body, req.query]) {
        if (!target) continue;
        for (const field of templatedFields) {
            if (target[field]) target[field] = resolve(target[field]);
        }
    }
};
271
+
/**
 * Runs a stored task by id (route parameter `id`) through the scrape, headful
 * or agent handler, depending on `task.mode`.
 *
 * Responds 500 `FAILED_TO_LOAD_TASK` when loading/looking up the task throws
 * and 404 `TASK_NOT_FOUND` when no task matches. Otherwise the execution is
 * registered, `req.body` is rebuilt from the request body overlaid with the
 * stored task, and the request is handed to the mode-specific handler.
 *
 * Variables: each stored task variable contributes its `value`; variables sent
 * by the client (`variables` or `taskVariables`) override stored ones.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @returns {Promise<*>} Whatever the selected handler (or `res.json`) returns.
 */
const executeTaskById = async (req, res) => {
    const taskId = req.params.id;
    let task;
    try {
        await loadTasks();
        task = getTaskById(taskId);
    } catch (e) {
        return res.status(500).json({ error: 'FAILED_TO_LOAD_TASK' });
    }

    if (!task) {
        return res.status(404).json({ error: 'TASK_NOT_FOUND' });
    }

    registerExecution(req, res, { mode: task.mode || 'agent', taskId: task.id, taskName: task.name });

    // The body may be absent when the request carried no parsed JSON payload.
    const requestBody = req.body || {};
    const isObject = (candidate) => candidate !== null && typeof candidate === 'object';

    // Only object-shaped variable bags are merged; spreading a string would
    // produce meaningless index keys.
    const suppliedVars = requestBody.variables || requestBody.taskVariables;
    const clientVars = isObject(suppliedVars) ? suppliedVars : {};
    const taskVars = {};
    if (isObject(task.variables)) {
        for (const [key, v] of Object.entries(task.variables)) {
            // A null/undefined entry yields undefined instead of throwing.
            taskVars[key] = v == null ? undefined : v.value;
        }
    }
    const runtimeVars = { ...taskVars, ...clientVars };

    req.body = {
        ...requestBody,
        ...task,
        url: requestBody.url || task.url,
        taskId: task.id,
        variables: runtimeVars,
        taskVariables: runtimeVars,
        actions: task.actions || [],
        mode: task.mode || 'agent',
        extractionScript: requestBody.extractionScript || task.extractionScript
    };

    if (task.mode === 'scrape') {
        preprocessScrapeRequest(req);
        return handleScrape(req, res);
    }

    if (task.mode === 'headful') {
        if (typeof req.body.url === 'string') {
            req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
                // Own properties only: "{$constructor}" and similar names must
                // not resolve through the prototype chain.
                if (!Object.prototype.hasOwnProperty.call(runtimeVars, name)) return '';
                const value = runtimeVars[name];
                if (value === undefined || value === null) return '';
                return String(value);
            });
        }
        return handleHeadful(req, res);
    }

    // Any other mode (including a missing one) runs through the agent.
    try {
        const runId = String(req.body.runId || req.query.runId || '').trim();
        if (runId) {
            sendExecutionUpdate(runId, { status: 'started' });
        }
    } catch {
        // The "started" notification is best effort; a failure here must not
        // prevent the run itself.
    }
    return handleAgent(req, res);
};
333
+
// Stored-task execution, authenticated by API key. Both paths run the same handler.
app.post('/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);
app.post('/api/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);

// '/scrape' and '/scraper' are aliases with identical behaviour: register the
// execution, expand placeholders, then hand over to the scrape handler.
for (const scrapePath of ['/scrape', '/scraper']) {
    app.all(scrapePath, requireAuth, dataRateLimiter, (req, res) => {
        registerExecution(req, res, { mode: 'scrape' });
        preprocessScrapeRequest(req);
        return handleScrape(req, res);
    });
}

// Agent runs: when a run id is supplied (body or query), a "started" update is
// sent for it before the agent handler takes over.
app.all('/agent', requireAuth, dataRateLimiter, (req, res) => {
    registerExecution(req, res, { mode: 'agent' });
    try {
        const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
        if (runId) {
            sendExecutionUpdate(runId, { status: 'started' });
        }
    } catch {
        // The "started" notification is best effort; a failure here must not
        // prevent the run itself.
    }
    return handleAgent(req, res);
});

// Headful runs: `{$name}` placeholders in the URL are expanded from the
// request's variables before the headful handler is invoked.
app.post('/headful', requireAuth, dataRateLimiter, (req, res) => {
    registerExecution(req, res, { mode: 'headful' });
    if (req.body && typeof req.body.url === 'string') {
        const suppliedVars = req.body.taskVariables || req.body.variables;
        // Only an object can act as a variable bag; a string or number would
        // otherwise be indexed directly.
        const vars = suppliedVars !== null && typeof suppliedVars === 'object' ? suppliedVars : {};
        req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
            // Own properties only: "{$constructor}" and similar names must not
            // resolve through the prototype chain.
            if (!Object.prototype.hasOwnProperty.call(vars, name)) return '';
            const value = vars[name];
            if (value === undefined || value === null) return '';
            return String(value);
        });
    }
    return handleHeadful(req, res);
});
app.post('/headful/stop', requireAuth, stopHeadful);
372
+
// Ensure public/captures directory exists
const capturesDir = path.join(__dirname, 'public', 'captures');
const capturesDirMissing = !fs.existsSync(capturesDir);
if (capturesDirMissing) {
    fs.mkdirSync(capturesDir, { recursive: true });
}

// NoVNC Setup
// The first candidate directory that exists on disk is served under /novnc.
// A candidate whose existence check throws is treated as absent.
const novncDirCandidates = ['/opt/novnc', '/usr/share/novnc'];
const directoryExists = (candidate) => {
    try {
        return fs.existsSync(candidate);
    } catch {
        return false;
    }
};
const novncDir = novncDirCandidates.find(directoryExists);
const novncEnabled = Boolean(novncDir);
if (novncEnabled) {
    app.use('/novnc', express.static(novncDir));
}

// Static Files
// Captures are reachable under two URL prefixes, both behind session or
// API-key authentication; the built front-end in DIST_DIR is served without it.
for (const capturesPrefix of ['/captures', '/screenshots']) {
    app.use(capturesPrefix, requireAuthOrApiKey, express.static(capturesDir));
}
app.use(express.static(DIST_DIR));
400
+
// Headful Status Endpoint
// Reports `{ useNovnc: boolean }`. noVNC counts as usable only when its static
// files were found at startup AND something is listening on NOVNC_PORT (the
// port is then reported as not available).
app.get('/api/headful/status', async (req, res) => {
    if (!novncEnabled) {
        return res.json({ useNovnc: false });
    }
    try {
        // Check if the novnc port is actually in use
        const portAvailable = await isPortAvailable(NOVNC_PORT);
        // If the port is NOT available, something (websockify) is listening on it
        return res.json({ useNovnc: !portAvailable });
    } catch (err) {
        // Without this, a rejected probe would leave the request without a
        // response. An unverifiable port is reported as "noVNC not usable".
        console.error('[HEADFUL] noVNC port check failed:', err && err.message ? err.message : err);
        return res.json({ useNovnc: false });
    }
});
411
+
/**
 * Server-Sent Events handler that relays `selectorSelected` events from
 * `headfulEventEmitter` to the client.
 *
 * The stream opens with an `event: ready` message, then sends one
 * `data: {"selector": ...}` message per emitted selector, plus a comment line
 * every 20 seconds as keep-alive. When the request closes, the timer is
 * cleared and the event listener is removed.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response used as the event stream.
 * @returns {void}
 */
const streamSelectedSelectors = (req, res) => {
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');
    if (typeof res.flushHeaders === 'function') res.flushHeaders();
    res.write('event: ready\ndata: {}\n\n');

    // Writes are best effort: nothing is written once the response has ended,
    // and a failing write must not break the emitter or the timer.
    const safeWrite = (chunk) => {
        if (res.writableEnded) return;
        try {
            res.write(chunk);
        } catch {
            // Stream is no longer writable; cleanup happens on 'close'.
        }
    };

    const onSelectorSelected = (selector) => {
        // JSON.stringify stays outside the try so serialisation errors behave
        // as before... but they are still contained: see the guard below.
        let payload;
        try {
            payload = JSON.stringify({ selector });
        } catch {
            return;
        }
        safeWrite(`data: ${payload}\n\n`);
    };

    headfulEventEmitter.on('selectorSelected', onSelectorSelected);

    const keepAlive = setInterval(() => safeWrite(':keep-alive\n\n'), 20000);

    req.on('close', () => {
        clearInterval(keepAlive);
        headfulEventEmitter.off('selectorSelected', onSelectorSelected);
    });
};

// The stream and the inspect toggle are each exposed under an /api-prefixed
// path and a bare path; both paths share one handler.
app.get('/api/headful/selector_stream', requireAuth, streamSelectedSelectors);
app.get('/headful/selector_stream', requireAuth, streamSelectedSelectors);

app.post('/api/headful/inspect', requireAuth, toggleInspectMode);
app.post('/headful/inspect', requireAuth, toggleInspectMode);
476
+
// Start Server
// Finds a free port starting at `port` (the second argument, 20, is passed to
// findAvailablePort unchanged), starts listening on all IPv4 interfaces, then
// kicks off the storage migration and the scheduler. WebSocket upgrades are
// accepted only from allowed IPs and are handed to proxyWebsockify.
findAvailablePort(port, 20)
    .then((availablePort) => {
        if (availablePort !== port) {
            console.log(`Port ${port} in use, switched to ${availablePort}.`);
        }
        const server = app.listen(availablePort, '0.0.0.0', () => {
            const address = server.address();
            const displayPort = typeof address === 'object' && address ? address.port : availablePort;
            console.log(`Server running at http://localhost:${displayPort}`);

            // Both startup jobs are started inside a promise chain so that a
            // synchronous throw (for example while loading the scheduler
            // module) is logged like any other failure instead of escaping the
            // listen callback as an uncaught exception.

            // One-time migration of storage_state.json cookies into persistent browser profiles
            Promise.resolve()
                .then(() => migrateStorageState())
                .catch(err => console.error('[MIGRATION] Failed:', err?.message || err));

            // Start the cron scheduler
            Promise.resolve()
                .then(() => {
                    const { startScheduler } = require('./src/server/scheduler');
                    return startScheduler();
                })
                .catch(err => console.error('[SCHEDULER] Failed to start:', err?.message || err));
        });
        server.on('upgrade', async (req, socket, head) => {
            const closeSocket = () => {
                try {
                    socket.destroy();
                } catch {
                    // Socket is already gone; nothing left to release.
                }
            };
            try {
                if (!await isIpAllowed(req.socket?.remoteAddress)) {
                    closeSocket();
                    return;
                }
                const handled = proxyWebsockify(req, socket, head);
                if (!handled) {
                    closeSocket();
                }
            } catch (err) {
                // A failing allowlist check or proxy setup must not leave the
                // socket open or surface as an unhandled rejection.
                console.error('WebSocket upgrade failed:', err?.message || err);
                closeSocket();
            }
        });
        server.on('error', (err) => {
            console.error('Server failed to start:', err.message || err);
            process.exit(1);
        });
    })
    .catch((err) => {
        console.error('Server failed to start:', err.message || err);
        process.exit(1);
    });