figranium 0.9.1 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -1,404 +1,501 @@
1
- const express = require('express');
2
- const session = require('express-session');
3
- const FileStore = require('session-file-store')(session);
4
- const fs = require('fs');
5
- const path = require('path');
6
- const crypto = require('crypto');
7
-
8
- // Constants
9
- const {
10
- DEFAULT_PORT,
11
- DIST_DIR,
12
- DATA_DIR,
13
- SESSIONS_DIR,
14
- SESSION_SECRET_FILE,
15
- SESSION_TTL_SECONDS,
16
- NOVNC_PORT,
17
- WEBSOCKIFY_PATH
18
- } = require('./src/server/constants');
19
-
20
- const {
21
- loadTasks,
22
- getTaskById
23
- } = require('./src/server/storage');
24
-
25
- // Context & Utils
26
- const {
27
- executionStreams,
28
- stopRequests,
29
- sendExecutionUpdate
30
- } = require('./src/server/state');
31
- const {
32
- findAvailablePort,
33
- proxyWebsockify
34
- } = require('./src/server/utils');
35
-
36
- // Middleware
37
- const {
38
- authRateLimiter,
39
- dataRateLimiter,
40
- csrfProtection,
41
- requireIpAllowlist,
42
- requireAuth,
43
- isIpAllowed,
44
- requireApiKey
45
- } = require('./src/server/middleware');
46
-
47
- // Feature Modules (Legacy/Existing)
48
- const { handleScrape } = require('./scrape');
49
- const { handleAgent, setProgressReporter, setStopChecker } = require('./agent');
50
- const { handleHeadful, stopHeadful } = require('./headful');
51
-
52
- // Routes
53
- const authRoutes = require('./src/server/routes/auth');
54
- const settingsRoutes = require('./src/server/routes/settings');
55
- const taskRoutes = require('./src/server/routes/tasks');
56
- const executionRoutes = require('./src/server/routes/executions');
57
- const dataRoutes = require('./src/server/routes/data');
58
- const viewRoutes = require('./src/server/routes/views');
59
-
60
- const app = express();
61
- const port = Number(process.env.PORT) || DEFAULT_PORT;
62
-
63
- // Session Secret Setup
64
- let SESSION_SECRET = process.env.SESSION_SECRET;
65
- if (!SESSION_SECRET) {
66
- try {
67
- if (fs.existsSync(SESSION_SECRET_FILE)) {
68
- SESSION_SECRET = fs.readFileSync(SESSION_SECRET_FILE, 'utf8').trim();
69
- } else {
70
- // Generate secret using crypto.randomBytes
71
- SESSION_SECRET = crypto.randomBytes(48).toString('hex');
72
- if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
73
- fs.writeFileSync(SESSION_SECRET_FILE, SESSION_SECRET);
74
- }
75
- } catch (e) {
76
- console.warn('Failed to load session secret from disk, falling back to process env only.');
77
- }
78
- }
79
- if (!SESSION_SECRET) {
80
- throw new Error('SESSION_SECRET environment variable is required');
81
- }
82
-
83
- // Ensure Directories
84
- if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
85
- if (!fs.existsSync(SESSIONS_DIR)) fs.mkdirSync(SESSIONS_DIR, { recursive: true });
86
-
87
- // Trust Proxy
88
- const TRUST_PROXY = ['1', 'true', 'yes'].includes(String(process.env.TRUST_PROXY || '').toLowerCase());
89
- if (TRUST_PROXY) {
90
- app.set('trust proxy', true);
91
- }
92
-
93
- // Session Cookie Secure
94
- const SESSION_COOKIE_SECURE = ['1', 'true', 'yes'].includes(String(process.env.SESSION_COOKIE_SECURE || '').toLowerCase());
95
- if (!SESSION_COOKIE_SECURE && process.env.NODE_ENV === 'production') {
96
- console.warn('[SECURITY] SESSION_COOKIE_SECURE is not enabled. Set SESSION_COOKIE_SECURE=1 when running behind HTTPS.');
97
- }
98
-
99
- // Wire up Agent Callbacks
100
- setProgressReporter(sendExecutionUpdate);
101
- setStopChecker((runId) => {
102
- if (!runId) return false;
103
- if (stopRequests.has(runId)) {
104
- stopRequests.delete(runId);
105
- return true;
106
- }
107
- return false;
108
- });
109
-
110
- // App Middleware
111
- app.use(requireIpAllowlist);
112
- app.use(express.json({ limit: '50mb' }));
113
-
114
- const sessionStore = new FileStore({
115
- path: SESSIONS_DIR,
116
- ttl: SESSION_TTL_SECONDS,
117
- retries: 5,
118
- retryDelay: 100,
119
- reapInterval: 3600,
120
- logFn: () => { }
121
- });
122
-
123
- // Suppress session file store EPERM errors on Windows (antivirus/indexer file locking)
124
- sessionStore.on('error', (err) => {
125
- if (err && err.code === 'EPERM') return; // Silently ignore
126
- if (err && err.code === 'ENOENT') return; // Session file deleted between read attempts
127
- console.error('[SESSION] Store error:', err);
128
- });
129
-
130
- app.use(session({
131
- store: sessionStore,
132
- secret: SESSION_SECRET,
133
- resave: false,
134
- saveUninitialized: false,
135
- cookie: {
136
- secure: SESSION_COOKIE_SECURE,
137
- sameSite: 'strict',
138
- maxAge: SESSION_TTL_SECONDS * 1000
139
- }
140
- }));
141
-
142
- app.use(csrfProtection);
143
-
144
- // API Routes
145
- app.use('/api/auth', authRoutes);
146
- app.use('/api/settings', settingsRoutes);
147
- app.use('/api/tasks', taskRoutes);
148
- app.use('/api/executions', executionRoutes);
149
- app.use('/api/data', dataRoutes);
150
-
151
- // View Routes & Static
152
- app.use('/', viewRoutes);
153
-
154
- // Execution Entry Points (Top-level routes kept for compatibility/simplicity)
155
- const registerExecution = (req, res, baseMeta = {}) => {
156
- // This is a simplified version of the one in server.js,
157
- // relying on the fact that handleScrape/Agent/Headful will handle the response.
158
- // However, the original registerExecution wrapped res.json to capture result
159
- // and appended to execution log on finish.
160
- // We need to restore that logic here or import it.
161
- // Since it was local to server.js, I should probably implement it here or imports.
162
- // It depends on `appendExecution`.
163
-
164
- // For now, I will re-implement it here using imports.
165
- const { appendExecution } = require('./src/server/storage');
166
-
167
- const start = Date.now();
168
- const requestId = 'exec_' + start + '_' + Math.floor(Math.random() * 1000);
169
- res.locals.executionId = requestId;
170
- const originalJson = res.json.bind(res);
171
- res.json = (body) => {
172
- res.locals.executionResult = body;
173
- return originalJson(body);
174
- };
175
- res.on('finish', () => {
176
- const durationMs = Date.now() - start;
177
- const body = req.body || {};
178
- const entry = {
179
- id: requestId,
180
- timestamp: start,
181
- method: req.method,
182
- path: req.path,
183
- status: res.statusCode,
184
- durationMs,
185
- source: body.runSource || req.query.runSource || baseMeta.source || 'unknown',
186
- mode: body.mode || baseMeta.mode || 'unknown',
187
- taskId: body.taskId || baseMeta.taskId || null,
188
- taskName: body.name || baseMeta.taskName || null,
189
- url: body.url || req.query.url || null,
190
- taskSnapshot: body.taskSnapshot || null,
191
- result: res.locals.executionResult || null
192
- };
193
- appendExecution(entry).catch(err => console.error('Failed to append execution:', err));
194
- });
195
- };
196
-
197
- const preprocessScrapeRequest = (req) => {
198
- const vars = req.body?.taskVariables || req.body?.variables || req.query?.taskVariables || req.query?.variables || {};
199
- let safeVars = vars;
200
- if (typeof vars === 'string') {
201
- try { safeVars = JSON.parse(vars); } catch { }
202
- } else if (typeof vars !== 'object') {
203
- safeVars = {};
204
- }
205
-
206
- const resolve = (str) => {
207
- if (typeof str !== 'string') return str;
208
- return str.replace(/\{\$([\w.]+)\}/g, (_match, name) => {
209
- if (name === 'now') return new Date().toISOString();
210
- const value = safeVars[name];
211
- if (value === undefined || value === null) return '';
212
- if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
213
- return String(value);
214
- }
215
- try {
216
- return JSON.stringify(value);
217
- } catch {
218
- return String(value);
219
- }
220
- });
221
- };
222
-
223
- if (req.body) {
224
- if (req.body.url) req.body.url = resolve(req.body.url);
225
- if (req.body.selector) req.body.selector = resolve(req.body.selector);
226
- if (req.body.extractionScript) req.body.extractionScript = resolve(req.body.extractionScript);
227
- }
228
- if (req.query) {
229
- if (req.query.url) req.query.url = resolve(req.query.url);
230
- if (req.query.selector) req.query.selector = resolve(req.query.selector);
231
- if (req.query.extractionScript) req.query.extractionScript = resolve(req.query.extractionScript);
232
- }
233
- };
234
-
235
- const executeTaskById = async (req, res) => {
236
- const taskId = req.params.id;
237
- let task;
238
- try {
239
- await loadTasks();
240
- task = getTaskById(taskId);
241
- } catch (e) {
242
- return res.status(500).json({ error: 'FAILED_TO_LOAD_TASK' });
243
- }
244
-
245
- if (!task) {
246
- return res.status(404).json({ error: 'TASK_NOT_FOUND' });
247
- }
248
-
249
- registerExecution(req, res, { mode: task.mode || 'agent', taskId: task.id, taskName: task.name });
250
-
251
- const clientVars = req.body.variables || req.body.taskVariables || {};
252
- const taskVars = {};
253
- if (task.variables) {
254
- for (const [key, v] of Object.entries(task.variables)) {
255
- taskVars[key] = v.value;
256
- }
257
- }
258
- const runtimeVars = { ...taskVars, ...clientVars };
259
-
260
- req.body = {
261
- ...req.body,
262
- ...task,
263
- url: req.body.url || task.url,
264
- taskId: task.id,
265
- variables: runtimeVars,
266
- taskVariables: runtimeVars,
267
- actions: task.actions || [],
268
- mode: task.mode || 'agent',
269
- extractionScript: req.body.extractionScript || task.extractionScript
270
- };
271
-
272
- if (task.mode === 'scrape') {
273
- preprocessScrapeRequest(req);
274
- return handleScrape(req, res);
275
- } else if (task.mode === 'headful') {
276
- if (req.body && typeof req.body.url === 'string') {
277
- req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
278
- const value = runtimeVars[name];
279
- if (value === undefined || value === null) return '';
280
- return String(value);
281
- });
282
- }
283
- return handleHeadful(req, res);
284
- } else {
285
- try {
286
- const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
287
- if (runId) {
288
- sendExecutionUpdate(runId, { status: 'started' });
289
- }
290
- } catch {
291
- // ignore
292
- }
293
- return handleAgent(req, res);
294
- }
295
- };
296
-
297
- app.post('/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);
298
- app.post('/api/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);
299
-
300
- app.all('/scrape', requireAuth, dataRateLimiter, (req, res) => {
301
- registerExecution(req, res, { mode: 'scrape' });
302
- preprocessScrapeRequest(req);
303
- return handleScrape(req, res);
304
- });
305
- app.all('/scraper', requireAuth, dataRateLimiter, (req, res) => {
306
- registerExecution(req, res, { mode: 'scrape' });
307
- preprocessScrapeRequest(req);
308
- return handleScrape(req, res);
309
- });
310
- app.all('/agent', requireAuth, dataRateLimiter, (req, res) => {
311
- registerExecution(req, res, { mode: 'agent' });
312
- try {
313
- const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
314
- if (runId) {
315
- sendExecutionUpdate(runId, { status: 'started' });
316
- }
317
- } catch {
318
- // ignore
319
- }
320
- return handleAgent(req, res);
321
- });
322
- app.post('/headful', requireAuth, dataRateLimiter, (req, res) => {
323
- registerExecution(req, res, { mode: 'headful' });
324
- if (req.body && typeof req.body.url === 'string') {
325
- const vars = req.body.taskVariables || req.body.variables || {};
326
- req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
327
- const value = vars[name];
328
- if (value === undefined || value === null) return '';
329
- return String(value);
330
- });
331
- }
332
- return handleHeadful(req, res);
333
- });
334
- app.post('/headful/stop', requireAuth, stopHeadful);
335
-
336
- // Ensure public/captures directory exists
337
- const capturesDir = path.join(__dirname, 'public', 'captures');
338
- if (!fs.existsSync(capturesDir)) {
339
- fs.mkdirSync(capturesDir, { recursive: true });
340
- }
341
-
342
- // NoVNC Setup
343
- const novncDirCandidates = [
344
- '/opt/novnc',
345
- '/usr/share/novnc'
346
- ];
347
- const novncDir = novncDirCandidates.find((candidate) => {
348
- try {
349
- return fs.existsSync(candidate);
350
- } catch {
351
- return false;
352
- }
353
- });
354
- const novncEnabled = !!novncDir;
355
- if (novncDir) {
356
- app.use('/novnc', express.static(novncDir));
357
- }
358
-
359
- // Static Files
360
- app.use('/captures', express.static(capturesDir));
361
- app.use('/screenshots', express.static(capturesDir));
362
- app.use(express.static(DIST_DIR));
363
-
364
- // Headful Status Endpoint
365
- app.get('/api/headful/status', (req, res) => {
366
- // If noVNC is enabled (found on disk), we consider the environment optimized.
367
- // This allows Docker and manual installations with proper deps to hide the disclaimer.
368
- res.json({ useNovnc: novncEnabled });
369
- });
370
-
371
- // Start Server
372
- findAvailablePort(port, 20)
373
- .then((availablePort) => {
374
- if (availablePort !== port) {
375
- console.log(`Port ${port} in use, switched to ${availablePort}.`);
376
- }
377
- const server = app.listen(availablePort, '0.0.0.0', () => {
378
- const address = server.address();
379
- const displayPort = typeof address === 'object' && address ? address.port : availablePort;
380
- console.log(`Server running at http://localhost:${displayPort}`);
381
- });
382
- server.on('upgrade', async (req, socket, head) => {
383
- if (!await isIpAllowed(req.socket?.remoteAddress)) {
384
- try {
385
- socket.destroy();
386
- } catch {
387
- // ignore
388
- }
389
- return;
390
- }
391
- const handled = proxyWebsockify(req, socket, head);
392
- if (!handled) {
393
- socket.destroy();
394
- }
395
- });
396
- server.on('error', (err) => {
397
- console.error('Server failed to start:', err.message || err);
398
- process.exit(1);
399
- });
400
- })
401
- .catch((err) => {
402
- console.error('Server failed to start:', err.message || err);
403
- process.exit(1);
404
- });
1
+ const express = require('express');
2
+ const session = require('express-session');
3
+ const FileStore = require('session-file-store')(session);
4
+ const fs = require('fs');
5
+ const path = require('path');
6
+ const crypto = require('crypto');
7
+
8
+ // Constants
9
+ const {
10
+ DEFAULT_PORT,
11
+ DIST_DIR,
12
+ DATA_DIR,
13
+ SESSIONS_DIR,
14
+ SESSION_SECRET_FILE,
15
+ SESSION_TTL_SECONDS,
16
+ NOVNC_PORT,
17
+ WEBSOCKIFY_PATH
18
+ } = require('./src/server/constants');
19
+
20
+ const {
21
+ loadTasks,
22
+ getTaskById
23
+ } = require('./src/server/storage');
24
+
25
+ // Context & Utils
26
+ const {
27
+ executionStreams,
28
+ stopRequests,
29
+ sendExecutionUpdate
30
+ } = require('./src/server/state');
31
+ const {
32
+ findAvailablePort,
33
+ proxyWebsockify,
34
+ isPortAvailable
35
+ } = require('./src/server/utils');
36
+
37
+ // Middleware
38
+ const {
39
+ authRateLimiter,
40
+ dataRateLimiter,
41
+ csrfProtection,
42
+ requireIpAllowlist,
43
+ requireAuth,
44
+ isIpAllowed,
45
+ requireApiKey,
46
+ requireAuthOrApiKey
47
+ } = require('./src/server/middleware');
48
+
49
+ // Feature Modules (Legacy/Existing)
50
+ const { handleScrape } = require('./scrape');
51
+ const { handleAgent, setProgressReporter, setStopChecker } = require('./agent');
52
+ const { handleHeadful, stopHeadful, toggleInspectMode, headfulEventEmitter } = require('./headful');
53
+
54
+ // Routes
55
+ const authRoutes = require('./src/server/routes/auth');
56
+ const settingsRoutes = require('./src/server/routes/settings');
57
+ const taskRoutes = require('./src/server/routes/tasks');
58
+ const executionRoutes = require('./src/server/routes/executions');
59
+ const dataRoutes = require('./src/server/routes/data');
60
+ const viewRoutes = require('./src/server/routes/views');
61
+ const scheduleRoutes = require('./src/server/routes/schedules');
62
+ const credentialRoutes = require('./src/server/routes/credentials');
63
+ const { pushOutput } = require('./src/server/outputProviders');
64
+
65
+ const app = express();
66
+ app.disable('x-powered-by');
67
+ const port = Number(process.env.PORT) || DEFAULT_PORT;
68
+
69
+ // Session Secret Setup
70
+ let SESSION_SECRET = process.env.SESSION_SECRET;
71
+ if (!SESSION_SECRET) {
72
+ try {
73
+ if (fs.existsSync(SESSION_SECRET_FILE)) {
74
+ SESSION_SECRET = fs.readFileSync(SESSION_SECRET_FILE, 'utf8').trim();
75
+ } else {
76
+ // Generate secret using crypto.randomBytes
77
+ SESSION_SECRET = crypto.randomBytes(48).toString('hex');
78
+ if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
79
+ fs.writeFileSync(SESSION_SECRET_FILE, SESSION_SECRET);
80
+ }
81
+ } catch (e) {
82
+ console.warn('Failed to load session secret from disk, falling back to process env only.');
83
+ }
84
+ }
85
+ if (!SESSION_SECRET) {
86
+ throw new Error('SESSION_SECRET environment variable is required');
87
+ }
88
+
89
+ // Ensure Directories
90
+ if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
91
+ if (!fs.existsSync(SESSIONS_DIR)) fs.mkdirSync(SESSIONS_DIR, { recursive: true });
92
+
93
+ // Trust Proxy
94
+ const TRUST_PROXY = ['1', 'true', 'yes'].includes(String(process.env.TRUST_PROXY || '').toLowerCase());
95
+ if (TRUST_PROXY) {
96
+ app.set('trust proxy', true);
97
+ }
98
+
99
+ // Session Cookie Secure
100
+ const SESSION_COOKIE_SECURE = ['1', 'true', 'yes'].includes(String(process.env.SESSION_COOKIE_SECURE || '').toLowerCase());
101
+ if (!SESSION_COOKIE_SECURE && process.env.NODE_ENV === 'production') {
102
+ console.warn('[SECURITY] SESSION_COOKIE_SECURE is not enabled. Set SESSION_COOKIE_SECURE=1 when running behind HTTPS.');
103
+ }
104
+
105
+ // Wire up Agent Callbacks
106
+ setProgressReporter(sendExecutionUpdate);
107
+ setStopChecker((runId) => {
108
+ if (!runId) return false;
109
+ if (stopRequests.has(runId)) {
110
+ stopRequests.delete(runId);
111
+ return true;
112
+ }
113
+ return false;
114
+ });
115
+
116
+ // App Middleware
117
+ app.use(requireIpAllowlist);
118
+
119
+ // Security Headers
120
+ app.use((req, res, next) => {
121
+ res.setHeader('X-Content-Type-Options', 'nosniff');
122
+ res.setHeader('X-Frame-Options', 'SAMEORIGIN');
123
+ res.setHeader('X-XSS-Protection', '1; mode=block');
124
+ res.setHeader('Referrer-Policy', 'strict-origin-when-cross-origin');
125
+ next();
126
+ });
127
+
128
+ app.use(express.json({ limit: '2mb' }));
129
+
130
+ const sessionStore = new FileStore({
131
+ path: SESSIONS_DIR,
132
+ ttl: SESSION_TTL_SECONDS,
133
+ retries: 5,
134
+ retryDelay: 100,
135
+ reapInterval: 3600,
136
+ logFn: () => { }
137
+ });
138
+
139
+ // Suppress session file store EPERM errors on Windows (antivirus/indexer file locking)
140
+ sessionStore.on('error', (err) => {
141
+ if (err && err.code === 'EPERM') return; // Silently ignore
142
+ if (err && err.code === 'ENOENT') return; // Session file deleted between read attempts
143
+ console.error('[SESSION] Store error:', err);
144
+ });
145
+
146
+ app.use(session({
147
+ store: sessionStore,
148
+ secret: SESSION_SECRET,
149
+ resave: false,
150
+ saveUninitialized: false,
151
+ cookie: {
152
+ secure: SESSION_COOKIE_SECURE,
153
+ sameSite: 'strict',
154
+ maxAge: SESSION_TTL_SECONDS * 1000
155
+ }
156
+ }));
157
+
158
+ app.use(csrfProtection);
159
+
160
+ // API Routes
161
+ app.use('/api/auth', authRoutes);
162
+ app.use('/api/settings', settingsRoutes);
163
+ app.use('/api/tasks', taskRoutes);
164
+ app.use('/api/executions', executionRoutes);
165
+ app.use('/api/data', dataRoutes);
166
+ app.use('/api/schedules', scheduleRoutes);
167
+ app.use('/api/credentials', credentialRoutes);
168
+
169
+ // View Routes & Static
170
+ app.use('/', viewRoutes);
171
+
172
+ // Execution Entry Points (Top-level routes kept for compatibility/simplicity)
173
+ const registerExecution = (req, res, baseMeta = {}) => {
174
+ // This is a simplified version of the one in server.js,
175
+ // relying on the fact that handleScrape/Agent/Headful will handle the response.
176
+ // However, the original registerExecution wrapped res.json to capture result
177
+ // and appended to execution log on finish.
178
+ // We need to restore that logic here or import it.
179
+ // Since it was local to server.js, I should probably implement it here or imports.
180
+ // It depends on `appendExecution`.
181
+
182
+ // For now, I will re-implement it here using imports.
183
+ const { appendExecution } = require('./src/server/storage');
184
+
185
+ const start = Date.now();
186
+ const requestId = 'exec_' + start + '_' + Math.floor(Math.random() * 1000);
187
+ res.locals.executionId = requestId;
188
+ const originalJson = res.json.bind(res);
189
+ res.json = (body) => {
190
+ res.locals.executionResult = body;
191
+ return originalJson(body);
192
+ };
193
+ res.on('finish', () => {
194
+ const durationMs = Date.now() - start;
195
+ const body = req.body || {};
196
+ const entry = {
197
+ id: requestId,
198
+ timestamp: start,
199
+ method: req.method,
200
+ path: req.path,
201
+ status: res.statusCode,
202
+ durationMs,
203
+ source: body.runSource || req.query.runSource || baseMeta.source || 'unknown',
204
+ mode: body.mode || baseMeta.mode || 'unknown',
205
+ taskId: body.taskId || baseMeta.taskId || null,
206
+ taskName: body.name || baseMeta.taskName || null,
207
+ url: body.url || req.query.url || null,
208
+ taskSnapshot: body.taskSnapshot || null,
209
+ result: res.locals.executionResult || null
210
+ };
211
+ appendExecution(entry).catch(err => console.error('Failed to append execution:', err));
212
+
213
+ const outputConfig = body.output || (body.taskSnapshot && body.taskSnapshot.output);
214
+ if (outputConfig && entry.result) {
215
+ pushOutput(outputConfig, entry.result.data, requestId)
216
+ .catch(err => console.error('[OUTPUT] Unexpected error:', err));
217
+ }
218
+ });
219
+ };
220
+
221
+ const preprocessScrapeRequest = (req) => {
222
+ const vars = req.body?.taskVariables || req.body?.variables || req.query?.taskVariables || req.query?.variables || {};
223
+ let safeVars = vars;
224
+ if (typeof vars === 'string') {
225
+ try { safeVars = JSON.parse(vars); } catch { }
226
+ } else if (typeof vars !== 'object') {
227
+ safeVars = {};
228
+ }
229
+
230
+ const resolve = (str) => {
231
+ if (typeof str !== 'string') return str;
232
+ return str.replace(/\{\$([\w.]+)\}/g, (_match, name) => {
233
+ if (name === 'now') return new Date().toISOString();
234
+ const value = safeVars[name];
235
+ if (value === undefined || value === null) return '';
236
+ if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
237
+ return String(value);
238
+ }
239
+ try {
240
+ return JSON.stringify(value);
241
+ } catch {
242
+ return String(value);
243
+ }
244
+ });
245
+ };
246
+
247
+ if (req.body) {
248
+ if (req.body.url) req.body.url = resolve(req.body.url);
249
+ if (req.body.selector) req.body.selector = resolve(req.body.selector);
250
+ if (req.body.extractionScript) req.body.extractionScript = resolve(req.body.extractionScript);
251
+ }
252
+ if (req.query) {
253
+ if (req.query.url) req.query.url = resolve(req.query.url);
254
+ if (req.query.selector) req.query.selector = resolve(req.query.selector);
255
+ if (req.query.extractionScript) req.query.extractionScript = resolve(req.query.extractionScript);
256
+ }
257
+ };
258
+
259
+ const executeTaskById = async (req, res) => {
260
+ const taskId = req.params.id;
261
+ let task;
262
+ try {
263
+ await loadTasks();
264
+ task = getTaskById(taskId);
265
+ } catch (e) {
266
+ return res.status(500).json({ error: 'FAILED_TO_LOAD_TASK' });
267
+ }
268
+
269
+ if (!task) {
270
+ return res.status(404).json({ error: 'TASK_NOT_FOUND' });
271
+ }
272
+
273
+ registerExecution(req, res, { mode: task.mode || 'agent', taskId: task.id, taskName: task.name });
274
+
275
+ const clientVars = req.body.variables || req.body.taskVariables || {};
276
+ const taskVars = {};
277
+ if (task.variables) {
278
+ for (const [key, v] of Object.entries(task.variables)) {
279
+ taskVars[key] = v.value;
280
+ }
281
+ }
282
+ const runtimeVars = { ...taskVars, ...clientVars };
283
+
284
+ req.body = {
285
+ ...req.body,
286
+ ...task,
287
+ url: req.body.url || task.url,
288
+ taskId: task.id,
289
+ variables: runtimeVars,
290
+ taskVariables: runtimeVars,
291
+ actions: task.actions || [],
292
+ mode: task.mode || 'agent',
293
+ extractionScript: req.body.extractionScript || task.extractionScript
294
+ };
295
+
296
+ if (task.mode === 'scrape') {
297
+ preprocessScrapeRequest(req);
298
+ return handleScrape(req, res);
299
+ } else if (task.mode === 'headful') {
300
+ if (req.body && typeof req.body.url === 'string') {
301
+ req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
302
+ const value = runtimeVars[name];
303
+ if (value === undefined || value === null) return '';
304
+ return String(value);
305
+ });
306
+ }
307
+ return handleHeadful(req, res);
308
+ } else {
309
+ try {
310
+ const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
311
+ if (runId) {
312
+ sendExecutionUpdate(runId, { status: 'started' });
313
+ }
314
+ } catch {
315
+ // ignore
316
+ }
317
+ return handleAgent(req, res);
318
+ }
319
+ };
320
+
321
+ app.post('/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);
322
+ app.post('/api/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);
323
+
324
+ app.all('/scrape', requireAuth, dataRateLimiter, (req, res) => {
325
+ registerExecution(req, res, { mode: 'scrape' });
326
+ preprocessScrapeRequest(req);
327
+ return handleScrape(req, res);
328
+ });
329
+ app.all('/scraper', requireAuth, dataRateLimiter, (req, res) => {
330
+ registerExecution(req, res, { mode: 'scrape' });
331
+ preprocessScrapeRequest(req);
332
+ return handleScrape(req, res);
333
+ });
334
+ app.all('/agent', requireAuth, dataRateLimiter, (req, res) => {
335
+ registerExecution(req, res, { mode: 'agent' });
336
+ try {
337
+ const runId = String((req.body && req.body.runId) || req.query.runId || '').trim();
338
+ if (runId) {
339
+ sendExecutionUpdate(runId, { status: 'started' });
340
+ }
341
+ } catch {
342
+ // ignore
343
+ }
344
+ return handleAgent(req, res);
345
+ });
346
+ app.post('/headful', requireAuth, dataRateLimiter, (req, res) => {
347
+ registerExecution(req, res, { mode: 'headful' });
348
+ if (req.body && typeof req.body.url === 'string') {
349
+ const vars = req.body.taskVariables || req.body.variables || {};
350
+ req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
351
+ const value = vars[name];
352
+ if (value === undefined || value === null) return '';
353
+ return String(value);
354
+ });
355
+ }
356
+ return handleHeadful(req, res);
357
+ });
358
+ app.post('/headful/stop', requireAuth, stopHeadful);
359
+
360
+ // Ensure public/captures directory exists
361
+ const capturesDir = path.join(__dirname, 'public', 'captures');
362
+ if (!fs.existsSync(capturesDir)) {
363
+ fs.mkdirSync(capturesDir, { recursive: true });
364
+ }
365
+
366
+ // NoVNC Setup
367
+ const novncDirCandidates = [
368
+ '/opt/novnc',
369
+ '/usr/share/novnc'
370
+ ];
371
+ const novncDir = novncDirCandidates.find((candidate) => {
372
+ try {
373
+ return fs.existsSync(candidate);
374
+ } catch {
375
+ return false;
376
+ }
377
+ });
378
+ const novncEnabled = !!novncDir;
379
+ if (novncDir) {
380
+ app.use('/novnc', express.static(novncDir));
381
+ }
382
+
383
+ // Static Files
384
+ app.use('/captures', requireAuthOrApiKey, express.static(capturesDir));
385
+ app.use('/screenshots', requireAuthOrApiKey, express.static(capturesDir));
386
+ app.use(express.static(DIST_DIR));
387
+
388
+ // Headful Status Endpoint
389
+ app.get('/api/headful/status', async (req, res) => {
390
+ if (!novncEnabled) {
391
+ return res.json({ useNovnc: false });
392
+ }
393
+ // Check if the novnc port is actually in use
394
+ const portAvailable = await isPortAvailable(NOVNC_PORT);
395
+ // If the port is NOT available, something (websockify) is listening on it
396
+ res.json({ useNovnc: !portAvailable });
397
+ });
398
+
399
+ app.get('/api/headful/selector_stream', requireAuth, (req, res) => {
400
+ res.setHeader('Content-Type', 'text/event-stream');
401
+ res.setHeader('Cache-Control', 'no-cache');
402
+ res.setHeader('Connection', 'keep-alive');
403
+ if (typeof res.flushHeaders === 'function') res.flushHeaders();
404
+ res.write('event: ready\ndata: {}\n\n');
405
+
406
+ const onSelectorSelected = (selector) => {
407
+ try {
408
+ res.write(`data: ${JSON.stringify({ selector })}\n\n`);
409
+ } catch (err) {
410
+ // ignore
411
+ }
412
+ };
413
+
414
+ headfulEventEmitter.on('selectorSelected', onSelectorSelected);
415
+
416
+ const keepAlive = setInterval(() => {
417
+ try {
418
+ res.write(':keep-alive\n\n');
419
+ } catch {
420
+ // ignore
421
+ }
422
+ }, 20000);
423
+
424
+ req.on('close', () => {
425
+ clearInterval(keepAlive);
426
+ headfulEventEmitter.off('selectorSelected', onSelectorSelected);
427
+ });
428
+ });
429
+
430
+ app.get('/headful/selector_stream', requireAuth, (req, res) => {
431
+ res.setHeader('Content-Type', 'text/event-stream');
432
+ res.setHeader('Cache-Control', 'no-cache');
433
+ res.setHeader('Connection', 'keep-alive');
434
+ if (typeof res.flushHeaders === 'function') res.flushHeaders();
435
+ res.write('event: ready\ndata: {}\n\n');
436
+
437
+ const onSelectorSelected = (selector) => {
438
+ try {
439
+ res.write(`data: ${JSON.stringify({ selector })}\n\n`);
440
+ } catch (err) {
441
+ // ignore
442
+ }
443
+ };
444
+
445
+ headfulEventEmitter.on('selectorSelected', onSelectorSelected);
446
+
447
+ const keepAlive = setInterval(() => {
448
+ try {
449
+ res.write(':keep-alive\n\n');
450
+ } catch {
451
+ // ignore
452
+ }
453
+ }, 20000);
454
+
455
+ req.on('close', () => {
456
+ clearInterval(keepAlive);
457
+ headfulEventEmitter.off('selectorSelected', onSelectorSelected);
458
+ });
459
+ });
460
+
461
+ app.post('/api/headful/inspect', requireAuth, toggleInspectMode);
462
+ app.post('/headful/inspect', requireAuth, toggleInspectMode);
463
+
464
+ // Start Server
465
+ findAvailablePort(port, 20)
466
+ .then((availablePort) => {
467
+ if (availablePort !== port) {
468
+ console.log(`Port ${port} in use, switched to ${availablePort}.`);
469
+ }
470
+ const server = app.listen(availablePort, '0.0.0.0', () => {
471
+ const address = server.address();
472
+ const displayPort = typeof address === 'object' && address ? address.port : availablePort;
473
+ console.log(`Server running at http://localhost:${displayPort}`);
474
+
475
+ // Start the cron scheduler
476
+ const { startScheduler } = require('./src/server/scheduler');
477
+ startScheduler().catch(err => console.error('[SCHEDULER] Failed to start:', err.message));
478
+ });
479
+ server.on('upgrade', async (req, socket, head) => {
480
+ if (!await isIpAllowed(req.socket?.remoteAddress)) {
481
+ try {
482
+ socket.destroy();
483
+ } catch {
484
+ // ignore
485
+ }
486
+ return;
487
+ }
488
+ const handled = proxyWebsockify(req, socket, head);
489
+ if (!handled) {
490
+ socket.destroy();
491
+ }
492
+ });
493
+ server.on('error', (err) => {
494
+ console.error('Server failed to start:', err.message || err);
495
+ process.exit(1);
496
+ });
497
+ })
498
+ .catch((err) => {
499
+ console.error('Server failed to start:', err.message || err);
500
+ process.exit(1);
501
+ });