figranium 0.9.1 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -674
- package/README.md +336 -318
- package/agent.js +1 -1
- package/common-utils.js +211 -166
- package/dist/assets/index--OZi5-p_.css +1 -0
- package/dist/assets/index-Bkr74C53.js +15 -0
- package/dist/index.html +26 -26
- package/dist/novnc.html +108 -108
- package/extraction-worker.js +204 -197
- package/headful.js +583 -219
- package/html-utils.js +24 -24
- package/package.json +81 -78
- package/proxy-rotation.js +261 -261
- package/proxy-utils.js +84 -84
- package/public/novnc.html +108 -108
- package/scrape.js +418 -374
- package/server.js +501 -404
- package/src/server/cron-parser.js +316 -0
- package/src/server/routes/schedules.js +171 -0
- package/src/server/scheduler.js +381 -0
- package/url-utils.js +137 -116
- package/user-agent-settings.js +76 -76
- package/dist/assets/index-ALim18cn.css +0 -1
- package/dist/assets/index-D8YbCWRx.js +0 -15
package/server.js
CHANGED
|
@@ -1,404 +1,501 @@
|
|
|
1
|
-
const express = require('express');
|
|
2
|
-
const session = require('express-session');
|
|
3
|
-
const FileStore = require('session-file-store')(session);
|
|
4
|
-
const fs = require('fs');
|
|
5
|
-
const path = require('path');
|
|
6
|
-
const crypto = require('crypto');
|
|
7
|
-
|
|
8
|
-
// Constants
|
|
9
|
-
const {
|
|
10
|
-
DEFAULT_PORT,
|
|
11
|
-
DIST_DIR,
|
|
12
|
-
DATA_DIR,
|
|
13
|
-
SESSIONS_DIR,
|
|
14
|
-
SESSION_SECRET_FILE,
|
|
15
|
-
SESSION_TTL_SECONDS,
|
|
16
|
-
NOVNC_PORT,
|
|
17
|
-
WEBSOCKIFY_PATH
|
|
18
|
-
} = require('./src/server/constants');
|
|
19
|
-
|
|
20
|
-
const {
|
|
21
|
-
loadTasks,
|
|
22
|
-
getTaskById
|
|
23
|
-
} = require('./src/server/storage');
|
|
24
|
-
|
|
25
|
-
// Context & Utils
|
|
26
|
-
const {
|
|
27
|
-
executionStreams,
|
|
28
|
-
stopRequests,
|
|
29
|
-
sendExecutionUpdate
|
|
30
|
-
} = require('./src/server/state');
|
|
31
|
-
const {
|
|
32
|
-
findAvailablePort,
|
|
33
|
-
proxyWebsockify
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
const {
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
const
|
|
56
|
-
const
|
|
57
|
-
const
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
const
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
if (!
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
//
|
|
94
|
-
const
|
|
95
|
-
if (
|
|
96
|
-
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
//
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
});
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
app.use(
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
};
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
if (
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
taskId
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
app.post('/
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
return
|
|
333
|
-
});
|
|
334
|
-
app.
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
const
|
|
338
|
-
if (
|
|
339
|
-
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
//
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
}
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
.
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
});
|
|
1
|
+
const express = require('express');
|
|
2
|
+
const session = require('express-session');
|
|
3
|
+
const FileStore = require('session-file-store')(session);
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const crypto = require('crypto');
|
|
7
|
+
|
|
8
|
+
// Constants
|
|
9
|
+
const {
|
|
10
|
+
DEFAULT_PORT,
|
|
11
|
+
DIST_DIR,
|
|
12
|
+
DATA_DIR,
|
|
13
|
+
SESSIONS_DIR,
|
|
14
|
+
SESSION_SECRET_FILE,
|
|
15
|
+
SESSION_TTL_SECONDS,
|
|
16
|
+
NOVNC_PORT,
|
|
17
|
+
WEBSOCKIFY_PATH
|
|
18
|
+
} = require('./src/server/constants');
|
|
19
|
+
|
|
20
|
+
const {
|
|
21
|
+
loadTasks,
|
|
22
|
+
getTaskById
|
|
23
|
+
} = require('./src/server/storage');
|
|
24
|
+
|
|
25
|
+
// Context & Utils
|
|
26
|
+
const {
|
|
27
|
+
executionStreams,
|
|
28
|
+
stopRequests,
|
|
29
|
+
sendExecutionUpdate
|
|
30
|
+
} = require('./src/server/state');
|
|
31
|
+
const {
|
|
32
|
+
findAvailablePort,
|
|
33
|
+
proxyWebsockify,
|
|
34
|
+
isPortAvailable
|
|
35
|
+
} = require('./src/server/utils');
|
|
36
|
+
|
|
37
|
+
// Middleware
|
|
38
|
+
const {
|
|
39
|
+
authRateLimiter,
|
|
40
|
+
dataRateLimiter,
|
|
41
|
+
csrfProtection,
|
|
42
|
+
requireIpAllowlist,
|
|
43
|
+
requireAuth,
|
|
44
|
+
isIpAllowed,
|
|
45
|
+
requireApiKey,
|
|
46
|
+
requireAuthOrApiKey
|
|
47
|
+
} = require('./src/server/middleware');
|
|
48
|
+
|
|
49
|
+
// Feature Modules (Legacy/Existing)
|
|
50
|
+
const { handleScrape } = require('./scrape');
|
|
51
|
+
const { handleAgent, setProgressReporter, setStopChecker } = require('./agent');
|
|
52
|
+
const { handleHeadful, stopHeadful, toggleInspectMode, headfulEventEmitter } = require('./headful');
|
|
53
|
+
|
|
54
|
+
// Routes
|
|
55
|
+
const authRoutes = require('./src/server/routes/auth');
|
|
56
|
+
const settingsRoutes = require('./src/server/routes/settings');
|
|
57
|
+
const taskRoutes = require('./src/server/routes/tasks');
|
|
58
|
+
const executionRoutes = require('./src/server/routes/executions');
|
|
59
|
+
const dataRoutes = require('./src/server/routes/data');
|
|
60
|
+
const viewRoutes = require('./src/server/routes/views');
|
|
61
|
+
const scheduleRoutes = require('./src/server/routes/schedules');
|
|
62
|
+
const credentialRoutes = require('./src/server/routes/credentials');
|
|
63
|
+
const { pushOutput } = require('./src/server/outputProviders');
|
|
64
|
+
|
|
65
|
+
// Express application instance. The X-Powered-By header is disabled so
// responses do not advertise the framework.
const app = express();
app.disable('x-powered-by');
// A missing, non-numeric or zero PORT falls back to DEFAULT_PORT.
const port = Number(process.env.PORT) || DEFAULT_PORT;

/**
 * Reads a boolean feature flag from the environment.
 * @param {string} name - Environment variable name.
 * @returns {boolean} True when the value is "1", "true" or "yes" (case-insensitive).
 */
const isEnvFlagEnabled = (name) => ['1', 'true', 'yes'].includes(String(process.env[name] || '').toLowerCase());

// Session Secret Setup
// Resolution order: SESSION_SECRET env var, then the secret stored in
// SESSION_SECRET_FILE, then a freshly generated secret persisted to that file.
let SESSION_SECRET = process.env.SESSION_SECRET;
if (!SESSION_SECRET) {
    try {
        if (fs.existsSync(SESSION_SECRET_FILE)) {
            SESSION_SECRET = fs.readFileSync(SESSION_SECRET_FILE, 'utf8').trim();
        }
        // Also regenerate when the file exists but is empty; previously that
        // case fell through to the "environment variable is required" error.
        if (!SESSION_SECRET) {
            SESSION_SECRET = crypto.randomBytes(48).toString('hex');
            if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
            // Owner-only permissions: the secret signs session cookies and
            // must not be readable by other local users.
            fs.writeFileSync(SESSION_SECRET_FILE, SESSION_SECRET, { mode: 0o600 });
        }
    } catch (e) {
        // If only the write failed, the generated secret is still used for
        // this process but sessions will not survive a restart.
        console.warn('Failed to load or persist session secret on disk:', (e && e.message) || e);
    }
}
if (!SESSION_SECRET) {
    throw new Error('SESSION_SECRET environment variable is required');
}

// Ensure Directories
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
if (!fs.existsSync(SESSIONS_DIR)) fs.mkdirSync(SESSIONS_DIR, { recursive: true });

// Trust Proxy
const TRUST_PROXY = isEnvFlagEnabled('TRUST_PROXY');
if (TRUST_PROXY) {
    app.set('trust proxy', true);
}

// Session Cookie Secure
const SESSION_COOKIE_SECURE = isEnvFlagEnabled('SESSION_COOKIE_SECURE');
if (!SESSION_COOKIE_SECURE && process.env.NODE_ENV === 'production') {
    console.warn('[SECURITY] SESSION_COOKIE_SECURE is not enabled. Set SESSION_COOKIE_SECURE=1 when running behind HTTPS.');
}
|
|
104
|
+
|
|
105
|
+
// Wire up Agent Callbacks
// The agent module reports progress through sendExecutionUpdate and asks the
// checker below whether a given run has been asked to stop.
setProgressReporter(sendExecutionUpdate);
setStopChecker((runId) => {
    // A pending stop request is consumed the first time it is observed.
    const stopRequested = Boolean(runId) && stopRequests.has(runId);
    if (stopRequested) {
        stopRequests.delete(runId);
    }
    return stopRequested;
});
|
|
115
|
+
|
|
116
|
+
// App Middleware
// The IP allowlist is registered first, ahead of every other middleware.
app.use(requireIpAllowlist);

// Security Headers
const SECURITY_HEADERS = [
    ['X-Content-Type-Options', 'nosniff'],
    ['X-Frame-Options', 'SAMEORIGIN'],
    ['X-XSS-Protection', '1; mode=block'],
    ['Referrer-Policy', 'strict-origin-when-cross-origin']
];
app.use((req, res, next) => {
    for (const [headerName, headerValue] of SECURITY_HEADERS) {
        res.setHeader(headerName, headerValue);
    }
    next();
});

app.use(express.json({ limit: '2mb' }));

// File-backed session store; its internal logging is silenced via a no-op logFn.
const sessionStore = new FileStore({
    path: SESSIONS_DIR,
    ttl: SESSION_TTL_SECONDS,
    retries: 5,
    retryDelay: 100,
    reapInterval: 3600,
    logFn: () => { }
});

// Suppress session file store errors that are expected noise:
// EPERM (Windows antivirus/indexer file locking) and ENOENT (session file
// deleted between read attempts). Everything else is logged.
sessionStore.on('error', (err) => {
    const isExpectedNoise = Boolean(err) && (err.code === 'EPERM' || err.code === 'ENOENT');
    if (isExpectedNoise) return;
    console.error('[SESSION] Store error:', err);
});

const sessionCookieOptions = {
    secure: SESSION_COOKIE_SECURE,
    sameSite: 'strict',
    maxAge: SESSION_TTL_SECONDS * 1000
};
app.use(session({
    store: sessionStore,
    secret: SESSION_SECRET,
    resave: false,
    saveUninitialized: false,
    cookie: sessionCookieOptions
}));

app.use(csrfProtection);

// API Routes (mounted in this order)
const apiRouters = [
    ['/api/auth', authRoutes],
    ['/api/settings', settingsRoutes],
    ['/api/tasks', taskRoutes],
    ['/api/executions', executionRoutes],
    ['/api/data', dataRoutes],
    ['/api/schedules', scheduleRoutes],
    ['/api/credentials', credentialRoutes]
];
for (const [mountPath, router] of apiRouters) {
    app.use(mountPath, router);
}

// View Routes & Static
app.use('/', viewRoutes);
|
|
171
|
+
|
|
172
|
+
// Execution Entry Points (Top-level routes kept for compatibility/simplicity)
|
|
173
|
+
/**
 * Instruments a request so that, once the response has finished, an execution
 * record is appended to storage and (when configured) the result is pushed to
 * the task's output provider.
 *
 * Must be called before the mode handler (scrape/agent/headful) responds: it
 * wraps `res.json` to capture the response body as the execution result.
 *
 * @param {object} req - Express request; `req.body` is read when the response finishes.
 * @param {object} res - Express response; `res.locals.executionId` is set immediately.
 * @param {object} [baseMeta] - Fallback `source`, `mode`, `taskId` and `taskName`
 *   used when the request body does not provide them.
 * @returns {void}
 */
const registerExecution = (req, res, baseMeta = {}) => {
    // Required lazily, as in the original; Node caches the module after first use.
    const { appendExecution } = require('./src/server/storage');

    const start = Date.now();
    // Wide random suffix: the previous 0-999 range made duplicate ids likely
    // for requests that arrive within the same millisecond.
    const requestId = 'exec_' + start + '_' + Math.floor(Math.random() * 1e9);
    res.locals.executionId = requestId;

    // Capture whatever the handler sends through res.json.
    const originalJson = res.json.bind(res);
    res.json = (body) => {
        res.locals.executionResult = body;
        return originalJson(body);
    };

    res.on('finish', () => {
        const durationMs = Date.now() - start;
        // Read late on purpose: handlers may replace req.body before responding.
        const body = req.body || {};
        const query = req.query || {};
        const entry = {
            id: requestId,
            timestamp: start,
            method: req.method,
            path: req.path,
            status: res.statusCode,
            durationMs,
            source: body.runSource || query.runSource || baseMeta.source || 'unknown',
            mode: body.mode || baseMeta.mode || 'unknown',
            taskId: body.taskId || baseMeta.taskId || null,
            taskName: body.name || baseMeta.taskName || null,
            url: body.url || query.url || null,
            taskSnapshot: body.taskSnapshot || null,
            result: res.locals.executionResult || null
        };
        appendExecution(entry).catch(err => console.error('Failed to append execution:', err));

        // Output config may come from the request itself or from the task snapshot.
        const outputConfig = body.output || (body.taskSnapshot && body.taskSnapshot.output);
        if (outputConfig && entry.result) {
            pushOutput(outputConfig, entry.result.data, requestId)
                .catch(err => console.error('[OUTPUT] Unexpected error:', err));
        }
    });
};
|
|
220
|
+
|
|
221
|
+
/**
 * Expands `{$name}` placeholders in the `url`, `selector` and
 * `extractionScript` fields of `req.body` and `req.query`, in place.
 *
 * Variables are taken from the first truthy of `body.taskVariables`,
 * `body.variables`, `query.taskVariables`, `query.variables`; a string value
 * is parsed as JSON. `{$now}` expands to the current ISO-8601 timestamp.
 * Unknown, null or undefined variables expand to an empty string; objects are
 * JSON-encoded.
 *
 * @param {object} req - Express-like request with optional `body` and `query`.
 * @returns {void}
 */
const preprocessScrapeRequest = (req) => {
    const rawVars = req.body?.taskVariables || req.body?.variables || req.query?.taskVariables || req.query?.variables || {};

    let parsedVars = rawVars;
    if (typeof rawVars === 'string') {
        try {
            parsedVars = JSON.parse(rawVars);
        } catch {
            // Unparseable input means "no variables", not a string to index into.
            parsedVars = null;
        }
    }
    // JSON such as "null" or "5" parses fine but is not a variable map.
    const safeVars = parsedVars !== null && typeof parsedVars === 'object' ? parsedVars : {};

    // Own properties only, so `{$constructor}` and friends never resolve to
    // inherited members.
    const lookup = (name) => (Object.prototype.hasOwnProperty.call(safeVars, name) ? safeVars[name] : undefined);

    const resolve = (str) => {
        if (typeof str !== 'string') return str;
        return str.replace(/\{\$([\w.]+)\}/g, (_match, name) => {
            if (name === 'now') return new Date().toISOString();
            const value = lookup(name);
            if (value === undefined || value === null) return '';
            if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
                return String(value);
            }
            try {
                // JSON.stringify yields undefined for functions/symbols; do not
                // let that become the literal text "undefined".
                return JSON.stringify(value) ?? '';
            } catch {
                return String(value);
            }
        });
    };

    const templatedFields = ['url', 'selector', 'extractionScript'];
    for (const target of [req.body, req.query]) {
        if (!target) continue;
        for (const field of templatedFields) {
            if (target[field]) target[field] = resolve(target[field]);
        }
    }
};
|
|
258
|
+
|
|
259
|
+
/**
 * Runs a stored task by id (`req.params.id`) and dispatches it to the handler
 * for its mode: `scrape`, `headful`, or agent (the default for any other mode).
 *
 * Responds 500 `FAILED_TO_LOAD_TASK` when tasks cannot be loaded and 404
 * `TASK_NOT_FOUND` when the id is unknown. Otherwise `req.body` is replaced by
 * the request body merged with the stored task (task fields win, except `url`
 * and `extractionScript`, where a request value takes precedence). Runtime
 * variables are the task's variable values overridden by request-supplied ones.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @returns {Promise<*>} Whatever the selected mode handler returns.
 */
const executeTaskById = async (req, res) => {
    const taskId = req.params.id;
    let task;
    try {
        await loadTasks();
        task = getTaskById(taskId);
    } catch (e) {
        return res.status(500).json({ error: 'FAILED_TO_LOAD_TASK' });
    }

    if (!task) {
        return res.status(404).json({ error: 'TASK_NOT_FOUND' });
    }

    registerExecution(req, res, { mode: task.mode || 'agent', taskId: task.id, taskName: task.name });

    // The body may be absent (e.g. a POST without a JSON payload); treat it as empty.
    const requestBody = req.body || {};

    // Only an object can act as a variable map; spreading a string would
    // produce index keys instead of variables.
    const requestedVars = requestBody.variables || requestBody.taskVariables;
    const clientVars = requestedVars && typeof requestedVars === 'object' ? requestedVars : {};

    const taskVars = {};
    if (task.variables) {
        for (const [key, v] of Object.entries(task.variables)) {
            // Tolerate null/undefined entries instead of throwing mid-request.
            taskVars[key] = v?.value;
        }
    }
    const runtimeVars = { ...taskVars, ...clientVars };

    req.body = {
        ...requestBody,
        ...task,
        url: requestBody.url || task.url,
        taskId: task.id,
        variables: runtimeVars,
        taskVariables: runtimeVars,
        actions: task.actions || [],
        mode: task.mode || 'agent',
        extractionScript: requestBody.extractionScript || task.extractionScript
    };

    if (task.mode === 'scrape') {
        preprocessScrapeRequest(req);
        return handleScrape(req, res);
    }

    if (task.mode === 'headful') {
        if (typeof req.body.url === 'string') {
            req.body.url = req.body.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
                const value = runtimeVars[name];
                if (value === undefined || value === null) return '';
                return String(value);
            });
        }
        return handleHeadful(req, res);
    }

    try {
        const runId = String(req.body.runId || req.query.runId || '').trim();
        if (runId) {
            sendExecutionUpdate(runId, { status: 'started' });
        }
    } catch {
        // Best effort: a failed "started" notification must not block the run.
    }
    return handleAgent(req, res);
};
|
|
320
|
+
|
|
321
|
+
// Task execution by id, authenticated with an API key (two equivalent paths).
app.post('/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);
app.post('/api/tasks/:id/api', requireApiKey, dataRateLimiter, executeTaskById);

// Shared handler for the two scrape aliases: records the execution, expands
// placeholders, then hands the request to the scraper.
const runScrapeRequest = (req, res) => {
    registerExecution(req, res, { mode: 'scrape' });
    preprocessScrapeRequest(req);
    return handleScrape(req, res);
};
app.all('/scrape', requireAuth, dataRateLimiter, runScrapeRequest);
app.all('/scraper', requireAuth, dataRateLimiter, runScrapeRequest);

app.all('/agent', requireAuth, dataRateLimiter, (req, res) => {
    registerExecution(req, res, { mode: 'agent' });
    try {
        const requestedRunId = (req.body && req.body.runId) || req.query.runId || '';
        const runId = String(requestedRunId).trim();
        if (runId) {
            sendExecutionUpdate(runId, { status: 'started' });
        }
    } catch {
        // ignore: the "started" notification is best effort
    }
    return handleAgent(req, res);
});

app.post('/headful', requireAuth, dataRateLimiter, (req, res) => {
    registerExecution(req, res, { mode: 'headful' });
    const payload = req.body;
    if (payload && typeof payload.url === 'string') {
        // Expand {$name} placeholders in the URL from the request variables.
        const vars = payload.taskVariables || payload.variables || {};
        payload.url = payload.url.replace(/\{\$(\w+)\}/g, (_match, name) => {
            const value = vars[name];
            return value === undefined || value === null ? '' : String(value);
        });
    }
    return handleHeadful(req, res);
});

app.post('/headful/stop', requireAuth, stopHeadful);
|
|
359
|
+
|
|
360
|
+
// Ensure public/captures directory exists
const capturesDir = path.join(__dirname, 'public', 'captures');
const capturesDirMissing = !fs.existsSync(capturesDir);
if (capturesDirMissing) {
    fs.mkdirSync(capturesDir, { recursive: true });
}

// NoVNC Setup
// The first candidate directory that exists is served under /novnc; when none
// exists, noVNC support is reported as disabled.
const novncDirCandidates = ['/opt/novnc', '/usr/share/novnc'];
const directoryExists = (candidate) => {
    try {
        return fs.existsSync(candidate);
    } catch {
        return false;
    }
};
const novncDir = novncDirCandidates.find((candidate) => directoryExists(candidate));
const novncEnabled = Boolean(novncDir);
if (novncEnabled) {
    app.use('/novnc', express.static(novncDir));
}

// Static Files
// Captures are exposed under two paths, both behind session or API-key auth.
for (const mountPath of ['/captures', '/screenshots']) {
    app.use(mountPath, requireAuthOrApiKey, express.static(capturesDir));
}
app.use(express.static(DIST_DIR));
|
|
387
|
+
|
|
388
|
+
// Headful Status Endpoint
// Reports whether the noVNC viewer can be used: the noVNC assets must be
// installed and something must be listening on NOVNC_PORT.
// NOTE(review): unlike the other /api/headful routes this one has no
// requireAuth; left as-is in case the UI calls it before login. Confirm.
app.get('/api/headful/status', async (req, res) => {
    if (!novncEnabled) {
        return res.json({ useNovnc: false });
    }
    try {
        // Check if the novnc port is actually in use
        const portAvailable = await isPortAvailable(NOVNC_PORT);
        // If the port is NOT available, something (websockify) is listening on it
        return res.json({ useNovnc: !portAvailable });
    } catch (err) {
        // A rejected probe in an async handler would otherwise leave the
        // request without a response; report noVNC as unavailable instead.
        console.error('[HEADFUL] Failed to probe noVNC port:', (err && err.message) || err);
        return res.json({ useNovnc: false });
    }
});
|
|
398
|
+
|
|
399
|
+
/**
 * Server-Sent Events handler that forwards `selectorSelected` events from the
 * headful browser to the client as `data: {"selector": ...}` messages.
 *
 * Sends an initial `ready` event, then a comment line every 20 seconds as a
 * keep-alive, and detaches the listener and timer when the request closes.
 * Shared by both stream routes, which previously carried two identical copies
 * of this handler.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response, kept open as an event stream.
 * @returns {void}
 */
const streamSelectorEvents = (req, res) => {
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');
    if (typeof res.flushHeaders === 'function') res.flushHeaders();
    res.write('event: ready\ndata: {}\n\n');

    const onSelectorSelected = (selector) => {
        try {
            res.write(`data: ${JSON.stringify({ selector })}\n\n`);
        } catch (err) {
            // ignore: a failed write to this stream is deliberately non-fatal
        }
    };

    headfulEventEmitter.on('selectorSelected', onSelectorSelected);

    const keepAlive = setInterval(() => {
        try {
            res.write(':keep-alive\n\n');
        } catch {
            // ignore: cleanup happens in the 'close' handler below
        }
    }, 20000);

    req.on('close', () => {
        clearInterval(keepAlive);
        headfulEventEmitter.off('selectorSelected', onSelectorSelected);
    });
};

app.get('/api/headful/selector_stream', requireAuth, streamSelectorEvents);
app.get('/headful/selector_stream', requireAuth, streamSelectorEvents);

app.post('/api/headful/inspect', requireAuth, toggleInspectMode);
app.post('/headful/inspect', requireAuth, toggleInspectMode);
|
|
463
|
+
|
|
464
|
+
// Start Server
// Asks findAvailablePort for a port starting from the configured one
// (second argument presumably the number of ports to try; confirm in
// ./src/server/utils), then starts listening, starts the cron scheduler and
// wires WebSocket upgrades through to websockify.
findAvailablePort(port, 20)
    .then((availablePort) => {
        if (availablePort !== port) {
            console.log(`Port ${port} in use, switched to ${availablePort}.`);
        }
        const server = app.listen(availablePort, '0.0.0.0', () => {
            const address = server.address();
            const displayPort = typeof address === 'object' && address ? address.port : availablePort;
            console.log(`Server running at http://localhost:${displayPort}`);

            // Start the cron scheduler
            const { startScheduler } = require('./src/server/scheduler');
            startScheduler().catch(err => console.error('[SCHEDULER] Failed to start:', err.message));
        });
        server.on('upgrade', async (req, socket, head) => {
            // Upgrade requests do not pass through Express middleware, so the
            // IP allowlist is enforced here explicitly.
            try {
                const allowed = await isIpAllowed(req.socket?.remoteAddress);
                if (!allowed || !proxyWebsockify(req, socket, head)) {
                    socket.destroy();
                }
            } catch (err) {
                // Without this, a failure in the allowlist check or the proxy
                // would surface as an unhandled rejection and leak the socket.
                console.error('WebSocket upgrade failed:', (err && err.message) || err);
                try {
                    socket.destroy();
                } catch {
                    // ignore
                }
            }
        });
        server.on('error', (err) => {
            console.error('Server failed to start:', err.message || err);
            process.exit(1);
        });
    })
    .catch((err) => {
        console.error('Server failed to start:', err.message || err);
        process.exit(1);
    });
|