figranium 0.9.1 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -674
- package/README.md +336 -318
- package/agent.js +1 -1
- package/common-utils.js +211 -166
- package/dist/assets/index--OZi5-p_.css +1 -0
- package/dist/assets/index-Bkr74C53.js +15 -0
- package/dist/index.html +26 -26
- package/dist/novnc.html +108 -108
- package/extraction-worker.js +204 -197
- package/headful.js +583 -219
- package/html-utils.js +24 -24
- package/package.json +81 -78
- package/proxy-rotation.js +261 -261
- package/proxy-utils.js +84 -84
- package/public/novnc.html +108 -108
- package/scrape.js +418 -374
- package/server.js +501 -404
- package/src/server/cron-parser.js +316 -0
- package/src/server/routes/schedules.js +171 -0
- package/src/server/scheduler.js +381 -0
- package/url-utils.js +137 -116
- package/user-agent-settings.js +76 -76
- package/dist/assets/index-ALim18cn.css +0 -1
- package/dist/assets/index-D8YbCWRx.js +0 -15
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process task scheduler.
|
|
3
|
+
* Loads tasks with schedule.enabled = true, computes next runs,
|
|
4
|
+
* and executes them at the correct time using setTimeout.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { loadTasks, saveTasks, getTaskById } = require('./storage');
|
|
8
|
+
const { appendExecution } = require('./storage');
|
|
9
|
+
const { getNextRun, scheduleToCron, isValidCron } = require('./cron-parser');
|
|
10
|
+
const { sendExecutionUpdate } = require('./state');
|
|
11
|
+
|
|
12
|
+
// Internal scheduler state (module-private).

// Handle of the single pending setTimeout, or null when nothing is armed.
let schedulerTimer = null;

// taskId -> { cron, nextRun: Date }. The map is only ever mutated in place
// (set/delete/clear), never rebound, hence `const`.
const scheduledTasks = new Map();

// True between startScheduler() and stopScheduler().
let running = false;
|
|
16
|
+
|
|
17
|
+
/**
 * Work out which cron expression a task schedule should run on.
 *
 * A raw `schedule.cron` that passes `isValidCron` (advanced mode) wins.
 * Otherwise, when `schedule.frequency` is set, the expression is built from
 * the visual (no-code) config via `scheduleToCron`.
 *
 * @param {object} [schedule] - The task's schedule config.
 * @returns {string|null} The cron expression, or null when none can be derived
 *   (no schedule, no usable fields, or `scheduleToCron` threw).
 */
function resolveCron(schedule) {
  if (!schedule) return null;

  // Advanced mode: user-supplied raw expression.
  const rawCron = schedule.cron;
  if (rawCron && isValidCron(rawCron)) return rawCron;

  // Visual mode needs a frequency to build from.
  if (!schedule.frequency) return null;

  try {
    return scheduleToCron(schedule);
  } catch {
    return null;
  }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Rebuild the in-memory schedule map from the persisted tasks.
 *
 * The map is cleared, then every task whose schedule is enabled and resolves
 * to a cron expression gets an entry with its next run time. A task whose
 * next run cannot be computed is logged and left out.
 *
 * @returns {Promise<void>}
 */
async function loadSchedules() {
  const storedTasks = await loadTasks();
  scheduledTasks.clear();

  for (const task of storedTasks) {
    const schedule = task.schedule;
    const cron = schedule && schedule.enabled ? resolveCron(schedule) : null;

    if (cron) {
      try {
        scheduledTasks.set(task.id, { cron, nextRun: getNextRun(cron) });
      } catch (err) {
        console.error(`[SCHEDULER] Failed to compute next run for task "${task.name}" (${task.id}):`, err.message);
      }
    }
  }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Arm a single timer for whichever scheduled task is due soonest.
 *
 * Any previously armed timer is cancelled first. Nothing is armed when the
 * scheduler is stopped or no task is scheduled.
 */
function scheduleNext() {
  if (schedulerTimer) {
    clearTimeout(schedulerTimer);
    schedulerTimer = null;
  }

  if (!running || scheduledTasks.size === 0) return;

  // Pick the entry with the earliest next run.
  let dueId = null;
  let dueAt = Infinity;
  scheduledTasks.forEach((entry, id) => {
    const at = entry.nextRun.getTime();
    if (at < dueAt) {
      dueAt = at;
      dueId = id;
    }
  });

  if (!dueId) return;

  // setTimeout overflows past 2^31-1 ms (~24.8 days), so longer waits are
  // split: sleep for the maximum, then come back here and re-evaluate.
  const maxTimerMs = 2147483647;
  const waitMs = Math.max(0, dueAt - Date.now());
  const wasCapped = waitMs > maxTimerMs;

  schedulerTimer = setTimeout(() => {
    if (!running) return;
    if (wasCapped) {
      scheduleNext();
      return;
    }
    tick(dueId);
  }, wasCapped ? maxTimerMs : waitMs);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Run one scheduled task, record the outcome, and arm the timer for whatever
 * is due next.
 *
 * Steps: execute the task, persist lastRun/lastRunStatus/lastRunDurationMs and
 * the recomputed nextRun on the task, append an execution log entry, then call
 * scheduleNext(). Execution, persistence and logging failures are logged to
 * the console rather than thrown.
 *
 * The entry that just fired is never left in the map unchanged: it is either
 * replaced with a fresh next run or removed. A leftover entry would have a
 * nextRun in the past and make scheduleNext() re-fire it with zero delay.
 *
 * @param {string} taskId - Id of the task whose timer fired.
 * @returns {Promise<void>}
 */
async function tick(taskId) {
  if (!running) return;

  const info = scheduledTasks.get(taskId);
  if (!info) {
    // Schedule was removed before the timer fired; move on to the next one.
    scheduleNext();
    return;
  }

  console.log(`[SCHEDULER] Executing task "${taskId}" (cron: ${info.cron})`);

  const startTime = Date.now();
  let status = 'success';
  let result = null;

  try {
    result = await executeScheduledTask(taskId);
  } catch (err) {
    status = 'error';
    console.error(`[SCHEDULER] Task "${taskId}" failed:`, err.message);
    result = { error: err.message };
  }

  const durationMs = Date.now() - startTime;

  // Update the task's schedule metadata and decide whether it runs again.
  try {
    const tasks = await loadTasks();
    const task = tasks.find(t => t.id === taskId);
    if (task && task.schedule) {
      task.schedule.lastRun = startTime;
      task.schedule.lastRunStatus = status;
      task.schedule.lastRunDurationMs = durationMs;

      // Re-arm only while the schedule is still enabled (it may have been
      // switched off while the task was running), matching loadSchedules()
      // and refreshSchedule().
      let rescheduled = false;
      if (task.schedule.enabled) {
        try {
          const cron = resolveCron(task.schedule);
          if (cron) {
            const nextRun = getNextRun(cron);
            task.schedule.nextRun = nextRun.getTime();
            scheduledTasks.set(taskId, { cron, nextRun });
            rescheduled = true;
          }
        } catch {
          // Next run could not be computed; the entry is dropped below.
        }
      }
      if (!rescheduled) scheduledTasks.delete(taskId);

      await saveTasks(tasks);
    } else {
      // Task (or its schedule) no longer exists: stop firing it.
      scheduledTasks.delete(taskId);
    }
  } catch (err) {
    console.error(`[SCHEDULER] Failed to update task "${taskId}" after execution:`, err.message);

    // If storage failed before the entry was refreshed, the stale entry is
    // still in place. Recompute from the in-memory cron so the task keeps its
    // cadence instead of re-firing immediately.
    if (scheduledTasks.get(taskId) === info) {
      try {
        scheduledTasks.set(taskId, { cron: info.cron, nextRun: getNextRun(info.cron) });
      } catch {
        scheduledTasks.delete(taskId);
      }
    }
  }

  // Log execution
  try {
    const entry = {
      id: 'sched_' + startTime + '_' + Math.floor(Math.random() * 1000),
      timestamp: startTime,
      method: 'POST',
      path: `/api/tasks/${taskId}/api`,
      status: status === 'success' ? 200 : 500,
      durationMs,
      source: 'scheduler',
      mode: 'unknown',
      taskId,
      taskName: null,
      url: null,
      result
    };

    // Best-effort enrichment with task details. `await` is a no-op if
    // getTaskById is synchronous and required if it returns a promise.
    try {
      const task = await getTaskById(taskId);
      if (task) {
        entry.taskName = task.name;
        entry.mode = task.mode || 'agent';
        entry.url = task.url || null;
      }
    } catch { }

    await appendExecution(entry);
  } catch (err) {
    console.error(`[SCHEDULER] Failed to log execution for task "${taskId}":`, err.message);
  }

  scheduleNext();
}
|
|
187
|
+
|
|
188
|
+
/**
 * Run a stored task through the agent or scrape handler by handing it a mock
 * request/response pair.
 *
 * @param {string} taskId - Id of the task to run.
 * @returns {Promise<*>} Resolves with the payload the handler passes to
 *   `res.json` (status below 400), or null when the handler calls `res.end`.
 *   Rejects when the task is missing, the handler responds with a status of
 *   400 or above, or the handler itself throws/rejects.
 */
async function executeScheduledTask(taskId) {
  const storedTasks = await loadTasks();
  const task = storedTasks.find((candidate) => candidate.id === taskId);
  if (!task) throw new Error('Task not found: ' + taskId);

  // Required here rather than at module top to avoid circular deps.
  const { handleAgent } = require('../../agent');
  const { handleScrape } = require('../../scrape');

  // Flatten { name: { value } } into { name: value }.
  const runtimeVars = {};
  if (task.variables) {
    for (const key of Object.keys(task.variables)) {
      runtimeVars[key] = task.variables[key].value;
    }
  }

  const mockReq = {
    method: 'POST',
    body: {
      ...task,
      taskId: task.id,
      variables: runtimeVars,
      taskVariables: runtimeVars,
      actions: task.actions || [],
      mode: task.mode || 'agent',
      runSource: 'scheduler'
    },
    query: {},
    params: { id: taskId },
    protocol: 'http',
    socket: { remoteAddress: '127.0.0.1' },
    path: `/api/tasks/${taskId}/api`,
    on: () => { },
  };

  const runHandler = task.mode === 'scrape' ? handleScrape : handleAgent;

  return new Promise((resolve, reject) => {
    let statusCode = 200;

    // Minimal response stand-in: the promise settles on json() or end().
    const mockRes = {
      status: (code) => {
        statusCode = code;
        return mockRes;
      },
      json: (data) => {
        if (statusCode < 400) {
          resolve(data);
          return;
        }
        reject(new Error(data?.error || `HTTP ${statusCode}`));
      },
      locals: {},
      on: () => { },
      setHeader: () => { },
      write: () => { },
      end: () => resolve(null),
    };

    const runId = 'sched_' + Date.now();
    mockReq.body.runId = runId;

    // Notification is best-effort; a failure here must not block the run.
    try {
      sendExecutionUpdate(runId, { status: 'started' });
    } catch { }

    Promise.resolve(runHandler(mockReq, mockRes)).catch(reject);
  });
}
|
|
261
|
+
|
|
262
|
+
/**
 * Start the scheduler; intended to be called once the server is up.
 *
 * Loads all schedules, writes each computed nextRun back to its task when it
 * differs from the stored value, then arms the first timer. A failure during
 * startup is logged, not thrown. Calling it while already running is a no-op.
 *
 * @returns {Promise<void>}
 */
async function startScheduler() {
  if (running) return;
  running = true;
  console.log('[SCHEDULER] Starting scheduler...');

  try {
    await loadSchedules();
    const count = scheduledTasks.size;
    console.log(`[SCHEDULER] Loaded ${count} scheduled task(s).`);

    // Persist nextRun on the scheduled tasks so the frontend can display it.
    if (count > 0) {
      const tasks = await loadTasks();
      let changed = false;

      scheduledTasks.forEach((entry, id) => {
        const owner = tasks.find((t) => t.id === id);
        if (!owner || !owner.schedule) return;

        const nextRunMs = entry.nextRun.getTime();
        if (owner.schedule.nextRun === nextRunMs) return;

        owner.schedule.nextRun = nextRunMs;
        changed = true;
      });

      if (changed) await saveTasks(tasks);
    }

    scheduleNext();
  } catch (err) {
    console.error('[SCHEDULER] Failed to start:', err.message);
  }
}
|
|
297
|
+
|
|
298
|
+
/**
 * Stop the scheduler: mark it as not running, cancel the pending timer and
 * forget every in-memory schedule entry.
 */
function stopScheduler() {
  running = false;

  const pendingTimer = schedulerTimer;
  if (pendingTimer) {
    schedulerTimer = null;
    clearTimeout(pendingTimer);
  }

  scheduledTasks.clear();
  console.log('[SCHEDULER] Stopped.');
}
|
|
310
|
+
|
|
311
|
+
/**
 * Re-read one task and bring its schedule entry up to date (call after a task
 * has been updated).
 *
 * The entry is removed when the task is missing, has no enabled schedule, or
 * has no resolvable cron expression. Otherwise the next run is recomputed,
 * stored in memory and persisted on the task. The timer is re-armed in every
 * case.
 *
 * @param {string} taskId - Id of the task to refresh.
 * @returns {Promise<void>}
 */
async function refreshSchedule(taskId) {
  const tasks = await loadTasks();
  const task = tasks.find((candidate) => candidate.id === taskId);

  const isActive = Boolean(task && task.schedule && task.schedule.enabled);
  const cron = isActive ? resolveCron(task.schedule) : null;

  if (cron) {
    try {
      const nextRun = getNextRun(cron);
      scheduledTasks.set(taskId, { cron, nextRun });

      // Persist nextRun
      task.schedule.nextRun = nextRun.getTime();
      await saveTasks(tasks);
    } catch (err) {
      console.error(`[SCHEDULER] Failed to refresh schedule for "${taskId}":`, err.message);
      scheduledTasks.delete(taskId);
    }
  } else {
    scheduledTasks.delete(taskId);
  }

  scheduleNext();
}
|
|
345
|
+
|
|
346
|
+
/**
 * Drop a task's in-memory schedule entry and re-arm the timer for whatever
 * remains.
 *
 * @param {string} taskId - Id of the task to unschedule.
 */
function removeSchedule(taskId) {
  // Map#delete is a no-op for unknown ids, so no existence check is needed.
  scheduledTasks.delete(taskId);

  scheduleNext();
}
|
|
353
|
+
|
|
354
|
+
/**
 * Snapshot of the scheduler's current state.
 *
 * @returns {{running: boolean, scheduledCount: number, tasks: Array<{taskId: *, cron: string, nextRun: string, nextRunMs: number}>}}
 *   Whether the scheduler is running, how many tasks are scheduled, and one
 *   entry per scheduled task with its next run as ISO string and epoch ms.
 */
function getSchedulerStatus() {
  const tasks = Array.from(scheduledTasks, ([taskId, { cron, nextRun }]) => ({
    taskId,
    cron,
    nextRun: nextRun.toISOString(),
    nextRunMs: nextRun.getTime()
  }));

  return {
    running,
    scheduledCount: scheduledTasks.size,
    tasks
  };
}
|
|
373
|
+
|
|
374
|
+
module.exports = {
|
|
375
|
+
startScheduler,
|
|
376
|
+
stopScheduler,
|
|
377
|
+
refreshSchedule,
|
|
378
|
+
removeSchedule,
|
|
379
|
+
getSchedulerStatus,
|
|
380
|
+
resolveCron,
|
|
381
|
+
};
|
package/url-utils.js
CHANGED
|
@@ -1,116 +1,137 @@
|
|
|
1
|
-
const dns = require('dns').promises;
|
|
2
|
-
const net = require('net');
|
|
3
|
-
const { ALLOW_PRIVATE_NETWORKS } = require('./src/server/constants');
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Checks if an IP address is private.
|
|
7
|
-
* @param {string} ip The IP address to check.
|
|
8
|
-
* @returns {boolean} True if the IP is private.
|
|
9
|
-
*/
|
|
10
|
-
function isPrivateIP(ip) {
|
|
11
|
-
if (net.isIPv4(ip)) {
|
|
12
|
-
const parts = ip.split('.').map(Number);
|
|
13
|
-
return (
|
|
14
|
-
parts[0] === 0 ||
|
|
15
|
-
parts[0] === 10 ||
|
|
16
|
-
(parts[0] === 172 && parts[1] >= 16 && parts[1] <= 31) ||
|
|
17
|
-
(parts[0] === 192 && parts[1] === 168) ||
|
|
18
|
-
parts[0] === 127 ||
|
|
19
|
-
(parts[0] === 169 && parts[1] === 254) ||
|
|
20
|
-
(parts[0] === 100 && parts[1] >= 64 && parts[1] <= 127)
|
|
21
|
-
);
|
|
22
|
-
}
|
|
23
|
-
if (net.isIPv6(ip)) {
|
|
24
|
-
const lower = ip.toLowerCase();
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
|
|
1
|
+
const dns = require('dns').promises;
|
|
2
|
+
const net = require('net');
|
|
3
|
+
const { ALLOW_PRIVATE_NETWORKS } = require('./src/server/constants');
|
|
4
|
+
|
|
5
|
+
/**
 * Expands a textual IPv6 address into its eight 16-bit groups.
 * Handles `::` compression, zero-padded groups, a trailing dotted-quad IPv4
 * part and a `%zone` suffix.
 * @param {string} ip An address already accepted by net.isIPv6.
 * @returns {number[]|null} The eight group values, or null if it cannot be parsed.
 */
function parseIPv6Groups(ip) {
  let text = ip.toLowerCase();

  // A zone id (fe80::1%eth0) is not part of the address bits.
  const zoneAt = text.indexOf('%');
  if (zoneAt !== -1) text = text.slice(0, zoneAt);

  // Fold a trailing dotted quad (::ffff:1.2.3.4) into two hex groups.
  const lastColon = text.lastIndexOf(':');
  const tail = text.slice(lastColon + 1);
  if (net.isIPv4(tail)) {
    const [a, b, c, d] = tail.split('.').map(Number);
    const high = ((a << 8) | b).toString(16);
    const low = ((c << 8) | d).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }

  const halves = text.split('::');
  if (halves.length > 2) return null;

  const head = halves[0] ? halves[0].split(':') : [];
  const rest = halves.length === 2 && halves[1] ? halves[1].split(':') : [];
  const missing = 8 - head.length - rest.length;
  if (missing < 0 || (halves.length === 1 && missing !== 0)) return null;

  const groups = [...head, ...new Array(missing).fill('0'), ...rest]
    .map((group) => Number.parseInt(group, 16));
  return groups.some(Number.isNaN) ? null : groups;
}

/**
 * Checks if an IP address is private.
 *
 * IPv4: 0.0.0.0/8, 10/8, 100.64/10, 127/8, 169.254/16, 172.16/12, 192.168/16.
 * IPv6: unspecified (::), loopback (::1), link-local (fe80::/10), unique
 * local (fc00::/7), and any IPv4-mapped, IPv4-translated or IPv4-compatible
 * address whose embedded IPv4 address is private.
 *
 * IPv6 addresses are classified on their numeric groups, so zero-padded or
 * fully expanded spellings are treated the same as the compressed form.
 *
 * @param {string} ip The IP address to check.
 * @returns {boolean} True if the IP is private; false for public addresses
 *   and for input that is not an IP address.
 */
function isPrivateIP(ip) {
  if (net.isIPv4(ip)) {
    const [first, second] = ip.split('.').map(Number);
    return (
      first === 0 ||
      first === 10 ||
      (first === 172 && second >= 16 && second <= 31) ||
      (first === 192 && second === 168) ||
      first === 127 ||
      (first === 169 && second === 254) ||
      (first === 100 && second >= 64 && second <= 127)
    );
  }

  if (net.isIPv6(ip)) {
    const groups = parseIPv6Groups(ip);
    if (!groups) return false;

    const leadingZeros = (count) => groups.slice(0, count).every((g) => g === 0);
    const [g6, g7] = groups.slice(6);
    const embeddedIPv4 = `${g6 >> 8}.${g6 & 0xff}.${g7 >> 8}.${g7 & 0xff}`;

    // :: (unspecified) and ::1 (loopback)
    if (leadingZeros(7) && g7 <= 1) return true;

    // IPv4-mapped (::ffff:a.b.c.d) and IPv4-translated (::ffff:0:a.b.c.d)
    const isMapped = leadingZeros(5) && groups[5] === 0xffff;
    const isTranslated = leadingZeros(4) && groups[4] === 0xffff && groups[5] === 0;
    // IPv4-compatible (::a.b.c.d)
    const isCompatible = leadingZeros(6);
    if (isMapped || isTranslated || isCompatible) {
      return isPrivateIP(embeddedIPv4);
    }

    // fe80::/10 link-local, fc00::/7 unique local
    return (
      (groups[0] & 0xffc0) === 0xfe80 ||
      (groups[0] & 0xfe00) === 0xfc00
    );
  }

  return false;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Validates a URL to prevent SSRF by blocking private IP ranges.
 *
 * Empty input is accepted, and no check runs when ALLOW_PRIVATE_NETWORKS is
 * set. Otherwise the URL must be http(s), must not name localhost, and neither
 * its literal IP nor any address its hostname resolves to may be private.
 * A hostname that cannot be resolved is allowed through; the browser is
 * expected to fail on it.
 *
 * @param {string} urlStr The URL to validate.
 * @returns {Promise<void>} Resolves when the URL is acceptable.
 * @throws {Error} If the URL is invalid or points to a private network.
 */
async function validateUrl(urlStr) {
  if (!urlStr) return;
  if (ALLOW_PRIVATE_NETWORKS) return;

  let url;
  try {
    url = new URL(urlStr);
  } catch {
    throw new Error('Invalid URL');
  }

  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    throw new Error('Only HTTP and HTTPS protocols are allowed');
  }

  let hostname = url.hostname;
  // Strip brackets from IPv6 hostnames
  if (hostname.startsWith('[') && hostname.endsWith(']')) {
    hostname = hostname.substring(1, hostname.length - 1);
  }

  // Direct check for common private hostnames
  const lowerHost = hostname.toLowerCase();
  if (lowerHost === 'localhost' || lowerHost.endsWith('.localhost')) {
    throw new Error('Access to private network is restricted');
  }

  // An IP literal needs no DNS round trip: check it directly.
  if (net.isIP(hostname)) {
    if (isPrivateIP(hostname)) {
      throw new Error('Access to private network is restricted');
    }
    return;
  }

  // Only the lookup itself sits inside the try, so a resolution failure can
  // never be confused with the "restricted" rejection below.
  let addresses;
  try {
    // dns.lookup follows /etc/hosts and is what's typically used for connecting
    addresses = await dns.lookup(hostname, { all: true });
  } catch {
    // If we can't resolve it and it's not an IP, we allow it to proceed
    // to the browser where it will likely fail normally.
    return;
  }

  if (addresses.some(({ address }) => isPrivateIP(address))) {
    throw new Error('Access to private network is restricted');
  }
}
|
|
136
|
+
|
|
137
|
+
module.exports = { validateUrl, isPrivateIP };
|