web-agent-bridge 2.9.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/sdk/package.json +1 -1
- package/server/routes/runtime.js +204 -0
- package/server/runtime/container-worker.js +111 -0
- package/server/runtime/container.js +448 -0
- package/server/runtime/distributed-worker.js +362 -0
- package/server/runtime/index.js +21 -1
- package/server/runtime/queue.js +599 -0
- package/server/runtime/replay.js +431 -29
- package/server/runtime/scheduler.js +194 -55
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* WAB Container Isolation — Real Process-Level Task Isolation
|
|
5
|
+
*
|
|
6
|
+
* Provides true OS-level isolation for task execution:
|
|
7
|
+
* - Process isolation via child_process.fork()
|
|
8
|
+
* - Resource limits (memory, CPU time, timeout)
|
|
9
|
+
* - Filesystem sandboxing (tmp directory per task)
|
|
10
|
+
* - Network restrictions
|
|
11
|
+
* - Audit trail of all operations
|
|
12
|
+
* - Docker container support (when available)
|
|
13
|
+
*
|
|
14
|
+
* Hierarchy:
|
|
15
|
+
* 1. Process isolation (child_process) — always available
|
|
16
|
+
* 2. Docker containers — optional, enterprise-grade
|
|
17
|
+
* 3. JS sandbox — fallback (existing ExecutionSandbox)
|
|
18
|
+
*
|
|
19
|
+
* The worker process runs inside a container, executes the task,
|
|
20
|
+
* and sends results back via IPC.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const { fork, execSync } = require('child_process');
|
|
24
|
+
const crypto = require('crypto');
|
|
25
|
+
const path = require('path');
|
|
26
|
+
const fs = require('fs');
|
|
27
|
+
const os = require('os');
|
|
28
|
+
const { bus } = require('./event-bus');
|
|
29
|
+
|
|
30
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
31
|
+
// CONTAINER RUNNER (child_process based)
|
|
32
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
33
|
+
|
|
34
|
+
/** Container states that still occupy a concurrency slot. */
const ACTIVE_STATES = new Set(['starting', 'running']);

/**
 * Append a chunk of child output to `buffer`, never letting the buffer grow
 * beyond `limit` characters in total.
 *
 * @param {string} buffer - Output collected so far.
 * @param {Buffer|string} chunk - Newly received data.
 * @param {number} limit - Maximum total length of the returned string.
 * @returns {string} The (possibly unchanged) buffer.
 */
function appendCapped(buffer, chunk, limit) {
  const room = limit - buffer.length;
  if (room <= 0) return buffer;
  return buffer + chunk.toString().slice(0, room);
}

/**
 * Read the resident set size of another process.
 *
 * process.memoryUsage() always describes the *calling* process, so it cannot
 * be used to sample a child. On Linux the value is read from
 * /proc/<pid>/status; on other platforms no portable stdlib source exists.
 *
 * @param {number} pid - Process id to inspect.
 * @returns {number|null} RSS in bytes, or null when it cannot be determined.
 */
function readRssBytes(pid) {
  if (process.platform !== 'linux' || !pid) return null;
  try {
    const status = fs.readFileSync(`/proc/${pid}/status`, 'utf8');
    const match = /^VmRSS:\s+(\d+)\s+kB/m.exec(status);
    return match ? Number(match[1]) * 1024 : null;
  } catch {
    // Best effort: the process has already exited or /proc is not readable.
    return null;
  }
}

/**
 * Runs tasks either in a forked Node.js child process or in a Docker
 * container, and keeps per-container records (state, limits, audit trail)
 * plus aggregate statistics.
 */
class ContainerRunner {
  /**
   * @param {object} [options]
   * @param {number} [options.maxContainers=50] - Max containers running at once.
   * @param {number} [options.defaultTimeout=60000] - Default timeout in ms.
   * @param {number} [options.defaultMaxMemory=268435456] - Default memory limit in bytes.
   * @param {string} [options.tmpDir] - Base directory for per-container scratch dirs.
   */
  constructor(options = {}) {
    this._containers = new Map(); // containerId → container record
    this._maxContainers = options.maxContainers || 50;
    this._defaultTimeout = options.defaultTimeout || 60000;
    this._defaultMaxMemory = options.defaultMaxMemory || 256 * 1024 * 1024; // 256MB
    this._tmpBase = options.tmpDir || path.join(os.tmpdir(), 'wab-containers');
    this._dockerAvailable = null; // null = not probed yet
    this._stats = {
      created: 0, completed: 0, failed: 0, timedOut: 0, killed: 0,
      totalDuration: 0, peakConcurrent: 0,
    };

    // Best effort: a missing base directory surfaces later when a task is started.
    try { fs.mkdirSync(this._tmpBase, { recursive: true }); } catch {}
  }

  // ─── Process Containers ─────────────────────────────────────────────

  /**
   * Create and run a task in an isolated child process.
   *
   * The task description is written to `<tmpDir>/task.json` and handed to
   * `container-worker.js`, which reports back over IPC.
   *
   * @param {string} taskId - Caller-supplied task identifier.
   * @param {string} taskCode - Task code passed to the worker via task.json.
   * @param {object} [options]
   * @param {number} [options.timeout] - Timeout in ms (defaults to the runner default).
   * @param {number} [options.maxMemory] - V8 old-space limit in bytes.
   * @param {boolean} [options.allowNetwork=true] - Forwarded to the worker.
   * @param {object} [options.params] - Parameters forwarded to the worker.
   * @returns {Promise<object>} Resolves with `{ success, containerId, taskId, ... }`;
   *   failures of the task itself resolve with `success: false`.
   * @throws {Error} (as a rejection) when the concurrency limit is reached or the
   *   child process cannot be started.
   */
  async runInProcess(taskId, taskCode, options = {}) {
    this._assertCapacity();

    const containerId = `ctr_${crypto.randomBytes(12).toString('hex')}`;
    const tmpDir = path.join(this._tmpBase, containerId);

    const container = {
      id: containerId,
      taskId,
      type: 'process',
      state: 'starting',
      process: null,
      tmpDir,
      pid: null,
      limits: {
        timeout: options.timeout || this._defaultTimeout,
        maxMemory: options.maxMemory || this._defaultMaxMemory,
        allowNetwork: options.allowNetwork !== false,
      },
      audit: [],
      usage: { memoryPeak: 0, cpuTime: 0 },
      startedAt: Date.now(),
      completedAt: null,
      result: null,
      error: null,
    };
    this._register(container);

    let child;
    try {
      fs.mkdirSync(tmpDir, { recursive: true });

      const workerScript = path.join(__dirname, 'container-worker.js');
      const taskDataFile = path.join(tmpDir, 'task.json');
      fs.writeFileSync(taskDataFile, JSON.stringify({
        taskId,
        containerId,
        code: taskCode,
        params: options.params || {},
        timeout: container.limits.timeout,
        allowNetwork: container.limits.allowNetwork,
      }));

      child = fork(workerScript, [taskDataFile], {
        cwd: tmpDir,
        stdio: ['pipe', 'pipe', 'pipe', 'ipc'],
        env: {
          // NOTE(review): the child inherits the full parent environment.
          ...process.env,
          WAB_CONTAINER_ID: containerId,
          WAB_TASK_ID: taskId,
          WAB_SANDBOX: 'true',
          // Caps the V8 old-space heap (in MB); replaces any inherited NODE_OPTIONS.
          NODE_OPTIONS: `--max-old-space-size=${Math.floor(container.limits.maxMemory / (1024 * 1024))}`,
        },
        execArgv: [],
      });
    } catch (err) {
      // Do not leave a half-created record (and its scratch dir) behind.
      this._abortStart(container, err);
      throw err;
    }

    container.process = child;
    container.pid = child.pid;
    container.state = 'running';
    container.audit.push({ action: 'started', pid: child.pid, timestamp: Date.now() });

    return new Promise((resolve) => {
      let settled = false;
      let stdout = '';
      let stderr = '';

      // Hard timeout: only fires while the task is still running, so an
      // explicit kill() is not re-labelled as a timeout.
      const timer = setTimeout(() => {
        if (container.state !== 'running') return;
        container.state = 'timeout';
        container.error = 'Execution timeout';
        this._stats.timedOut++;
        container.audit.push({ action: 'timeout', timestamp: Date.now() });
        child.kill('SIGKILL');
      }, container.limits.timeout);

      // Memory monitoring (sample every 2s); stays at 0 where RSS is unavailable.
      const memMonitor = setInterval(() => {
        const rss = readRssBytes(child.pid);
        if (rss !== null && rss > container.usage.memoryPeak) {
          container.usage.memoryPeak = rss;
        }
      }, 2000);

      // 'exit' and 'error' can both fire for one child; only the first one
      // may update stats, clean up and resolve.
      const settle = () => {
        if (settled) return false;
        settled = true;
        clearTimeout(timer);
        clearInterval(memMonitor);
        container.completedAt = Date.now();
        return true;
      };

      // Collect stdout/stderr, bounded in total so a noisy task cannot
      // exhaust the parent's memory.
      if (child.stdout) child.stdout.on('data', (d) => { stdout = appendCapped(stdout, d, 10000); });
      if (child.stderr) child.stderr.on('data', (d) => { stderr = appendCapped(stderr, d, 10000); });

      // IPC message — result / progress / log from the worker.
      child.on('message', (msg) => {
        // The worker runs untrusted code: ignore anything that is not an object.
        if (!msg || typeof msg !== 'object') return;

        if (msg.type === 'result') {
          container.result = msg.data;
          container.audit.push({ action: 'result_received', timestamp: Date.now() });
        } else if (msg.type === 'progress') {
          bus.emit('container.progress', { containerId, taskId, progress: msg.progress });
        } else if (msg.type === 'log') {
          container.audit.push({ action: 'log', message: msg.message, timestamp: Date.now() });
        }
      });

      child.on('exit', (code, signal) => {
        if (!settle()) return;
        const duration = container.completedAt - container.startedAt;

        if (container.state === 'timeout') {
          container.audit.push({ action: 'exit_timeout', code, signal, timestamp: Date.now() });
          this._cleanup(containerId);
          resolve({
            success: false,
            containerId,
            taskId,
            error: 'Execution timed out',
            duration,
            stdout: stdout.slice(0, 2000),
            stderr: stderr.slice(0, 2000),
          });
          return;
        }

        if (container.state === 'killed') {
          // Already counted in stats.killed by kill(); do not count as failed too.
          container.error = 'Container was killed';
          container.audit.push({ action: 'exit_killed', code, signal, timestamp: Date.now() });
          bus.emit('container.failed', { containerId, taskId, error: container.error });
          this._cleanup(containerId);
          resolve({
            success: false,
            containerId,
            taskId,
            error: container.error,
            duration,
            stdout: stdout.slice(0, 2000),
            stderr: stderr.slice(0, 2000),
          });
          return;
        }

        if (code === 0 && container.result !== null) {
          container.state = 'completed';
          this._stats.completed++;
          // Only successful runs feed avgDuration (totalDuration / completed).
          this._stats.totalDuration += duration;
          container.audit.push({ action: 'completed', code, duration, timestamp: Date.now() });
          bus.emit('container.completed', { containerId, taskId, duration });

          this._cleanup(containerId);
          resolve({
            success: true,
            containerId,
            taskId,
            result: container.result,
            duration,
            stdout: stdout.slice(0, 2000),
          });
          return;
        }

        container.state = 'failed';
        container.error = stderr || `Process exited with code ${code}`;
        this._stats.failed++;
        container.audit.push({ action: 'failed', code, signal, timestamp: Date.now() });
        bus.emit('container.failed', { containerId, taskId, error: container.error });

        this._cleanup(containerId);
        resolve({
          success: false,
          containerId,
          taskId,
          error: container.error,
          duration,
          stdout: stdout.slice(0, 2000),
          stderr: stderr.slice(0, 2000),
        });
      });

      child.on('error', (err) => {
        if (!settle()) return;
        // 'error' is also emitted when the child is alive but misbehaving
        // (e.g. a failed IPC send); make sure it does not outlive its record.
        try { child.kill('SIGKILL'); } catch {}

        container.state = 'failed';
        container.error = err.message;
        this._stats.failed++;
        container.audit.push({ action: 'error', message: err.message, timestamp: Date.now() });
        this._cleanup(containerId);
        resolve({
          success: false,
          containerId,
          taskId,
          error: err.message,
          duration: container.completedAt - container.startedAt,
        });
      });

      // Emitted once all handlers are wired, so a throwing listener cannot
      // leave the child without its timeout.
      bus.emit('container.started', { containerId, taskId, pid: child.pid });
    });
  }

  // ─── Docker Containers ──────────────────────────────────────────────

  /**
   * Check if Docker is available. The probe (`docker info`) runs once and
   * its outcome is cached for the lifetime of this runner.
   *
   * @returns {boolean}
   */
  isDockerAvailable() {
    if (this._dockerAvailable !== null) return this._dockerAvailable;
    try {
      execSync('docker info', { stdio: 'ignore', timeout: 5000 });
      this._dockerAvailable = true;
    } catch {
      this._dockerAvailable = false;
    }
    return this._dockerAvailable;
  }

  /**
   * Run a task inside a Docker container (`docker run --rm ...`).
   *
   * @param {string} taskId - Caller-supplied task identifier.
   * @param {string} image - Docker image reference.
   * @param {string|string[]} [command] - Command to run; a string is split on whitespace.
   * @param {object} [options]
   * @param {number} [options.timeout] - Timeout in ms.
   * @param {number} [options.maxMemory] - Memory limit in bytes (`--memory`).
   * @param {number} [options.cpus=1] - CPU limit (`--cpus`).
   * @param {boolean} [options.allowNetwork=false] - Use the bridge network instead of none.
   * @param {object} [options.env] - Extra environment variables.
   * @param {number} [options.maxOutput=50000] - Max characters of stdout kept for the result.
   * @returns {Promise<object>} Resolves with `{ success, containerId, taskId, ... }`.
   *   On success `result` is stdout parsed as JSON, or the raw string if not JSON.
   * @throws {Error} (as a rejection) when Docker is unavailable, the image is
   *   invalid, the concurrency limit is reached, or `docker` cannot be spawned.
   */
  async runInDocker(taskId, image, command, options = {}) {
    if (!this.isDockerAvailable()) {
      throw new Error('Docker is not available. Use runInProcess() instead.');
    }
    // A leading '-' would be parsed by the docker CLI as an option.
    if (typeof image !== 'string' || image === '' || image.startsWith('-')) {
      throw new Error('Invalid Docker image name');
    }
    this._assertCapacity();

    const containerId = `ctr_${crypto.randomBytes(12).toString('hex')}`;
    const containerName = `wab-${containerId}`;
    const timeout = options.timeout || this._defaultTimeout;
    const memLimit = options.maxMemory || this._defaultMaxMemory;
    const maxOutput = options.maxOutput || 50000;

    const container = {
      id: containerId,
      taskId,
      type: 'docker',
      state: 'starting',
      dockerName: containerName,
      image,
      pid: null,
      limits: { timeout, maxMemory: memLimit },
      audit: [],
      usage: {},
      startedAt: Date.now(),
      completedAt: null,
      result: null,
      error: null,
    };
    this._register(container);

    const args = [
      'run', '--rm',
      '--name', containerName,
      '--memory', `${Math.floor(memLimit / (1024 * 1024))}m`,
      '--cpus', `${options.cpus || 1}`,
      '--network', options.allowNetwork ? 'bridge' : 'none',
      '--read-only',
      '--tmpfs', '/tmp:rw,noexec,nosuid,size=64m',
    ];

    // Add environment variables
    if (options.env) {
      for (const [k, v] of Object.entries(options.env)) {
        args.push('-e', `${k}=${v}`);
      }
    }
    args.push('-e', `WAB_CONTAINER_ID=${containerId}`);
    args.push('-e', `WAB_TASK_ID=${taskId}`);

    args.push(image);
    if (command) {
      const parts = Array.isArray(command) ? command : command.trim().split(/\s+/).filter(Boolean);
      args.push(...parts);
    }

    const { spawn } = require('child_process');
    let proc;
    try {
      proc = spawn('docker', args);
    } catch (err) {
      this._abortStart(container, err);
      throw err;
    }

    container.pid = proc.pid ?? null;
    container.state = 'running';
    container.audit.push({ action: 'started', pid: container.pid, timestamp: Date.now() });

    return new Promise((resolve) => {
      let settled = false;
      let stdout = '';
      let stderr = '';

      // spawn()'s own `timeout` only signals the docker CLI; kill the
      // container itself, then the CLI as a fallback.
      const timer = setTimeout(() => {
        if (container.state !== 'running') return;
        container.state = 'timeout';
        container.error = 'Execution timeout';
        this._stats.timedOut++;
        container.audit.push({ action: 'timeout', timestamp: Date.now() });
        try {
          const killer = spawn('docker', ['kill', containerName], { stdio: 'ignore' });
          killer.on('error', () => { /* best effort */ });
        } catch {}
        proc.kill('SIGKILL');
      }, timeout);

      const settle = () => {
        if (settled) return false;
        settled = true;
        clearTimeout(timer);
        container.completedAt = Date.now();
        return true;
      };

      proc.stdout.on('data', (d) => { stdout = appendCapped(stdout, d, maxOutput); });
      proc.stderr.on('data', (d) => { stderr = appendCapped(stderr, d, 10000); });

      // 'close' (not 'exit') so that stdout is fully drained before parsing.
      proc.on('close', (code) => {
        if (!settle()) return;
        const duration = container.completedAt - container.startedAt;

        if (container.state === 'timeout') {
          resolve({ success: false, containerId, taskId, error: 'Execution timed out', duration, stderr });
        } else if (container.state === 'killed') {
          // Already counted in stats.killed by kill().
          container.error = 'Container was killed';
          resolve({ success: false, containerId, taskId, error: container.error, duration, stderr });
        } else if (code === 0) {
          container.state = 'completed';
          this._stats.completed++;
          this._stats.totalDuration += duration;
          container.audit.push({ action: 'completed', code, duration, timestamp: Date.now() });
          // Try to parse stdout as JSON result
          try { container.result = JSON.parse(stdout); } catch { container.result = stdout; }
          resolve({ success: true, containerId, taskId, result: container.result, duration });
        } else {
          container.state = 'failed';
          container.error = stderr || `Docker exit code ${code}`;
          this._stats.failed++;
          container.audit.push({ action: 'failed', code, timestamp: Date.now() });
          resolve({ success: false, containerId, taskId, error: container.error, duration, stderr });
        }

        this._cleanup(containerId);
      });

      proc.on('error', (err) => {
        if (!settle()) return;
        container.state = 'failed';
        container.error = err.message;
        this._stats.failed++;
        container.audit.push({ action: 'error', message: err.message, timestamp: Date.now() });
        this._cleanup(containerId);
        resolve({ success: false, containerId, taskId, error: err.message });
      });
    });
  }

  // ─── Management ─────────────────────────────────────────────────────

  /**
   * Kill a running container.
   *
   * @param {string} containerId
   * @returns {boolean} true if a kill was issued; false if the container is
   *   unknown or no longer running (finished records are kept for a grace period).
   */
  kill(containerId) {
    const container = this._containers.get(containerId);
    if (!container || !ACTIVE_STATES.has(container.state)) return false;

    if (container.type === 'process' && container.process) {
      container.process.kill('SIGKILL');
    } else if (container.type === 'docker') {
      // dockerName is generated internally from random hex, so it is shell-safe.
      try { execSync(`docker kill ${container.dockerName}`, { timeout: 5000 }); } catch {}
    }

    this._stats.killed++;
    container.audit.push({ action: 'killed', timestamp: Date.now() });
    container.state = 'killed';
    return true;
  }

  /**
   * Get container details.
   *
   * @param {string} containerId
   * @returns {object|null} A summary of the record, or null if unknown.
   */
  getContainer(containerId) {
    const c = this._containers.get(containerId);
    if (!c) return null;
    return {
      id: c.id,
      taskId: c.taskId,
      type: c.type,
      state: c.state,
      pid: c.pid,
      limits: c.limits,
      usage: c.usage,
      audit: c.audit,
      startedAt: c.startedAt,
      completedAt: c.completedAt,
      duration: c.completedAt ? c.completedAt - c.startedAt : Date.now() - c.startedAt,
    };
  }

  /**
   * List all tracked containers, including finished ones that are still
   * within their post-completion grace period.
   *
   * @returns {object[]}
   */
  listContainers() {
    const result = [];
    for (const c of this._containers.values()) {
      result.push({
        id: c.id,
        taskId: c.taskId,
        type: c.type,
        state: c.state,
        pid: c.pid,
        startedAt: c.startedAt,
        duration: c.completedAt ? c.completedAt - c.startedAt : Date.now() - c.startedAt,
      });
    }
    return result;
  }

  /**
   * Aggregate statistics. `active` counts only starting/running containers;
   * `avgDuration` is the mean duration (ms) of successfully completed runs.
   *
   * @returns {object}
   */
  getStats() {
    return {
      ...this._stats,
      active: this._activeCount(),
      maxContainers: this._maxContainers,
      dockerAvailable: this._dockerAvailable,
      avgDuration: this._stats.completed > 0
        ? Math.round(this._stats.totalDuration / this._stats.completed) : 0,
    };
  }

  /** Number of containers that are currently starting or running. */
  _activeCount() {
    let active = 0;
    for (const c of this._containers.values()) {
      if (ACTIVE_STATES.has(c.state)) active++;
    }
    return active;
  }

  /** Throw when starting one more container would exceed the limit. */
  _assertCapacity() {
    if (this._activeCount() >= this._maxContainers) {
      throw new Error('Maximum concurrent containers reached');
    }
  }

  /** Track a new container record and update creation/peak statistics. */
  _register(container) {
    this._containers.set(container.id, container);
    this._stats.created++;
    const active = this._activeCount();
    if (active > this._stats.peakConcurrent) {
      this._stats.peakConcurrent = active;
    }
    container.audit.push({ action: 'created', timestamp: Date.now() });
  }

  /** Mark a container that could not be started as failed and release it. */
  _abortStart(container, err) {
    container.state = 'failed';
    container.error = err.message;
    container.completedAt = Date.now();
    this._stats.failed++;
    container.audit.push({ action: 'error', message: err.message, timestamp: Date.now() });
    this._cleanup(container.id);
  }

  /**
   * Remove a finished container's scratch directory immediately and drop its
   * record after 60s, so getContainer() keeps working for a short while.
   */
  _cleanup(containerId) {
    const container = this._containers.get(containerId);
    if (!container) return;

    // Clean up temp directory
    if (container.tmpDir) {
      try { fs.rmSync(container.tmpDir, { recursive: true, force: true }); } catch {}
    }

    // Remove after a delay (to allow getContainer queries); unref() so this
    // bookkeeping timer never keeps the process alive on its own.
    const removal = setTimeout(() => this._containers.delete(containerId), 60000);
    removal.unref();
  }
}
|
|
441
|
+
|
|
442
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
443
|
+
// Singleton
|
|
444
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
445
|
+
|
|
446
|
+
// Shared runner instance, constructed with no options so every limit uses the class defaults.
const containerRunner = new ContainerRunner();
|
|
447
|
+
|
|
448
|
+
// Export the class (for callers that need their own configured runner) alongside the shared instance.
module.exports = { ContainerRunner, containerRunner };
|