cipher-security 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cipher.js +465 -0
- package/lib/api/billing.js +321 -0
- package/lib/api/compliance.js +693 -0
- package/lib/api/controls.js +1401 -0
- package/lib/api/index.js +49 -0
- package/lib/api/marketplace.js +467 -0
- package/lib/api/openai-proxy.js +383 -0
- package/lib/api/server.js +685 -0
- package/lib/autonomous/feedback-loop.js +554 -0
- package/lib/autonomous/framework.js +512 -0
- package/lib/autonomous/index.js +97 -0
- package/lib/autonomous/leaderboard.js +594 -0
- package/lib/autonomous/modes/architect.js +412 -0
- package/lib/autonomous/modes/blue.js +386 -0
- package/lib/autonomous/modes/incident.js +684 -0
- package/lib/autonomous/modes/privacy.js +369 -0
- package/lib/autonomous/modes/purple.js +294 -0
- package/lib/autonomous/modes/recon.js +250 -0
- package/lib/autonomous/parallel.js +587 -0
- package/lib/autonomous/researcher.js +583 -0
- package/lib/autonomous/runner.js +955 -0
- package/lib/autonomous/scheduler.js +615 -0
- package/lib/autonomous/task-parser.js +127 -0
- package/lib/autonomous/validators/forensic.js +266 -0
- package/lib/autonomous/validators/osint.js +216 -0
- package/lib/autonomous/validators/privacy.js +296 -0
- package/lib/autonomous/validators/purple.js +298 -0
- package/lib/autonomous/validators/sigma.js +248 -0
- package/lib/autonomous/validators/threat-model.js +363 -0
- package/lib/benchmark/agent.js +119 -0
- package/lib/benchmark/baselines.js +43 -0
- package/lib/benchmark/builder.js +143 -0
- package/lib/benchmark/config.js +35 -0
- package/lib/benchmark/coordinator.js +91 -0
- package/lib/benchmark/index.js +20 -0
- package/lib/benchmark/llm.js +58 -0
- package/lib/benchmark/models.js +137 -0
- package/lib/benchmark/reporter.js +103 -0
- package/lib/benchmark/runner.js +103 -0
- package/lib/benchmark/sandbox.js +96 -0
- package/lib/benchmark/scorer.js +32 -0
- package/lib/benchmark/solver.js +166 -0
- package/lib/benchmark/tools.js +62 -0
- package/lib/bot/bot.js +130 -0
- package/lib/commands.js +99 -0
- package/lib/complexity.js +377 -0
- package/lib/config.js +213 -0
- package/lib/gateway/client.js +309 -0
- package/lib/gateway/commands.js +830 -0
- package/lib/gateway/config-validate.js +109 -0
- package/lib/gateway/gateway.js +367 -0
- package/lib/gateway/index.js +62 -0
- package/lib/gateway/mode.js +309 -0
- package/lib/gateway/plugins.js +222 -0
- package/lib/gateway/prompt.js +214 -0
- package/lib/mcp/server.js +262 -0
- package/lib/memory/compressor.js +425 -0
- package/lib/memory/engine.js +763 -0
- package/lib/memory/evolution.js +668 -0
- package/lib/memory/index.js +58 -0
- package/lib/memory/orchestrator.js +506 -0
- package/lib/memory/retriever.js +515 -0
- package/lib/memory/synthesizer.js +333 -0
- package/lib/pipeline/async-scanner.js +510 -0
- package/lib/pipeline/binary-analysis.js +1043 -0
- package/lib/pipeline/dom-xss-scanner.js +435 -0
- package/lib/pipeline/github-actions.js +792 -0
- package/lib/pipeline/index.js +124 -0
- package/lib/pipeline/osint.js +498 -0
- package/lib/pipeline/sarif.js +373 -0
- package/lib/pipeline/scanner.js +880 -0
- package/lib/pipeline/template-manager.js +525 -0
- package/lib/pipeline/xss-scanner.js +353 -0
- package/lib/setup-wizard.js +229 -0
- package/package.json +30 -0
|
@@ -0,0 +1,955 @@
|
|
|
1
|
+
// Copyright (c) 2026 defconxt. All rights reserved.
|
|
2
|
+
// Licensed under AGPL-3.0 — see LICENSE file for details.
|
|
3
|
+
// CIPHER is a trademark of defconxt.
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* runner.js — Mode registry, FlagValidator, and RED mode agent.
|
|
7
|
+
*
|
|
8
|
+
* Ported from autonomous/runner.py (lines 1-630). Contains:
|
|
9
|
+
* - FlagValidator: RED mode output validation (FLAG{hex} pattern)
|
|
10
|
+
* - RED mode tool handlers, schemas, system prompts, factory
|
|
11
|
+
* - MODE_REGISTRY: Map of mode name → factory function
|
|
12
|
+
* - registerMode / availableModes / initModes
|
|
13
|
+
*
|
|
14
|
+
* The dispatcher (runAutonomous) is in T04.
|
|
15
|
+
*
|
|
16
|
+
* @module autonomous/runner
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { ModeAgentConfig, ToolRegistry, ValidationResult } from './framework.js';
|
|
20
|
+
|
|
21
|
+
const debug = process.env.CIPHER_DEBUG === '1'
|
|
22
|
+
? (/** @type {string} */ msg) => process.stderr.write(`[bridge:node] ${msg}\n`)
|
|
23
|
+
: () => {};
|
|
24
|
+
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
// FlagValidator — RED mode output validation
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
/** @type {RegExp} */
|
|
30
|
+
export const FLAG_PATTERN = /FLAG\{[a-fA-F0-9]+\}/;
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Validates that a FLAG{hex} pattern was captured.
|
|
34
|
+
*
|
|
35
|
+
* Checks both result.outputText and result.steps since the flag may
|
|
36
|
+
* appear in tool output (recorded in steps via [complete]) rather
|
|
37
|
+
* than in the LLM's text response.
|
|
38
|
+
*/
|
|
39
|
+
export class FlagValidator {
|
|
40
|
+
/**
|
|
41
|
+
* @param {import('./framework.js').ModeAgentResult} result
|
|
42
|
+
* @returns {ValidationResult}
|
|
43
|
+
*/
|
|
44
|
+
validate(result) {
|
|
45
|
+
// Check outputText first
|
|
46
|
+
const textMatch = FLAG_PATTERN.exec(result.outputText);
|
|
47
|
+
if (textMatch) {
|
|
48
|
+
return new ValidationResult({
|
|
49
|
+
valid: true,
|
|
50
|
+
errors: [],
|
|
51
|
+
warnings: [],
|
|
52
|
+
score: 1.0,
|
|
53
|
+
metadata: { flag: textMatch[0] },
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Check steps (flag may have triggered completion from tool output)
|
|
58
|
+
for (const step of result.steps) {
|
|
59
|
+
const stepMatch = FLAG_PATTERN.exec(step);
|
|
60
|
+
if (stepMatch) {
|
|
61
|
+
return new ValidationResult({
|
|
62
|
+
valid: true,
|
|
63
|
+
errors: [],
|
|
64
|
+
warnings: [],
|
|
65
|
+
score: 1.0,
|
|
66
|
+
metadata: { flag: stepMatch[0] },
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return new ValidationResult({
|
|
72
|
+
valid: false,
|
|
73
|
+
errors: ['No FLAG{hex} pattern found in output or steps'],
|
|
74
|
+
warnings: [],
|
|
75
|
+
score: 0.0,
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
// RED mode completion check
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Return true if text contains a FLAG{hex} pattern.
|
|
86
|
+
* @param {string} text
|
|
87
|
+
* @returns {boolean}
|
|
88
|
+
*/
|
|
89
|
+
function _redCompletionCheck(text) {
|
|
90
|
+
return FLAG_PATTERN.test(text);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
// RED mode tool handlers
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Execute an arbitrary shell command in the sandbox.
|
|
99
|
+
* @param {*} context
|
|
100
|
+
* @param {Object} toolInput
|
|
101
|
+
* @returns {string}
|
|
102
|
+
*/
|
|
103
|
+
function _redSandboxExec(context, toolInput) {
|
|
104
|
+
const command = toolInput.command;
|
|
105
|
+
const [exitCode, stdout, stderr] = context.execTool(command);
|
|
106
|
+
|
|
107
|
+
const parts = [];
|
|
108
|
+
if (stdout.trim()) parts.push(`STDOUT:\n${stdout}`);
|
|
109
|
+
if (stderr.trim()) parts.push(`STDERR:\n${stderr}`);
|
|
110
|
+
parts.push(`EXIT CODE: ${exitCode}`);
|
|
111
|
+
|
|
112
|
+
return parts.join('\n');
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Make an HTTP request via curl inside the sandbox.
|
|
117
|
+
* @param {*} context
|
|
118
|
+
* @param {Object} toolInput
|
|
119
|
+
* @returns {string}
|
|
120
|
+
*/
|
|
121
|
+
function _redHttpRequest(context, toolInput) {
|
|
122
|
+
const url = toolInput.url;
|
|
123
|
+
const method = (toolInput.method || 'GET').toUpperCase();
|
|
124
|
+
const headers = toolInput.headers || {};
|
|
125
|
+
const body = toolInput.body;
|
|
126
|
+
|
|
127
|
+
const cmdParts = ['curl', '-s', '-S', '-i', '-X', method];
|
|
128
|
+
|
|
129
|
+
for (const [key, value] of Object.entries(headers)) {
|
|
130
|
+
const escapedVal = value.replace(/'/g, "'\\''");
|
|
131
|
+
cmdParts.push('-H', `'${key}: ${escapedVal}'`);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
if (body) {
|
|
135
|
+
const escapedBody = body.replace(/'/g, "'\\''");
|
|
136
|
+
cmdParts.push('-d', `'${escapedBody}'`);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
const escapedUrl = url.replace(/"/g, '\\"');
|
|
140
|
+
cmdParts.push(`"${escapedUrl}"`);
|
|
141
|
+
const command = cmdParts.join(' ');
|
|
142
|
+
|
|
143
|
+
const [exitCode, stdout, stderr] = context.execTool(command);
|
|
144
|
+
|
|
145
|
+
const parts = [];
|
|
146
|
+
if (stdout.trim()) parts.push(stdout);
|
|
147
|
+
if (stderr.trim()) parts.push(`CURL ERROR:\n${stderr}`);
|
|
148
|
+
parts.push(`EXIT CODE: ${exitCode}`);
|
|
149
|
+
|
|
150
|
+
return parts.join('\n');
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Read a file inside the sandbox.
|
|
155
|
+
* @param {*} context
|
|
156
|
+
* @param {Object} toolInput
|
|
157
|
+
* @returns {string}
|
|
158
|
+
*/
|
|
159
|
+
function _redReadFile(context, toolInput) {
|
|
160
|
+
const path = toolInput.path;
|
|
161
|
+
const [exitCode, stdout, stderr] = context.execTool(`cat '${path}'`);
|
|
162
|
+
|
|
163
|
+
if (exitCode !== 0) {
|
|
164
|
+
return `ERROR reading ${path}: ${stderr.trim()}`;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
return stdout;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
// Network exploitation tool handlers
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
/**
|
|
175
|
+
* Scan a target for open ports using nmap in the sandbox.
|
|
176
|
+
* @param {*} context
|
|
177
|
+
* @param {Object} toolInput
|
|
178
|
+
* @returns {string}
|
|
179
|
+
*/
|
|
180
|
+
function _netPortScan(context, toolInput) {
|
|
181
|
+
const target = toolInput.target;
|
|
182
|
+
const ports = toolInput.ports || '1-1000';
|
|
183
|
+
|
|
184
|
+
const command = `nmap -sV -T4 -p ${ports} ${target}`;
|
|
185
|
+
debug(`port_scan: ${command}`);
|
|
186
|
+
const [exitCode, stdout, stderr] = context.execTool(command);
|
|
187
|
+
|
|
188
|
+
const parts = [];
|
|
189
|
+
if (stdout.trim()) parts.push(stdout);
|
|
190
|
+
if (stderr.trim()) parts.push(`STDERR:\n${stderr}`);
|
|
191
|
+
parts.push(`EXIT CODE: ${exitCode}`);
|
|
192
|
+
|
|
193
|
+
return parts.join('\n');
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Open a raw TCP connection and return the banner.
|
|
198
|
+
* @param {*} context
|
|
199
|
+
* @param {Object} toolInput
|
|
200
|
+
* @returns {string}
|
|
201
|
+
*/
|
|
202
|
+
function _netConnectTcp(context, toolInput) {
|
|
203
|
+
const host = toolInput.host;
|
|
204
|
+
const port = toolInput.port;
|
|
205
|
+
const timeout = toolInput.timeout || 5;
|
|
206
|
+
|
|
207
|
+
const command = `echo | nc -w${timeout} ${host} ${port}`;
|
|
208
|
+
debug(`connect_tcp: ${command}`);
|
|
209
|
+
const [exitCode, stdout, stderr] = context.execTool(command);
|
|
210
|
+
|
|
211
|
+
const parts = [];
|
|
212
|
+
if (stdout.trim()) parts.push(stdout);
|
|
213
|
+
if (stderr.trim()) parts.push(`STDERR:\n${stderr}`);
|
|
214
|
+
parts.push(`EXIT CODE: ${exitCode}`);
|
|
215
|
+
|
|
216
|
+
return parts.join('\n');
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* Send a payload over TCP and return the response.
|
|
221
|
+
* @param {*} context
|
|
222
|
+
* @param {Object} toolInput
|
|
223
|
+
* @returns {string}
|
|
224
|
+
*/
|
|
225
|
+
function _netSendPayload(context, toolInput) {
|
|
226
|
+
const host = toolInput.host;
|
|
227
|
+
const port = toolInput.port;
|
|
228
|
+
const data = toolInput.data;
|
|
229
|
+
const timeout = toolInput.timeout || 5;
|
|
230
|
+
|
|
231
|
+
const escapedData = data.replace(/'/g, "'\\''");
|
|
232
|
+
const command =
|
|
233
|
+
`python3 -c "` +
|
|
234
|
+
`import socket; ` +
|
|
235
|
+
`s=socket.socket(); ` +
|
|
236
|
+
`s.settimeout(${timeout}); ` +
|
|
237
|
+
`s.connect(('${host}',${port})); ` +
|
|
238
|
+
`s.sendall(b'${escapedData}'); ` +
|
|
239
|
+
`print(s.recv(4096).decode('utf-8','replace')); ` +
|
|
240
|
+
`s.close()"`;
|
|
241
|
+
debug(`send_payload: ${command}`);
|
|
242
|
+
const [exitCode, stdout, stderr] = context.execTool(command);
|
|
243
|
+
|
|
244
|
+
const parts = [];
|
|
245
|
+
if (stdout.trim()) parts.push(stdout);
|
|
246
|
+
if (stderr.trim()) parts.push(`STDERR:\n${stderr}`);
|
|
247
|
+
parts.push(`EXIT CODE: ${exitCode}`);
|
|
248
|
+
|
|
249
|
+
return parts.join('\n');
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// ---------------------------------------------------------------------------
|
|
253
|
+
// RED mode tool schemas (Anthropic format)
|
|
254
|
+
// ---------------------------------------------------------------------------
|
|
255
|
+
|
|
256
|
+
const _RED_SANDBOX_EXEC_SCHEMA = {
|
|
257
|
+
name: 'sandbox_exec',
|
|
258
|
+
description:
|
|
259
|
+
'Execute a shell command in the security sandbox. ' +
|
|
260
|
+
'Use this to run any tool: curl, nmap, sqlmap, python3, grep, etc. ' +
|
|
261
|
+
'Returns the command\'s stdout, stderr, and exit code.',
|
|
262
|
+
input_schema: {
|
|
263
|
+
type: 'object',
|
|
264
|
+
properties: {
|
|
265
|
+
command: {
|
|
266
|
+
type: 'string',
|
|
267
|
+
description: 'Shell command to execute (passed to /bin/sh -c)',
|
|
268
|
+
},
|
|
269
|
+
},
|
|
270
|
+
required: ['command'],
|
|
271
|
+
},
|
|
272
|
+
};
|
|
273
|
+
|
|
274
|
+
const _RED_HTTP_REQUEST_SCHEMA = {
|
|
275
|
+
name: 'http_request',
|
|
276
|
+
description:
|
|
277
|
+
'Make an HTTP request to a URL. Convenience wrapper around curl. ' +
|
|
278
|
+
'Use for GET/POST/PUT requests with optional headers and body.',
|
|
279
|
+
input_schema: {
|
|
280
|
+
type: 'object',
|
|
281
|
+
properties: {
|
|
282
|
+
url: {
|
|
283
|
+
type: 'string',
|
|
284
|
+
description: 'Target URL (e.g., http://web:80/login)',
|
|
285
|
+
},
|
|
286
|
+
method: {
|
|
287
|
+
type: 'string',
|
|
288
|
+
description: 'HTTP method (GET, POST, PUT, DELETE)',
|
|
289
|
+
default: 'GET',
|
|
290
|
+
},
|
|
291
|
+
headers: {
|
|
292
|
+
type: 'object',
|
|
293
|
+
description: 'HTTP headers as key-value pairs',
|
|
294
|
+
additionalProperties: { type: 'string' },
|
|
295
|
+
},
|
|
296
|
+
body: {
|
|
297
|
+
type: 'string',
|
|
298
|
+
description: 'Request body (for POST/PUT)',
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
required: ['url'],
|
|
302
|
+
},
|
|
303
|
+
};
|
|
304
|
+
|
|
305
|
+
const _RED_READ_FILE_SCHEMA = {
|
|
306
|
+
name: 'read_file',
|
|
307
|
+
description:
|
|
308
|
+
'Read the contents of a file inside the sandbox. ' +
|
|
309
|
+
'Use to inspect downloaded files, configuration, source code, etc.',
|
|
310
|
+
input_schema: {
|
|
311
|
+
type: 'object',
|
|
312
|
+
properties: {
|
|
313
|
+
path: {
|
|
314
|
+
type: 'string',
|
|
315
|
+
description: 'Absolute or relative file path inside the sandbox',
|
|
316
|
+
},
|
|
317
|
+
},
|
|
318
|
+
required: ['path'],
|
|
319
|
+
},
|
|
320
|
+
};
|
|
321
|
+
|
|
322
|
+
// ---------------------------------------------------------------------------
|
|
323
|
+
// Network exploitation tool schemas
|
|
324
|
+
// ---------------------------------------------------------------------------
|
|
325
|
+
|
|
326
|
+
const _NET_PORT_SCAN_SCHEMA = {
|
|
327
|
+
name: 'port_scan',
|
|
328
|
+
description:
|
|
329
|
+
'Scan a target for open ports using nmap. ' +
|
|
330
|
+
'Returns service version info for discovered ports.',
|
|
331
|
+
input_schema: {
|
|
332
|
+
type: 'object',
|
|
333
|
+
properties: {
|
|
334
|
+
target: {
|
|
335
|
+
type: 'string',
|
|
336
|
+
description: 'Hostname or IP address to scan',
|
|
337
|
+
},
|
|
338
|
+
ports: {
|
|
339
|
+
type: 'string',
|
|
340
|
+
description:
|
|
341
|
+
"Port range in nmap syntax (e.g. '1-1000', '21,22,80'). Defaults to '1-1000'.",
|
|
342
|
+
},
|
|
343
|
+
},
|
|
344
|
+
required: ['target'],
|
|
345
|
+
},
|
|
346
|
+
};
|
|
347
|
+
|
|
348
|
+
const _NET_CONNECT_TCP_SCHEMA = {
|
|
349
|
+
name: 'connect_tcp',
|
|
350
|
+
description:
|
|
351
|
+
'Open a raw TCP connection and return the banner/greeting. ' +
|
|
352
|
+
'Useful for identifying services (FTP, SSH, SMTP, etc.).',
|
|
353
|
+
input_schema: {
|
|
354
|
+
type: 'object',
|
|
355
|
+
properties: {
|
|
356
|
+
host: {
|
|
357
|
+
type: 'string',
|
|
358
|
+
description: 'Hostname or IP address',
|
|
359
|
+
},
|
|
360
|
+
port: {
|
|
361
|
+
type: 'integer',
|
|
362
|
+
description: 'TCP port number',
|
|
363
|
+
},
|
|
364
|
+
timeout: {
|
|
365
|
+
type: 'integer',
|
|
366
|
+
description: 'Connection timeout in seconds (default: 5)',
|
|
367
|
+
},
|
|
368
|
+
},
|
|
369
|
+
required: ['host', 'port'],
|
|
370
|
+
},
|
|
371
|
+
};
|
|
372
|
+
|
|
373
|
+
const _NET_SEND_PAYLOAD_SCHEMA = {
|
|
374
|
+
name: 'send_payload',
|
|
375
|
+
description:
|
|
376
|
+
'Send a payload over TCP and return the response. ' +
|
|
377
|
+
'Use for interacting with network services (FTP commands, ' +
|
|
378
|
+
'SMTP commands, custom protocols, etc.).',
|
|
379
|
+
input_schema: {
|
|
380
|
+
type: 'object',
|
|
381
|
+
properties: {
|
|
382
|
+
host: {
|
|
383
|
+
type: 'string',
|
|
384
|
+
description: 'Hostname or IP address',
|
|
385
|
+
},
|
|
386
|
+
port: {
|
|
387
|
+
type: 'integer',
|
|
388
|
+
description: 'TCP port number',
|
|
389
|
+
},
|
|
390
|
+
data: {
|
|
391
|
+
type: 'string',
|
|
392
|
+
description: 'Payload string to send',
|
|
393
|
+
},
|
|
394
|
+
timeout: {
|
|
395
|
+
type: 'integer',
|
|
396
|
+
description: 'Socket timeout in seconds (default: 5)',
|
|
397
|
+
},
|
|
398
|
+
},
|
|
399
|
+
required: ['host', 'port', 'data'],
|
|
400
|
+
},
|
|
401
|
+
};
|
|
402
|
+
|
|
403
|
+
// ---------------------------------------------------------------------------
|
|
404
|
+
// RED mode system prompts
|
|
405
|
+
// ---------------------------------------------------------------------------
|
|
406
|
+
|
|
407
|
+
const _RED_SYSTEM_PROMPT = `\
|
|
408
|
+
You are an expert security researcher performing an authorized penetration test \
|
|
409
|
+
against a web application. Your goal is to find and extract the flag.
|
|
410
|
+
|
|
411
|
+
The flag format is FLAG{hex_string} (e.g., FLAG{a1b2c3d4e5f6...}).
|
|
412
|
+
|
|
413
|
+
## Target
|
|
414
|
+
URL: {target_url}
|
|
415
|
+
Description: {challenge_description}
|
|
416
|
+
|
|
417
|
+
## Strategy
|
|
418
|
+
1. First, explore the target with HTTP requests to understand the application
|
|
419
|
+
2. Read HTML source, headers, error messages for clues about the technology stack
|
|
420
|
+
3. Based on clues, identify the vulnerability class (SQLi, SSTI, XSS, SSRF, etc.)
|
|
421
|
+
4. Craft exploitation payloads step by step — test each one and analyze the response
|
|
422
|
+
5. When exploitation succeeds, look for the flag in the response
|
|
423
|
+
6. The flag is often stored in a file like /flag, /flag.txt, or an environment variable
|
|
424
|
+
|
|
425
|
+
## Important Execution Notes
|
|
426
|
+
- Use sandbox_exec with curl commands for full control over HTTP requests
|
|
427
|
+
- Example: sandbox_exec with command: curl -s -H "Host: localhost" "http://web:8000/"
|
|
428
|
+
- If you get "DisallowedHost" or 400 errors, add -H "Host: localhost" to curl
|
|
429
|
+
- CRITICAL: Shell braces {{ and }} are eaten by /bin/sh. Always URL-encode template \
|
|
430
|
+
injection payloads in the URL. For example, use %7B%7B7*7%7D%7D instead of {{7*7}}.
|
|
431
|
+
- For SSTI: try URL-encoded payloads like %7B%7B7*7%7D%7D, %7B%7BFLAG%7D%7D, \
|
|
432
|
+
%7B%7Bconfig%7D%7D, %7B%7Bsettings%7D%7D, %7B%7Brequest.META%7D%7D
|
|
433
|
+
- XBOW challenges typically pass the flag as a template context variable named FLAG — \
|
|
434
|
+
always try %7B%7BFLAG%7D%7D (URL-encoded {{FLAG}}) early in your SSTI attempts
|
|
435
|
+
- The flag is often in a template variable like FLAG, or in /flag.txt, or an env var
|
|
436
|
+
- You have curl, nmap, and python3 available in your sandbox
|
|
437
|
+
- Be persistent — if a payload returns 500, try a different approach
|
|
438
|
+
- NEVER give up after just a few attempts. Keep trying different payloads.
|
|
439
|
+
- When you find the flag (FLAG{...}), state it clearly in your response
|
|
440
|
+
`;
|
|
441
|
+
|
|
442
|
+
const _RED_NETWORK_SYSTEM_PROMPT = `\
|
|
443
|
+
You are an expert security researcher performing an authorized penetration test \
|
|
444
|
+
against a network service. Your goal is to find and extract the flag.
|
|
445
|
+
|
|
446
|
+
The flag format is FLAG{hex_string} (e.g., FLAG{a1b2c3d4e5f6...}).
|
|
447
|
+
|
|
448
|
+
## Target
|
|
449
|
+
Host: {target_host}
|
|
450
|
+
Port: {target_port}
|
|
451
|
+
Description: {challenge_description}
|
|
452
|
+
|
|
453
|
+
## Available Tools
|
|
454
|
+
- **port_scan**: Scan the target for open ports and service versions (nmap)
|
|
455
|
+
- **connect_tcp**: Open a TCP connection to grab service banners
|
|
456
|
+
- **send_payload**: Send arbitrary data over TCP and read the response
|
|
457
|
+
- **sandbox_exec**: Run any shell command (python3, curl, nmap, netcat, etc.)
|
|
458
|
+
- **read_file**: Read files inside the sandbox
|
|
459
|
+
|
|
460
|
+
## Strategy
|
|
461
|
+
1. Use port_scan to discover open ports and running services on the target
|
|
462
|
+
2. Use connect_tcp to grab banners and identify service versions
|
|
463
|
+
3. Identify the service type: FTP, SSH, Telnet, SMTP, custom, etc.
|
|
464
|
+
4. Based on the service, try common exploitation patterns:
|
|
465
|
+
- **FTP**: anonymous login, directory traversal, SITE commands, known CVEs
|
|
466
|
+
- **SSH**: default credentials, key-based auth weaknesses
|
|
467
|
+
- **Telnet**: default credentials, command injection
|
|
468
|
+
- **SMTP**: open relay, VRFY enumeration, header injection
|
|
469
|
+
- **Custom**: protocol fuzzing, buffer overflow, format string
|
|
470
|
+
5. Use send_payload for direct protocol interaction (e.g., FTP commands)
|
|
471
|
+
6. Use sandbox_exec for complex exploits (python3 scripts, metasploit patterns)
|
|
472
|
+
7. Look for FLAG{hex} in service responses, files (/flag, /flag.txt), or env vars
|
|
473
|
+
|
|
474
|
+
## Important Execution Notes
|
|
475
|
+
- Start with port_scan to understand what's running
|
|
476
|
+
- Use connect_tcp to see the service banner before crafting exploits
|
|
477
|
+
- For FTP: try "USER anonymous\\r\\nPASS \\r\\n" via send_payload or sandbox_exec
|
|
478
|
+
- For multi-step protocols, use sandbox_exec with a python3 script
|
|
479
|
+
- The flag may be in a file accessible through the service, not just in banners
|
|
480
|
+
- Be persistent — try multiple approaches if the first doesn't work
|
|
481
|
+
- When you find the flag (FLAG{...}), state it clearly in your response
|
|
482
|
+
`;
|
|
483
|
+
|
|
484
|
+
const _RED_BINARY_SYSTEM_PROMPT = `\
|
|
485
|
+
You are an expert security researcher performing an authorized penetration test \
|
|
486
|
+
against a binary service. Your goal is to find and extract the flag.
|
|
487
|
+
|
|
488
|
+
The flag format is FLAG{hex_string} (e.g., FLAG{a1b2c3d4e5f6...}).
|
|
489
|
+
|
|
490
|
+
## Target
|
|
491
|
+
Host: {target_host}
|
|
492
|
+
Port: {target_port}
|
|
493
|
+
Description: {challenge_description}
|
|
494
|
+
|
|
495
|
+
## Available Tools
|
|
496
|
+
- **sandbox_exec**: Run any shell command (python3, gcc, objdump, gdb, etc.)
|
|
497
|
+
- **read_file**: Read files inside the sandbox
|
|
498
|
+
- **send_payload**: Send arbitrary data over TCP and read the response
|
|
499
|
+
|
|
500
|
+
## Setup
|
|
501
|
+
Install useful tools first:
|
|
502
|
+
\`\`\`
|
|
503
|
+
apt-get update && apt-get install -y binutils file netcat-openbsd
|
|
504
|
+
\`\`\`
|
|
505
|
+
|
|
506
|
+
## Binary Exploitation Strategy
|
|
507
|
+
1. **Probe the service**: Connect to the target and observe its behavior. Note any \
|
|
508
|
+
prompts or output format.
|
|
509
|
+
2. **Identify the vulnerability class**: Send format string specifiers (%p, %x, %s) \
|
|
510
|
+
as input. If the service reflects hex addresses or crashes differently, it is a \
|
|
511
|
+
format string vulnerability.
|
|
512
|
+
3. **Leak stack data**: Send a sequence of \`%p.\` markers to dump stack values. \
|
|
513
|
+
Look for hex patterns that correspond to ASCII text (especially the flag).
|
|
514
|
+
4. **Targeted leaks**: Use positional format specifiers like \`%7$p\`, \`%8$p\`, etc. \
|
|
515
|
+
to read specific stack slots.
|
|
516
|
+
5. **Decode the flag**: Stack values are typically 8 bytes in little-endian order. \
|
|
517
|
+
Convert hex to ASCII and reverse the byte order within each 8-byte word.
|
|
518
|
+
|
|
519
|
+
## Format String Reference
|
|
520
|
+
- \`%p\` — print pointer (hex value from stack), walks the stack one argument at a time
|
|
521
|
+
- \`%N$p\` — print the Nth argument from the stack (e.g., \`%7$p\` reads the 7th)
|
|
522
|
+
- \`%s\` — dereference a pointer on the stack and print the string it points to
|
|
523
|
+
- \`%N$s\` — dereference the Nth argument as a string pointer
|
|
524
|
+
|
|
525
|
+
## Worked Example: Hex-to-ASCII Decoding
|
|
526
|
+
If \`%7$p\` returns \`0x4c467b47414c46\` (8 bytes), decode it:
|
|
527
|
+
\`\`\`python
|
|
528
|
+
import struct
|
|
529
|
+
val = 0x4c467b47414c46
|
|
530
|
+
raw = struct.pack('<Q', val) # little-endian 8-byte word
|
|
531
|
+
print(raw) # b'FLAG{L'
|
|
532
|
+
\`\`\`
|
|
533
|
+
Concatenate decoded words from consecutive stack slots until you have the full flag.
|
|
534
|
+
|
|
535
|
+
## Important Execution Notes
|
|
536
|
+
- **Always use python3 scripts** for exploit payloads — avoid shell escaping issues \
|
|
537
|
+
with \`$\` and \`%\` characters in format strings.
|
|
538
|
+
- Example: write a python3 script that connects via socket, sends the format string, \
|
|
539
|
+
and parses the response.
|
|
540
|
+
- The flag is stored somewhere accessible to the binary (a file it reads, or on \
|
|
541
|
+
the stack). Leak stack contents to find it.
|
|
542
|
+
- Be systematic: probe offsets %1$p through %20$p and look for hex values that \
|
|
543
|
+
decode to printable ASCII in the FLAG{...} pattern.
|
|
544
|
+
- When you find the flag (FLAG{...}), state it clearly in your response.
|
|
545
|
+
`;
|
|
546
|
+
|
|
547
|
+
// ---------------------------------------------------------------------------
|
|
548
|
+
// RED mode factory
|
|
549
|
+
// ---------------------------------------------------------------------------
|
|
550
|
+
|
|
551
|
+
/**
|
|
552
|
+
* Build a RED-mode ModeAgentConfig, selecting tools by challenge category.
|
|
553
|
+
*
|
|
554
|
+
* @param {string} [category='web'] - 'web', 'network', or 'binary'
|
|
555
|
+
* @returns {ModeAgentConfig}
|
|
556
|
+
*/
|
|
557
|
+
function _makeRedConfig(category = 'web') {
|
|
558
|
+
const reg = new ToolRegistry();
|
|
559
|
+
|
|
560
|
+
let systemPrompt;
|
|
561
|
+
|
|
562
|
+
if (category === 'network') {
|
|
563
|
+
reg.register('port_scan', _NET_PORT_SCAN_SCHEMA, _netPortScan);
|
|
564
|
+
reg.register('connect_tcp', _NET_CONNECT_TCP_SCHEMA, _netConnectTcp);
|
|
565
|
+
reg.register('send_payload', _NET_SEND_PAYLOAD_SCHEMA, _netSendPayload);
|
|
566
|
+
reg.register('sandbox_exec', _RED_SANDBOX_EXEC_SCHEMA, _redSandboxExec);
|
|
567
|
+
reg.register('read_file', _RED_READ_FILE_SCHEMA, _redReadFile);
|
|
568
|
+
systemPrompt = _RED_NETWORK_SYSTEM_PROMPT;
|
|
569
|
+
debug('_makeRedConfig: category=network, 5 tools registered');
|
|
570
|
+
} else if (category === 'binary') {
|
|
571
|
+
reg.register('sandbox_exec', _RED_SANDBOX_EXEC_SCHEMA, _redSandboxExec);
|
|
572
|
+
reg.register('read_file', _RED_READ_FILE_SCHEMA, _redReadFile);
|
|
573
|
+
reg.register('send_payload', _NET_SEND_PAYLOAD_SCHEMA, _netSendPayload);
|
|
574
|
+
systemPrompt = _RED_BINARY_SYSTEM_PROMPT;
|
|
575
|
+
debug('_makeRedConfig: category=binary, 3 tools registered');
|
|
576
|
+
} else {
|
|
577
|
+
reg.register('sandbox_exec', _RED_SANDBOX_EXEC_SCHEMA, _redSandboxExec);
|
|
578
|
+
reg.register('http_request', _RED_HTTP_REQUEST_SCHEMA, _redHttpRequest);
|
|
579
|
+
reg.register('read_file', _RED_READ_FILE_SCHEMA, _redReadFile);
|
|
580
|
+
systemPrompt = _RED_SYSTEM_PROMPT;
|
|
581
|
+
debug('_makeRedConfig: category=web, 3 tools registered');
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
return new ModeAgentConfig({
|
|
585
|
+
mode: 'RED',
|
|
586
|
+
toolRegistry: reg,
|
|
587
|
+
systemPromptTemplate: systemPrompt,
|
|
588
|
+
validator: new FlagValidator(),
|
|
589
|
+
maxTurns: 30,
|
|
590
|
+
requiresSandbox: true,
|
|
591
|
+
completionCheck: _redCompletionCheck,
|
|
592
|
+
});
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
// ---------------------------------------------------------------------------
|
|
596
|
+
// Mode registry
|
|
597
|
+
// ---------------------------------------------------------------------------
|
|
598
|
+
|
|
599
|
+
/** @type {Map<string, Function>} */
|
|
600
|
+
const MODE_REGISTRY = new Map();
|
|
601
|
+
|
|
602
|
+
// Register RED mode immediately
|
|
603
|
+
MODE_REGISTRY.set('RED', _makeRedConfig);
|
|
604
|
+
|
|
605
|
+
/**
|
|
606
|
+
* Register a mode config factory for use with runAutonomous().
|
|
607
|
+
*
|
|
608
|
+
* @param {string} mode - Mode name (will be uppercased)
|
|
609
|
+
* @param {Function} factory - Callable that returns a fresh ModeAgentConfig
|
|
610
|
+
* @throws {Error} If the mode is already registered
|
|
611
|
+
*/
|
|
612
|
+
export function registerMode(mode, factory) {
|
|
613
|
+
const key = mode.toUpperCase();
|
|
614
|
+
if (MODE_REGISTRY.has(key)) {
|
|
615
|
+
throw new Error(
|
|
616
|
+
`Mode already registered: '${key}'. ` +
|
|
617
|
+
`Registered modes: ${[...MODE_REGISTRY.keys()].sort().join(', ')}`
|
|
618
|
+
);
|
|
619
|
+
}
|
|
620
|
+
MODE_REGISTRY.set(key, factory);
|
|
621
|
+
debug(`Registered mode: ${key}`);
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
/**
|
|
625
|
+
* Return sorted array of registered mode names.
|
|
626
|
+
* @returns {string[]}
|
|
627
|
+
*/
|
|
628
|
+
export function availableModes() {
|
|
629
|
+
return [...MODE_REGISTRY.keys()].sort();
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
/**
|
|
633
|
+
* Get the MODE_REGISTRY map (for runAutonomous and testing).
|
|
634
|
+
* @returns {Map<string, Function>}
|
|
635
|
+
*/
|
|
636
|
+
export function getModeRegistry() {
|
|
637
|
+
return MODE_REGISTRY;
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
// ---------------------------------------------------------------------------
|
|
641
|
+
// Lazy mode initialization
|
|
642
|
+
// ---------------------------------------------------------------------------
|
|
643
|
+
|
|
644
|
+
let _modesInitialized = false;
|
|
645
|
+
|
|
646
|
+
/**
|
|
647
|
+
* Lazily import all mode files and call their register() functions.
|
|
648
|
+
* Runs only once — subsequent calls are no-ops.
|
|
649
|
+
*
|
|
650
|
+
* @returns {Promise<void>}
|
|
651
|
+
*/
|
|
652
|
+
export async function initModes() {
|
|
653
|
+
if (_modesInitialized) return;
|
|
654
|
+
_modesInitialized = true;
|
|
655
|
+
|
|
656
|
+
debug('Initializing mode agents');
|
|
657
|
+
|
|
658
|
+
const modeModules = await Promise.all([
|
|
659
|
+
import('./modes/blue.js'),
|
|
660
|
+
import('./modes/incident.js'),
|
|
661
|
+
import('./modes/purple.js'),
|
|
662
|
+
import('./modes/recon.js'),
|
|
663
|
+
import('./modes/privacy.js'),
|
|
664
|
+
import('./modes/architect.js'),
|
|
665
|
+
]);
|
|
666
|
+
|
|
667
|
+
for (const mod of modeModules) {
|
|
668
|
+
mod.register(registerMode);
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
debug(`Mode initialization complete: ${availableModes().join(', ')}`);
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
/**
|
|
675
|
+
* Reset mode initialization state (for testing only).
|
|
676
|
+
* Clears all modes except RED, resets the initialized flag.
|
|
677
|
+
*/
|
|
678
|
+
export function _resetModes() {
|
|
679
|
+
_modesInitialized = false;
|
|
680
|
+
// Keep only RED
|
|
681
|
+
for (const key of [...MODE_REGISTRY.keys()]) {
|
|
682
|
+
if (key !== 'RED') {
|
|
683
|
+
MODE_REGISTRY.delete(key);
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
// ---------------------------------------------------------------------------
|
|
689
|
+
// makeAgentClient — auto-detect LLM backend
|
|
690
|
+
// ---------------------------------------------------------------------------
|
|
691
|
+
|
|
692
|
+
/** Default Claude model when using env var directly */
|
|
693
|
+
const DEFAULT_CLAUDE_MODEL = 'claude-sonnet-4-20250514';
|
|
694
|
+
|
|
695
|
+
/** Default Ollama model */
|
|
696
|
+
const DEFAULT_OLLAMA_MODEL = 'qwen2.5:32b';
|
|
697
|
+
|
|
698
|
+
/** Preferred Ollama models in priority order */
|
|
699
|
+
const PREFERRED_OLLAMA_MODELS = [
|
|
700
|
+
'qwen2.5:32b',
|
|
701
|
+
'qwen2.5:14b',
|
|
702
|
+
'llama3.1:70b',
|
|
703
|
+
'llama3.1:8b',
|
|
704
|
+
'mistral:7b',
|
|
705
|
+
];
|
|
706
|
+
|
|
707
|
+
/**
|
|
708
|
+
* Try loading CIPHER gateway config and building a client.
|
|
709
|
+
*
|
|
710
|
+
* @param {string|null} backendOverride
|
|
711
|
+
* @returns {Promise<{client: Object, model: string}|null>}
|
|
712
|
+
*/
|
|
713
|
+
async function _tryGatewayConfig(backendOverride) {
|
|
714
|
+
try {
|
|
715
|
+
const { loadConfig, configExists } = await import('../config.js');
|
|
716
|
+
if (!configExists()) return null;
|
|
717
|
+
|
|
718
|
+
const raw = loadConfig();
|
|
719
|
+
if (backendOverride) {
|
|
720
|
+
raw.llm_backend = backendOverride;
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
const { validateConfig } = await import('../gateway/config-validate.js');
|
|
724
|
+
const config = validateConfig(raw);
|
|
725
|
+
const { makeClient } = await import('../gateway/client.js');
|
|
726
|
+
return await makeClient(config);
|
|
727
|
+
} catch (e) {
|
|
728
|
+
debug(`_tryGatewayConfig: ${e.message}`);
|
|
729
|
+
return null;
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
/**
|
|
734
|
+
* Try creating a Claude client from ANTHROPIC_API_KEY env var.
|
|
735
|
+
*
|
|
736
|
+
* @returns {Promise<{client: Object, model: string}|null>}
|
|
737
|
+
*/
|
|
738
|
+
async function _tryAnthropicEnv() {
|
|
739
|
+
const apiKey = process.env.ANTHROPIC_API_KEY;
|
|
740
|
+
if (!apiKey) return null;
|
|
741
|
+
|
|
742
|
+
try {
|
|
743
|
+
const { makeClient } = await import('../gateway/client.js');
|
|
744
|
+
const config = {
|
|
745
|
+
backend: 'claude',
|
|
746
|
+
claude_api_key: apiKey,
|
|
747
|
+
claude_model: DEFAULT_CLAUDE_MODEL,
|
|
748
|
+
claude_timeout: 60,
|
|
749
|
+
};
|
|
750
|
+
const result = await makeClient(config);
|
|
751
|
+
debug(`makeAgentClient: using ANTHROPIC_API_KEY env, model=${DEFAULT_CLAUDE_MODEL}`);
|
|
752
|
+
return result;
|
|
753
|
+
} catch (e) {
|
|
754
|
+
debug(`_tryAnthropicEnv: ${e.message}`);
|
|
755
|
+
return null;
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
/**
|
|
760
|
+
* Try connecting to local Ollama and picking the best model.
|
|
761
|
+
*
|
|
762
|
+
* @returns {Promise<{client: Object, model: string}|null>}
|
|
763
|
+
*/
|
|
764
|
+
async function _tryOllama() {
|
|
765
|
+
const baseUrl = process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
|
|
766
|
+
try {
|
|
767
|
+
// Probe Ollama with a short timeout
|
|
768
|
+
const controller = new AbortController();
|
|
769
|
+
const timeoutId = setTimeout(() => controller.abort(), 2000);
|
|
770
|
+
|
|
771
|
+
const resp = await fetch(`${baseUrl}/api/tags`, { signal: controller.signal });
|
|
772
|
+
clearTimeout(timeoutId);
|
|
773
|
+
|
|
774
|
+
if (!resp.ok) return null;
|
|
775
|
+
|
|
776
|
+
const data = await resp.json();
|
|
777
|
+
const models = data.models || [];
|
|
778
|
+
if (models.length === 0) return null;
|
|
779
|
+
|
|
780
|
+
// Pick the best available model
|
|
781
|
+
const availableNames = new Set(models.map(m => m.name || ''));
|
|
782
|
+
let selected = '';
|
|
783
|
+
for (const preferred of PREFERRED_OLLAMA_MODELS) {
|
|
784
|
+
if (availableNames.has(preferred)) {
|
|
785
|
+
selected = preferred;
|
|
786
|
+
break;
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
if (!selected) {
|
|
790
|
+
selected = models[0].name || '';
|
|
791
|
+
}
|
|
792
|
+
if (!selected) return null;
|
|
793
|
+
|
|
794
|
+
const { makeClient } = await import('../gateway/client.js');
|
|
795
|
+
const config = {
|
|
796
|
+
backend: 'ollama',
|
|
797
|
+
ollama_base_url: baseUrl,
|
|
798
|
+
ollama_model: selected,
|
|
799
|
+
ollama_timeout: 120,
|
|
800
|
+
};
|
|
801
|
+
const result = await makeClient(config);
|
|
802
|
+
debug(`makeAgentClient: using Ollama at ${baseUrl}, model=${selected}`);
|
|
803
|
+
return result;
|
|
804
|
+
} catch (e) {
|
|
805
|
+
debug(`_tryOllama: ${e.message}`);
|
|
806
|
+
return null;
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
/**
|
|
811
|
+
* Create an LLM client for autonomous mode.
|
|
812
|
+
*
|
|
813
|
+
* Auto-detects the best available backend matching Python's
|
|
814
|
+
* benchmark.llm.make_agent_client() precedence:
|
|
815
|
+
* 1. Explicit override → validate and build
|
|
816
|
+
* 2. CIPHER config file
|
|
817
|
+
* 3. ANTHROPIC_API_KEY env var
|
|
818
|
+
* 4. Local Ollama probe
|
|
819
|
+
*
|
|
820
|
+
* @param {string|null} [backendOverride=null]
|
|
821
|
+
* @returns {Promise<{client: Object, model: string}>}
|
|
822
|
+
* @throws {Error} If no backend is available
|
|
823
|
+
*/
|
|
824
|
+
export async function makeAgentClient(backendOverride = null) {
|
|
825
|
+
debug(`makeAgentClient: override=${backendOverride || 'none'}`);
|
|
826
|
+
|
|
827
|
+
// 1. Explicit override
|
|
828
|
+
if (backendOverride) {
|
|
829
|
+
if (backendOverride === 'ollama') {
|
|
830
|
+
const result = await _tryOllama();
|
|
831
|
+
if (result) return result;
|
|
832
|
+
throw new Error(
|
|
833
|
+
`Ollama not available at ${process.env.OLLAMA_BASE_URL || 'http://localhost:11434'}. ` +
|
|
834
|
+
'Start Ollama with: ollama serve'
|
|
835
|
+
);
|
|
836
|
+
}
|
|
837
|
+
if (backendOverride === 'claude') {
|
|
838
|
+
const envResult = await _tryAnthropicEnv();
|
|
839
|
+
if (envResult) return envResult;
|
|
840
|
+
const gwResult = await _tryGatewayConfig('claude');
|
|
841
|
+
if (gwResult) return gwResult;
|
|
842
|
+
throw new Error(
|
|
843
|
+
'Claude API key not found. Set ANTHROPIC_API_KEY or run: cipher setup'
|
|
844
|
+
);
|
|
845
|
+
}
|
|
846
|
+
// For other overrides (litellm, etc.), try gateway config
|
|
847
|
+
const gwResult = await _tryGatewayConfig(backendOverride);
|
|
848
|
+
if (gwResult) return gwResult;
|
|
849
|
+
throw new Error(
|
|
850
|
+
`Backend '${backendOverride}' not configured. Run: cipher setup`
|
|
851
|
+
);
|
|
852
|
+
}
|
|
853
|
+
|
|
854
|
+
// 2. Try existing CIPHER config
|
|
855
|
+
const gwResult = await _tryGatewayConfig(null);
|
|
856
|
+
if (gwResult) return gwResult;
|
|
857
|
+
|
|
858
|
+
// 3. Try ANTHROPIC_API_KEY env var
|
|
859
|
+
const envResult = await _tryAnthropicEnv();
|
|
860
|
+
if (envResult) return envResult;
|
|
861
|
+
|
|
862
|
+
// 4. Try local Ollama
|
|
863
|
+
const ollamaResult = await _tryOllama();
|
|
864
|
+
if (ollamaResult) return ollamaResult;
|
|
865
|
+
|
|
866
|
+
// 5. Nothing available
|
|
867
|
+
throw new Error(
|
|
868
|
+
'No LLM backend available for autonomous mode.\n' +
|
|
869
|
+
'Options:\n' +
|
|
870
|
+
' 1. Start Ollama locally: ollama serve && ollama pull qwen2.5:32b\n' +
|
|
871
|
+
' 2. Set ANTHROPIC_API_KEY environment variable\n' +
|
|
872
|
+
' 3. Run cipher setup to configure a backend'
|
|
873
|
+
);
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
// ---------------------------------------------------------------------------
|
|
877
|
+
// runAutonomous — main dispatch entry point
|
|
878
|
+
// ---------------------------------------------------------------------------
|
|
879
|
+
|
|
880
|
+
/**
|
|
881
|
+
* Run an autonomous agent for the given mode.
|
|
882
|
+
*
|
|
883
|
+
* Looks up the mode config from MODE_REGISTRY, creates an LLM client,
|
|
884
|
+
* and runs a BaseAgent loop.
|
|
885
|
+
*
|
|
886
|
+
* @param {string} mode - Mode name (case-insensitive)
|
|
887
|
+
* @param {Object} taskInput - Dict of task parameters passed to BaseAgent.run()
|
|
888
|
+
* @param {string|null} [backend=null] - LLM backend override
|
|
889
|
+
* @param {*} [context=null] - Pre-created context (e.g. SandboxContainer)
|
|
890
|
+
* @returns {Promise<import('./framework.js').ModeAgentResult>}
|
|
891
|
+
* @throws {Error} If mode is unknown or required context is missing
|
|
892
|
+
*/
|
|
893
|
+
export async function runAutonomous(mode, taskInput, backend = null, context = null) {
|
|
894
|
+
const modeKey = mode.toUpperCase();
|
|
895
|
+
debug(`runAutonomous: mode=${modeKey}, backend=${backend || 'auto-detect'}`);
|
|
896
|
+
|
|
897
|
+
// Ensure all modes are registered
|
|
898
|
+
await initModes();
|
|
899
|
+
|
|
900
|
+
// Look up mode factory
|
|
901
|
+
const factory = MODE_REGISTRY.get(modeKey);
|
|
902
|
+
if (!factory) {
|
|
903
|
+
throw new Error(
|
|
904
|
+
`Unknown mode: '${modeKey}'. ` +
|
|
905
|
+
`Available: ${availableModes().join(', ')}`
|
|
906
|
+
);
|
|
907
|
+
}
|
|
908
|
+
|
|
909
|
+
// Create fresh config
|
|
910
|
+
const config = factory();
|
|
911
|
+
|
|
912
|
+
// Validate context requirement
|
|
913
|
+
if (config.requiresSandbox && !context) {
|
|
914
|
+
throw new Error(
|
|
915
|
+
`Mode '${modeKey}' requires a sandbox context. ` +
|
|
916
|
+
`Pass a pre-created context via the 'context' parameter.`
|
|
917
|
+
);
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
// Create LLM client
|
|
921
|
+
const { client, model } = await makeAgentClient(backend);
|
|
922
|
+
debug(`runAutonomous: client ready, model=${model}`);
|
|
923
|
+
|
|
924
|
+
// Run the agent
|
|
925
|
+
const { BaseAgent } = await import('./framework.js');
|
|
926
|
+
const agent = new BaseAgent({ client, model, config, context });
|
|
927
|
+
const result = await agent.run(taskInput);
|
|
928
|
+
|
|
929
|
+
debug(
|
|
930
|
+
`runAutonomous: mode=${modeKey} completed — ` +
|
|
931
|
+
`turns=${result.turnsUsed}, tools=${result.toolCalls}, ` +
|
|
932
|
+
`tokens_in=${result.tokensIn}, tokens_out=${result.tokensOut}, ` +
|
|
933
|
+
`valid=${result.validation ? result.validation.valid : 'N/A'}`
|
|
934
|
+
);
|
|
935
|
+
|
|
936
|
+
return result;
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
// ---------------------------------------------------------------------------
|
|
940
|
+
// Exports for testing
|
|
941
|
+
// ---------------------------------------------------------------------------
|
|
942
|
+
|
|
943
|
+
export {
|
|
944
|
+
_makeRedConfig,
|
|
945
|
+
_redCompletionCheck,
|
|
946
|
+
_redSandboxExec,
|
|
947
|
+
_redHttpRequest,
|
|
948
|
+
_redReadFile,
|
|
949
|
+
_netPortScan,
|
|
950
|
+
_netConnectTcp,
|
|
951
|
+
_netSendPayload,
|
|
952
|
+
_tryGatewayConfig,
|
|
953
|
+
_tryAnthropicEnv,
|
|
954
|
+
_tryOllama,
|
|
955
|
+
};
|