cipher-security 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/bin/cipher.js +10 -0
  2. package/lib/analyze/consistency.js +566 -0
  3. package/lib/analyze/constitution.js +110 -0
  4. package/lib/analyze/sharding.js +251 -0
  5. package/lib/autonomous/agent-tool.js +165 -0
  6. package/lib/autonomous/framework.js +17 -0
  7. package/lib/autonomous/handoff.js +506 -0
  8. package/lib/autonomous/modes/blue.js +26 -0
  9. package/lib/autonomous/modes/red.js +28 -0
  10. package/lib/benchmark/agent.js +88 -26
  11. package/lib/benchmark/baselines.js +3 -0
  12. package/lib/benchmark/claude-code-solver.js +254 -0
  13. package/lib/benchmark/cognitive.js +283 -0
  14. package/lib/benchmark/index.js +12 -2
  15. package/lib/benchmark/knowledge.js +281 -0
  16. package/lib/benchmark/llm.js +156 -15
  17. package/lib/benchmark/models.js +5 -2
  18. package/lib/benchmark/nyu-ctf.js +192 -0
  19. package/lib/benchmark/overthewire.js +347 -0
  20. package/lib/benchmark/picoctf.js +281 -0
  21. package/lib/benchmark/prompts.js +280 -0
  22. package/lib/benchmark/registry.js +219 -0
  23. package/lib/benchmark/remote-solver.js +356 -0
  24. package/lib/benchmark/remote-target.js +263 -0
  25. package/lib/benchmark/reporter.js +35 -0
  26. package/lib/benchmark/runner.js +174 -10
  27. package/lib/benchmark/sandbox.js +35 -0
  28. package/lib/benchmark/scorer.js +22 -4
  29. package/lib/benchmark/solver.js +34 -1
  30. package/lib/benchmark/tools.js +262 -16
  31. package/lib/commands.js +9 -0
  32. package/lib/execution/council.js +434 -0
  33. package/lib/execution/parallel.js +292 -0
  34. package/lib/gates/circuit-breaker.js +135 -0
  35. package/lib/gates/confidence.js +302 -0
  36. package/lib/gates/corrections.js +219 -0
  37. package/lib/gates/self-check.js +245 -0
  38. package/lib/gateway/commands.js +727 -0
  39. package/lib/guardrails/engine.js +364 -0
  40. package/lib/mcp/server.js +349 -3
  41. package/lib/memory/compressor.js +94 -7
  42. package/lib/pipeline/hooks.js +288 -0
  43. package/lib/pipeline/index.js +11 -0
  44. package/lib/review/budget.js +210 -0
  45. package/lib/review/engine.js +526 -0
  46. package/lib/review/layers/acceptance-auditor.js +279 -0
  47. package/lib/review/layers/blind-hunter.js +500 -0
  48. package/lib/review/layers/defense-in-depth.js +209 -0
  49. package/lib/review/layers/edge-case-hunter.js +266 -0
  50. package/lib/review/panel.js +519 -0
  51. package/lib/review/two-stage.js +244 -0
  52. package/lib/session/cost-tracker.js +203 -0
  53. package/lib/session/logger.js +349 -0
  54. package/package.json +1 -1
@@ -0,0 +1,356 @@
1
+ // Copyright (c) 2026 defconxt. All rights reserved.
2
+ // Licensed under AGPL-3.0 — see LICENSE file for details.
3
+ // CIPHER is a trademark of defconxt.
4
+
5
+ /**
6
+ * Remote Solver — Bridges SecurityAgent to RemoteTarget for solving
7
+ * challenges hosted on external servers (PicoCTF, OverTheWire, etc).
8
+ *
9
+ * Unlike AutonomousSolver which creates a Docker sandbox and reads
10
+ * docker-compose.yml, RemoteSolver connects to an existing remote
11
+ * service via SSH, netcat, or HTTP.
12
+ *
13
+ * The agent uses the same tool-use loop but with remote exec dispatched
14
+ * through the RemoteTarget adapter instead of a sandbox container.
15
+ *
16
+ * @module benchmark/remote-solver
17
+ */
18
+
19
+ import { SolverResult } from './models.js';
20
+ import { SolverAdapter } from './solver.js';
21
+ import { createRemoteTarget } from './remote-target.js';
22
+ import { getToolsForWinCondition, FLAG_TOOLS, QUESTION_TOOLS } from './tools.js';
23
+ import { ConfidenceTracker, createCognitiveDispatcher } from './cognitive.js';
24
+ import { generateSystemPrompt } from './prompts.js';
25
+
26
+ // ---------------------------------------------------------------------------
27
+ // Remote sandbox adapter — wraps RemoteTarget in the SandboxContainer interface
28
+ // ---------------------------------------------------------------------------
29
+
30
/**
 * Adapter that gives a RemoteTarget the same method names as
 * SandboxContainer, so SecurityAgent can dispatch tool calls through it.
 */
export class RemoteSandboxAdapter {
  /**
   * @param {import('./remote-target.js').RemoteTarget} target
   */
  constructor(target) {
    this._target = target;
    this._containerId = `remote-${target.type}-${target.host}`;
  }

  /** Synthetic container id of the form `remote-<type>-<host>`. */
  get containerId() { return this._containerId; }

  /**
   * Synchronous placeholder for the SandboxContainer.execTool interface.
   *
   * RemoteTarget.exec is async, so this method does NOT run the command: it
   * only returns a fixed "queued" result with exit code 0. Real execution
   * goes through {@link RemoteSandboxAdapter#execToolAsync}.
   *
   * @param {string} command
   * @param {number} [timeout=60] - Accepted for interface parity; unused.
   * @returns {{ exitCode: number, stdout: string, stderr: string }}
   */
  execTool(command, timeout = 60) {
    return { exitCode: 0, stdout: `[remote] Command queued: ${command}`, stderr: '' };
  }

  /**
   * Run a command on the remote target.
   *
   * @param {string} command
   * @param {number} [timeout=60] - Timeout in seconds, forwarded to the target.
   * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
   */
  async execToolAsync(command, timeout = 60) {
    return this._target.exec(command, timeout);
  }

  /** No-op — remote targets don't need file injection setup. */
  create() {}

  /** No-op for remote targets. */
  connectNetwork() {}

  /** Copy file — not supported for remote targets currently. */
  copyFileIn() {}

  /**
   * Clean up the remote connection.
   *
   * Returns whatever the target's cleanup() returns (a promise for the
   * RemoteTarget classes) so callers can await teardown and see failures.
   * Callers that ignore the return value behave as before.
   *
   * @returns {Promise<void>|*}
   */
  cleanup() {
    return this._target.cleanup();
  }

  /**
   * Get the target URL for agent reference. Both arguments are ignored:
   * a remote target exposes a single URL.
   *
   * @param {string} [serviceName]
   * @param {number} [port]
   * @returns {string}
   */
  getInternalUrl(serviceName, port) {
    return this._target.url;
  }
}
88
+
89
+ // ---------------------------------------------------------------------------
90
+ // Remote tool dispatch
91
+ // ---------------------------------------------------------------------------
92
+
93
/**
 * Quote a value as one POSIX-shell word: wrap it in single quotes and
 * rewrite each embedded single quote as `'\''`.
 *
 * @param {*} value - Converted with String() before quoting.
 * @returns {string}
 */
function quoteForRemoteShell(value) {
  return `'${String(value).replace(/'/g, `'\\''`)}'`;
}

/**
 * Dispatch a tool call for remote challenges.
 *
 * Submission tools are answered locally. Execution-style tools are turned
 * into a command string and run through `adapter.execToolAsync`. Every
 * caller-supplied value spliced into a generated command (HTTP method,
 * headers, body, URL, file paths, file content) is single-quoted so that
 * quotes, `$`, backticks or spaces in the value cannot break or alter it.
 * `sandbox_exec` passes `input.command` through verbatim by design.
 *
 * @param {string} toolName
 * @param {object} input
 * @param {RemoteSandboxAdapter} adapter
 * @returns {Promise<{ output: string, flagSubmitted?: string, answerSubmitted?: string }>}
 */
export async function dispatchRemoteTool(toolName, input, adapter) {
  // Flag/answer submission tools
  if (toolName === 'submit_flag') {
    return { output: `Flag submitted: ${input.flag}`, flagSubmitted: input.flag };
  }
  if (toolName === 'submit_answer') {
    return { output: `Answer submitted: ${input.answer}`, answerSubmitted: input.answer };
  }

  // Execution tools — route through remote target
  if (toolName === 'sandbox_exec') {
    const result = await adapter.execToolAsync(input.command, input.timeout || 60);
    const output = result.stdout + (result.stderr ? `\n[stderr] ${result.stderr}` : '');
    return { output: output || `[exit ${result.exitCode}]` };
  }

  // HTTP request tool — useful for web challenges
  if (toolName === 'http_request') {
    const method = String(input.method || 'GET').toUpperCase();
    const parts = ['curl', '-s', '-X', quoteForRemoteShell(method)];
    if (input.headers) {
      for (const [k, v] of Object.entries(input.headers)) {
        parts.push('-H', quoteForRemoteShell(`${k}: ${v}`));
      }
    }
    if (input.body) {
      const body = typeof input.body === 'string' ? input.body : JSON.stringify(input.body);
      parts.push('-d', quoteForRemoteShell(body));
    }
    parts.push(quoteForRemoteShell(input.url));
    const result = await adapter.execToolAsync(parts.join(' '), input.timeout || 30);
    return { output: result.stdout || result.stderr || '[no response]' };
  }

  // Read file — cat on remote
  if (toolName === 'read_file') {
    const result = await adapter.execToolAsync(`cat ${quoteForRemoteShell(input.path)}`, 10);
    return { output: result.stdout || result.stderr || '[empty]' };
  }

  // Write file — printf keeps the trailing newline echo produced, without
  // echo's shell-dependent handling of backslashes and leading dashes.
  if (toolName === 'write_file') {
    const content = quoteForRemoteShell(input.content ?? '');
    const destination = quoteForRemoteShell(input.path);
    const result = await adapter.execToolAsync(`printf '%s\\n' ${content} > ${destination}`, 10);
    if (result.exitCode === 0) {
      return { output: `Written to ${input.path}` };
    }
    // Never hand the agent an empty tool result when stderr is blank.
    return { output: result.stderr || `[exit ${result.exitCode}]` };
  }

  // List directory
  if (toolName === 'list_directory') {
    const result = await adapter.execToolAsync(`ls -la ${quoteForRemoteShell(input.path || '.')}`, 10);
    return { output: result.stdout || result.stderr || '[empty]' };
  }

  // Cognitive tools — pass through
  if (toolName === 'update_plan' || toolName === 'check_confidence') {
    return { output: `[cognitive] ${toolName}: acknowledged` };
  }

  // Disassemble/decompile — not available on most remote targets
  if (toolName === 'disassemble' || toolName === 'decompile') {
    return { output: `[remote] ${toolName} not available on remote target` };
  }

  return { output: `[remote] Unknown tool: ${toolName}` };
}
167
+
168
+ // ---------------------------------------------------------------------------
169
+ // RemoteSolver
170
+ // ---------------------------------------------------------------------------
171
+
172
/**
 * Solver that connects to remote CTF challenge servers.
 *
 * Supports SSH (OverTheWire), netcat (PicoCTF binary challenges),
 * and HTTP (PicoCTF web challenges) targets. Connection info comes from the
 * constructor's `targetInfo` or is inferred from the challenge config.
 */
export class RemoteSolver extends SolverAdapter {
  /**
   * @param {object} opts
   * @param {string} [opts.backend] - LLM backend override
   * @param {number} [opts.maxTurns=30] - Max agent turns
   * @param {object} [opts.targetInfo] - Remote target connection info
   */
  constructor({ backend, maxTurns = 30, targetInfo } = {}) {
    super();
    this._backend = backend;
    this._maxTurns = maxTurns;
    this._targetInfo = targetInfo;
  }

  get name() { return 'remote'; }

  /**
   * Solve a challenge via remote target.
   *
   * Runs an LLM tool-use loop for at most `maxTurns` turns and stops early
   * when a flag or answer is submitted or the model ends its turn without
   * calling a tool. Failures are reported through the result's `error`
   * field rather than thrown. The remote connection is cleaned up on every
   * exit path once it has been created.
   *
   * @param {import('./models.js').BenchmarkConfig} config
   * @param {string} targetUrl - Accepted for interface parity; the URL used is the remote target's own
   * @param {string} expectedFlag - Accepted for interface parity; not read here
   * @returns {Promise<SolverResult>}
   */
  async solve(config, targetUrl, expectedFlag) {
    const start = performance.now();
    const steps = [];
    const elapsedS = () => (performance.now() - start) / 1000;
    // Stays null until a target exists, so `finally` knows whether to clean up.
    let adapter = null;

    try {
      // Determine target info from config or constructor
      const targetInfo = this._targetInfo || await this._inferTargetInfo(config);
      if (!targetInfo) {
        return new SolverResult({
          durationS: elapsedS(),
          error: 'No remote target info available for this challenge',
          steps: ['[error] Cannot determine remote target connection info'],
        });
      }

      // Create remote target
      const target = createRemoteTarget(targetInfo);
      adapter = new RemoteSandboxAdapter(target);

      // Create LLM client
      const { makeAgentClient } = await import('./llm.js');
      const { client, model } = await makeAgentClient({ backendOverride: this._backend });

      // Generate system prompt
      const tracker = new ConfidenceTracker();
      const cognitiveDispatch = createCognitiveDispatcher(tracker);
      const systemPrompt = generateSystemPrompt({
        targetUrl: target.url,
        challengeDescription: config.description,
        tags: config.tags,
        winCondition: config.winCondition,
        serviceUrls: [],
        hasFiles: false,
        phase: tracker.phase,
      });

      // Build tools; the generated prompt is sent as the first user message.
      const tools = getToolsForWinCondition(config.winCondition);
      const messages = [{ role: 'user', content: systemPrompt }];
      let totalIn = 0;
      let totalOut = 0;
      let toolCallCount = 0;
      const usage = () => ({
        tokensIn: totalIn, tokensOut: totalOut, toolCalls: toolCallCount, steps,
      });

      // Agent loop
      for (let turn = 0; turn < this._maxTurns; turn++) {
        let response;
        try {
          response = await client.messages.create({
            model, max_tokens: 4096, tools, messages,
          });
        } catch (err) {
          return new SolverResult({
            durationS: elapsedS(),
            error: `LLM error: ${err.message}`,
            ...usage(),
          });
        }

        totalIn += response.usage?.input_tokens || 0;
        totalOut += response.usage?.output_tokens || 0;

        const assistantContent = response.content || [];
        messages.push({ role: 'assistant', content: assistantContent });

        const toolUseBlocks = assistantContent.filter(b => b.type === 'tool_use');

        if (toolUseBlocks.length === 0) {
          const text = assistantContent.find(b => b.type === 'text')?.text || '';
          steps.push(`[text] ${text.slice(0, 200)}`);
          if (response.stop_reason === 'end_turn') break;
          continue;
        }

        const toolResults = [];
        for (const block of toolUseBlocks) {
          toolCallCount++;
          steps.push(`[tool] ${block.name}: ${JSON.stringify(block.input).slice(0, 150)}`);

          // Try cognitive dispatch first, then remote dispatch
          const cogResult = cognitiveDispatch(block.name, block.input);
          const result = cogResult || await dispatchRemoteTool(block.name, block.input, adapter);
          steps.push(`[result] ${result.output.slice(0, 200)}`);

          if (result.flagSubmitted) {
            return new SolverResult({
              flagFound: true, flagValue: result.flagSubmitted,
              durationS: elapsedS(),
              ...usage(),
            });
          }

          if (result.answerSubmitted) {
            return new SolverResult({
              answerFound: true, answerValue: result.answerSubmitted,
              durationS: elapsedS(),
              ...usage(),
            });
          }

          toolResults.push({ type: 'tool_result', tool_use_id: block.id, content: result.output });
        }

        messages.push({ role: 'user', content: toolResults });
      }

      return new SolverResult({
        durationS: elapsedS(),
        ...usage(),
      });
    } catch (e) {
      return new SolverResult({
        durationS: elapsedS(),
        error: String(e), steps: [...steps, `[error] ${e}`],
      });
    } finally {
      // Runs for every return above, including the LLM-error and exception
      // paths that previously left the connection open.
      if (adapter) {
        try {
          await adapter.cleanup();
        } catch (cleanupErr) {
          // A cleanup failure must not replace the solve outcome; just note it.
          steps.push(`[warn] cleanup failed: ${cleanupErr}`);
        }
      }
    }
  }

  /**
   * Infer remote target info from BenchmarkConfig tags/metadata.
   *
   * Lookup order: `config.metadata.targetInfo`, then the PicoCTF catalog
   * (tag `picoctf`, matched by challenge name), then OverTheWire (tag
   * `overthewire` plus a wargame tag; the level is parsed from "Level N" in
   * the name and defaults to 0).
   *
   * @param {import('./models.js').BenchmarkConfig} config
   * @returns {Promise<{ type: string, host: string, port: number, username?: string, password?: string }|null>}
   */
  async _inferTargetInfo(config) {
    // Check if config has metadata with target info
    if (config.metadata?.targetInfo) return config.metadata.targetInfo;

    // A config without tags simply has nothing to infer from.
    const tags = Array.isArray(config.tags) ? config.tags : [];

    // PicoCTF challenges
    if (tags.includes('picoctf')) {
      try {
        const { getPicoTargetInfo, PICOCTF_CATALOG } = await import('./picoctf.js');
        const entry = PICOCTF_CATALOG.find(e => e.name === config.name);
        if (entry) return getPicoTargetInfo(entry);
      } catch { /* best-effort: module not available or lookup failed */ }
    }

    // OverTheWire challenges
    if (tags.includes('overthewire')) {
      try {
        const { getOtwConnectionInfo } = await import('./overthewire.js');
        const wargame = tags.find(t => ['bandit', 'natas', 'leviathan', 'krypton', 'narnia'].includes(t));
        if (wargame) {
          const levelMatch = config.name.match(/Level (\d+)/);
          const level = levelMatch ? parseInt(levelMatch[1], 10) : 0;
          const info = getOtwConnectionInfo(wargame, level);
          return { type: 'ssh', ...info };
        }
      } catch { /* best-effort: module not available or lookup failed */ }
    }

    return null;
  }
}
@@ -0,0 +1,263 @@
1
+ // Copyright (c) 2026 defconxt. All rights reserved.
2
+ // Licensed under AGPL-3.0 — see LICENSE file for details.
3
+ // CIPHER is a trademark of defconxt.
4
+
5
+ /**
6
+ * Remote Target — Connection adapters for external CTF challenge servers.
7
+ *
8
+ * Unlike XBOW/NYU which run in Docker containers, many CTF platforms
9
+ * host challenges on remote servers accessible via SSH, netcat, or HTTP.
10
+ * This module provides a unified interface for connecting to them.
11
+ *
12
+ * @module benchmark/remote-target
13
+ */
14
+
15
+ import { spawnSync, spawn } from 'node:child_process';
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // Base class
19
+ // ---------------------------------------------------------------------------
20
+
21
/**
 * Base class for remote target connections.
 *
 * Holds the connection parameters and a connected flag. connect() and
 * exec() reject until a subclass overrides them; cleanup() only clears
 * the connected flag.
 */
export class RemoteTarget {
  /**
   * @param {object} opts
   * @param {string} opts.type - 'ssh' | 'netcat' | 'http'
   * @param {string} opts.host - Hostname or IP
   * @param {number} opts.port - Port number
   * @param {string} [opts.username] - SSH username (stored as '' when absent)
   * @param {string} [opts.password] - SSH password or challenge password (stored as '' when absent)
   * @param {object} [opts.metadata] - Additional target-specific info
   */
  constructor({ type, host, port, username, password, metadata = {} }) {
    Object.assign(this, {
      type,
      host,
      port,
      username: username ?? '',
      password: password ?? '',
      metadata,
      _connected: false,
    });
  }

  /** Whether the target is currently flagged as connected. */
  get connected() {
    return this._connected;
  }

  /** Target address rendered as `<type>://<host>:<port>`. */
  get url() {
    const { type, host, port } = this;
    return `${type}://${host}:${port}`;
  }

  /** Connect to the remote target. Always rejects in the base class. */
  async connect() {
    const notImplemented = new Error('Subclass must implement connect()');
    throw notImplemented;
  }

  /**
   * Execute a command against the remote target. Always rejects in the
   * base class.
   *
   * @param {string} command
   * @param {number} [timeout] - Timeout in seconds
   * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
   */
  async exec(command, timeout = 30) {
    const notImplemented = new Error('Subclass must implement exec()');
    throw notImplemented;
  }

  /** Clean up connection resources by clearing the connected flag. */
  async cleanup() {
    this._connected = false;
  }
}
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // SSH Target
65
+ // ---------------------------------------------------------------------------
66
+
67
/**
 * SSH-based remote target (OverTheWire, SSH-accessible challenges).
 *
 * Uses sshpass + ssh for password-based auth, or plain ssh when a key path
 * is set or no password is given.
 * Each exec() call spawns a fresh SSH connection (stateless).
 */
export class SSHTarget extends RemoteTarget {
  /**
   * @param {object} opts
   * @param {string} opts.host
   * @param {number} [opts.port=22]
   * @param {string} [opts.username]
   * @param {string} [opts.password]
   * @param {string} [opts.keyPath] - Identity file passed to `ssh -i`
   * @param {object} [opts.metadata]
   */
  constructor({ host, port = 22, username, password, keyPath, metadata = {} }) {
    super({ type: 'ssh', host, port, username, password, metadata });
    this.keyPath = keyPath ?? '';
  }

  /**
   * Verify connectivity by echoing a marker string over SSH.
   *
   * @returns {Promise<boolean>} true once the marker comes back
   * @throws {Error} when the marker is not found in stdout
   */
  async connect() {
    const result = await this.exec('echo __CIPHER_CONNECTED__', 10);
    if (result.stdout.includes('__CIPHER_CONNECTED__')) {
      this._connected = true;
      return true;
    }
    throw new Error(`SSH connection failed to ${this.host}:${this.port}: ${result.stderr}`);
  }

  /**
   * Run a command over a fresh SSH connection.
   *
   * Note: uses spawnSync, so the calling thread blocks until the command
   * finishes or the timeout fires.
   *
   * @param {string} command
   * @param {number} [timeout=30] - Timeout in seconds
   * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
   *   exitCode is -1 when the process produced no exit status (spawn
   *   failure, timeout or signal); the reason is then appended to stderr.
   */
  async exec(command, timeout = 30) {
    const sshArgs = [
      '-o', 'StrictHostKeyChecking=no',
      '-o', 'UserKnownHostsFile=/dev/null',
      '-o', 'LogLevel=ERROR',
      '-o', `ConnectTimeout=${Math.min(timeout, 10)}`,
      '-p', String(this.port),
    ];

    if (this.keyPath) {
      sshArgs.push('-i', this.keyPath);
    }

    // Without a username, pass the bare host instead of the invalid "@host".
    const destination = this.username ? `${this.username}@${this.host}` : this.host;
    sshArgs.push(destination, command);

    const usePassword = Boolean(this.password) && !this.keyPath;
    const cmd = usePassword ? 'sshpass' : 'ssh';
    // `sshpass -e` reads the password from $SSHPASS, keeping it out of the
    // argument list that `-p <password>` would expose in process listings.
    const args = usePassword ? ['-e', 'ssh', ...sshArgs] : sshArgs;
    const env = usePassword ? { ...process.env, SSHPASS: this.password } : process.env;

    const result = spawnSync(cmd, args, {
      timeout: timeout * 1000,
      stdio: 'pipe',
      maxBuffer: 5 * 1024 * 1024,
      env,
    });

    let stderr = (result.stderr || '').toString();
    if (result.error) {
      // Missing binary, timeout, or maxBuffer overflow: report it.
      stderr = [stderr, `[spawn error] ${result.error.message}`].filter(Boolean).join('\n');
    }

    return {
      exitCode: result.status ?? -1,
      stdout: (result.stdout || '').toString(),
      stderr,
    };
  }
}
127
+
128
+ // ---------------------------------------------------------------------------
129
+ // Netcat Target
130
+ // ---------------------------------------------------------------------------
131
+
132
/**
 * Netcat-based remote target (interactive TCP challenges).
 *
 * Many CTF challenges expose a service on a TCP port that you interact with
 * by sending/receiving text. This wraps the `nc` binary for that purpose.
 * Each exec() call opens a new connection.
 */
export class NetcatTarget extends RemoteTarget {
  /**
   * @param {object} opts
   * @param {string} opts.host
   * @param {number} opts.port
   * @param {object} [opts.metadata]
   */
  constructor({ host, port, metadata = {} }) {
    super({ type: 'netcat', host, port, metadata });
  }

  /**
   * Verify the port is reachable with `nc -z`.
   *
   * @returns {Promise<boolean>} true when the probe exits with status 0
   * @throws {Error} when the probe fails; includes the spawn error, if any
   */
  async connect() {
    const probe = spawnSync('nc', ['-z', '-w', '5', this.host, String(this.port)], {
      timeout: 10000, stdio: 'pipe',
    });
    this._connected = probe.status === 0;
    if (!this._connected) {
      // Distinguish "nc missing / timed out" from "port closed".
      const reason = probe.error ? `: ${probe.error.message}` : '';
      throw new Error(`Netcat connection failed to ${this.host}:${this.port}${reason}`);
    }
    return true;
  }

  /**
   * Send input to a netcat service and capture output.
   *
   * @param {string} input - Text to send to the service (a newline is appended)
   * @param {number} [timeout] - Timeout in seconds
   * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
   *   exitCode is -1 when the process produced no exit status; the reason
   *   is then appended to stderr.
   */
  async exec(input, timeout = 15) {
    const result = spawnSync('nc', ['-w', String(timeout), this.host, String(this.port)], {
      input: `${input}\n`,
      timeout: timeout * 1000,
      stdio: ['pipe', 'pipe', 'pipe'],
      maxBuffer: 1 * 1024 * 1024,
    });

    let stderr = (result.stderr || '').toString();
    if (result.error) {
      // Missing binary, timeout, or maxBuffer overflow: report it.
      stderr = [stderr, `[spawn error] ${result.error.message}`].filter(Boolean).join('\n');
    }

    return {
      exitCode: result.status ?? -1,
      stdout: (result.stdout || '').toString(),
      stderr,
    };
  }
}
175
+
176
+ // ---------------------------------------------------------------------------
177
+ // HTTP Target
178
+ // ---------------------------------------------------------------------------
179
+
180
/**
 * HTTP-based remote target (web exploitation challenges).
 *
 * Wraps the `curl` binary for HTTP interactions.
 */
export class HTTPTarget extends RemoteTarget {
  /**
   * @param {object} opts
   * @param {string} opts.host
   * @param {number} [opts.port=80]
   * @param {string} [opts.protocol='http'] - URL scheme used by the `url` getter
   * @param {object} [opts.metadata]
   */
  constructor({ host, port = 80, protocol = 'http', metadata = {} }) {
    super({ type: 'http', host, port, metadata });
    this.protocol = protocol;
  }

  /** Base URL rendered as `<protocol>://<host>:<port>`. */
  get url() { return `${this.protocol}://${this.host}:${this.port}`; }

  /**
   * Verify the server answers with any HTTP status code.
   *
   * @returns {Promise<boolean>} true when curl reports a status in 1..599
   * @throws {Error} otherwise; includes the spawn error, if any
   */
  async connect() {
    const result = spawnSync('curl', [
      '-s', '-o', '/dev/null', '-w', '%{http_code}',
      '--connect-timeout', '10', '--max-time', '15',
      this.url,
    ], { timeout: 20000, stdio: 'pipe' });

    const statusCode = parseInt((result.stdout || '').toString().trim(), 10);
    this._connected = statusCode > 0 && statusCode < 600;
    if (!this._connected) {
      const reason = result.error ? `: ${result.error.message}` : '';
      throw new Error(`HTTP connection failed to ${this.url}${reason}`);
    }
    return true;
  }

  /**
   * Execute an HTTP request with curl.
   *
   * Accepted command forms:
   *  - a path starting with '/', resolved against this target's URL;
   *  - a string starting with 'http', passed to curl as one argument;
   *  - anything else is split on whitespace into raw curl arguments. A
   *    leading `curl` word is dropped so full command lines work too.
   *    Shell quoting is NOT interpreted: a quoted argument containing
   *    spaces is still split.
   *
   * @param {string} command - curl-style command args or a URL path
   * @param {number} [timeout] - Timeout in seconds
   * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
   *   exitCode is -1 when the process produced no exit status; the reason
   *   is then appended to stderr.
   */
  async exec(command, timeout = 30) {
    const request = String(command ?? '').trim();

    let requestArgs;
    if (request.startsWith('/') || request.startsWith('http')) {
      requestArgs = [request.startsWith('http') ? request : `${this.url}${request}`];
    } else {
      // Drop empty tokens so stray whitespace never becomes an empty argument.
      requestArgs = request.split(/\s+/).filter(Boolean);
      // Otherwise curl would treat the program name itself as a URL.
      if (requestArgs[0] === 'curl') {
        requestArgs.shift();
      }
    }

    const result = spawnSync('curl', ['-s', '--max-time', String(timeout), ...requestArgs], {
      timeout: timeout * 1000,
      stdio: 'pipe',
      maxBuffer: 5 * 1024 * 1024,
    });

    let stderr = (result.stderr || '').toString();
    if (result.error) {
      // Missing binary, timeout, or maxBuffer overflow: report it.
      stderr = [stderr, `[spawn error] ${result.error.message}`].filter(Boolean).join('\n');
    }

    return {
      exitCode: result.status ?? -1,
      stdout: (result.stdout || '').toString(),
      stderr,
    };
  }
}
237
+
238
+ // ---------------------------------------------------------------------------
239
+ // Factory
240
+ // ---------------------------------------------------------------------------
241
+
242
/**
 * Build the RemoteTarget subclass that matches `opts.type`.
 *
 * The whole options object is handed to the selected constructor unchanged.
 *
 * @param {object} opts
 * @param {string} opts.type - 'ssh' | 'netcat' | 'http'
 * @param {string} opts.host
 * @param {number} opts.port
 * @param {string} [opts.username]
 * @param {string} [opts.password]
 * @param {string} [opts.keyPath]
 * @param {string} [opts.protocol]
 * @param {object} [opts.metadata]
 * @returns {RemoteTarget}
 * @throws {Error} when `opts.type` is not one of the supported types
 */
export function createRemoteTarget(opts) {
  const { type } = opts;

  // A Map (not a plain object) so inherited keys like 'constructor' never match.
  const targetClassByType = new Map([
    ['ssh', SSHTarget],
    ['netcat', NetcatTarget],
    ['http', HTTPTarget],
  ]);

  const TargetClass = targetClassByType.get(type);
  if (TargetClass === undefined) {
    throw new Error(`Unknown target type: ${type}`);
  }
  return new TargetClass(opts);
}
@@ -99,5 +99,40 @@ export function generateMarkdownReport(report, solverName = '') {
99
99
  lines.push('');
100
100
  }
101
101
 
102
+ // Tag-aware reporting
103
+ const byTag = report.resultsByTag();
104
+ if (Object.keys(byTag).length > 0) {
105
+ lines.push('## Results by Vulnerability Category', '');
106
+ lines.push('| Tag | Total | Passed | Rate |');
107
+ lines.push('|-----|-------|--------|------|');
108
+ for (const [tag, tagResults] of Object.entries(byTag).sort(([a], [b]) => a.localeCompare(b))) {
109
+ const p = tagResults.filter(r => r.passed).length;
110
+ const pct = tagResults.length ? (p / tagResults.length) * 100 : 0;
111
+ lines.push(`| ${tag} | ${tagResults.length} | ${p} | ${pct.toFixed(1)}% |`);
112
+ }
113
+ lines.push('');
114
+ }
115
+
116
+ // Category breakdown (for NYU CTF and categorized benchmarks)
117
+ const byCategory = {};
118
+ for (const r of report.results) {
119
+ const cat = r.config.category;
120
+ if (cat) {
121
+ if (!byCategory[cat]) byCategory[cat] = { total: 0, passed: 0 };
122
+ byCategory[cat].total++;
123
+ if (r.passed) byCategory[cat].passed++;
124
+ }
125
+ }
126
+ if (Object.keys(byCategory).length > 0) {
127
+ lines.push('## Results by Category', '');
128
+ lines.push('| Category | Total | Passed | Rate |');
129
+ lines.push('|----------|-------|--------|------|');
130
+ for (const [cat, data] of Object.entries(byCategory).sort(([a],[b]) => a.localeCompare(b))) {
131
+ const pct = data.total > 0 ? (data.passed / data.total * 100).toFixed(1) : '0.0';
132
+ lines.push(`| ${cat} | ${data.total} | ${data.passed} | ${pct}% |`);
133
+ }
134
+ lines.push('');
135
+ }
136
+
102
137
  return lines.join('\n');
103
138
  }