cipher-security 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cipher.js +10 -0
- package/lib/analyze/consistency.js +566 -0
- package/lib/analyze/constitution.js +110 -0
- package/lib/analyze/sharding.js +251 -0
- package/lib/autonomous/agent-tool.js +165 -0
- package/lib/autonomous/framework.js +17 -0
- package/lib/autonomous/handoff.js +506 -0
- package/lib/autonomous/modes/blue.js +26 -0
- package/lib/autonomous/modes/red.js +28 -0
- package/lib/benchmark/agent.js +88 -26
- package/lib/benchmark/baselines.js +3 -0
- package/lib/benchmark/claude-code-solver.js +254 -0
- package/lib/benchmark/cognitive.js +283 -0
- package/lib/benchmark/index.js +12 -2
- package/lib/benchmark/knowledge.js +281 -0
- package/lib/benchmark/llm.js +156 -15
- package/lib/benchmark/models.js +5 -2
- package/lib/benchmark/nyu-ctf.js +192 -0
- package/lib/benchmark/overthewire.js +347 -0
- package/lib/benchmark/picoctf.js +281 -0
- package/lib/benchmark/prompts.js +280 -0
- package/lib/benchmark/registry.js +219 -0
- package/lib/benchmark/remote-solver.js +356 -0
- package/lib/benchmark/remote-target.js +263 -0
- package/lib/benchmark/reporter.js +35 -0
- package/lib/benchmark/runner.js +174 -10
- package/lib/benchmark/sandbox.js +35 -0
- package/lib/benchmark/scorer.js +22 -4
- package/lib/benchmark/solver.js +34 -1
- package/lib/benchmark/tools.js +262 -16
- package/lib/commands.js +9 -0
- package/lib/execution/council.js +434 -0
- package/lib/execution/parallel.js +292 -0
- package/lib/gates/circuit-breaker.js +135 -0
- package/lib/gates/confidence.js +302 -0
- package/lib/gates/corrections.js +219 -0
- package/lib/gates/self-check.js +245 -0
- package/lib/gateway/commands.js +727 -0
- package/lib/guardrails/engine.js +364 -0
- package/lib/mcp/server.js +349 -3
- package/lib/memory/compressor.js +94 -7
- package/lib/pipeline/hooks.js +288 -0
- package/lib/pipeline/index.js +11 -0
- package/lib/review/budget.js +210 -0
- package/lib/review/engine.js +526 -0
- package/lib/review/layers/acceptance-auditor.js +279 -0
- package/lib/review/layers/blind-hunter.js +500 -0
- package/lib/review/layers/defense-in-depth.js +209 -0
- package/lib/review/layers/edge-case-hunter.js +266 -0
- package/lib/review/panel.js +519 -0
- package/lib/review/two-stage.js +244 -0
- package/lib/session/cost-tracker.js +203 -0
- package/lib/session/logger.js +349 -0
- package/package.json +1 -1
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
// Copyright (c) 2026 defconxt. All rights reserved.
|
|
2
|
+
// Licensed under AGPL-3.0 — see LICENSE file for details.
|
|
3
|
+
// CIPHER is a trademark of defconxt.
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Agent Handoff Engine — Formal transfer protocol for multi-mode chains.
|
|
7
|
+
*
|
|
8
|
+
* Enables mode agents to hand off to each other mid-execution via
|
|
9
|
+
* `transfer_to_<mode>` tool calls, with context filtering and cycle detection.
|
|
10
|
+
*
|
|
11
|
+
* Key exports:
|
|
12
|
+
* - HandoffEngine: manages transfer interception, context filtering, depth tracking
|
|
13
|
+
* - runWithHandoffs: wraps runAutonomous with handoff support
|
|
14
|
+
* - runChain: explicit sequential multi-mode execution
|
|
15
|
+
* - HandoffResult: aggregated results from a chain
|
|
16
|
+
* - HandoffLog: structured event timeline
|
|
17
|
+
*
|
|
18
|
+
* @module autonomous/handoff
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { ModeAgentResult } from './framework.js';
|
|
22
|
+
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Constants
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
/** Maximum handoff depth before forced halt */
|
|
28
|
+
const DEFAULT_MAX_DEPTH = 5;
|
|
29
|
+
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Types
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
/**
 * One entry in the handoff timeline, describing a single transfer attempt
 * from one mode to another.
 */
export class HandoffEvent {
  /**
   * Every field is optional; a nullish value falls back to the default shown.
   *
   * @param {object} opts
   * @param {string} [opts.sourceMode=''] - Mode that initiated the transfer
   * @param {string} [opts.targetMode=''] - Mode being transferred to
   * @param {number} [opts.timestamp] - Unix epoch seconds (defaults to the current time)
   * @param {number} [opts.depth=0] - Handoff depth at the time of the event
   * @param {string} [opts.reason=''] - Why the transfer was requested
   * @param {string} [opts.contextSummary=''] - Summary of the context passed
   * @param {string} [opts.status='completed'] - 'completed' | 'blocked_cycle' | 'blocked_depth'
   */
  constructor(opts = {}) {
    const {
      sourceMode,
      targetMode,
      timestamp,
      depth,
      reason,
      contextSummary,
      status,
    } = opts;

    this.sourceMode = sourceMode ?? '';
    this.targetMode = targetMode ?? '';
    // Date.now() is in milliseconds; the timeline stores seconds.
    this.timestamp = timestamp ?? Date.now() / 1000;
    this.depth = depth ?? 0;
    this.reason = reason ?? '';
    this.contextSummary = contextSummary ?? '';
    this.status = status ?? 'completed';
  }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Combined outcome of a multi-mode run: the per-mode results, the handoff
 * timeline, and aggregate duration/token counters.
 */
export class HandoffResult {
  /**
   * Every field is optional; a nullish value falls back to an empty/zero default.
   *
   * @param {object} opts
   * @param {ModeAgentResult[]} [opts.results] - Per-mode results in execution order
   * @param {HandoffEvent[]} [opts.events] - Handoff event timeline
   * @param {number} [opts.totalDurationS] - Total wall-clock seconds
   * @param {number} [opts.totalTokensIn] - Aggregate input tokens
   * @param {number} [opts.totalTokensOut] - Aggregate output tokens
   * @param {string|null} [opts.error] - Top-level error if the chain failed
   */
  constructor(opts = {}) {
    const {
      results,
      events,
      totalDurationS,
      totalTokensIn,
      totalTokensOut,
      error,
    } = opts;

    /** @type {ModeAgentResult[]} */
    this.results = results ?? [];
    /** @type {HandoffEvent[]} */
    this.events = events ?? [];
    this.totalDurationS = totalDurationS ?? 0;
    this.totalTokensIn = totalTokensIn ?? 0;
    this.totalTokensOut = totalTokensOut ?? 0;
    this.error = error ?? null;
  }

  /** Names of the modes that were run, in execution order. */
  get modesExecuted() {
    return this.results.map(({ mode }) => mode);
  }
}
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* @typedef {object} HandoffFilter
|
|
91
|
+
* @property {(context: object, sourceMode: string, targetMode: string) => object} transform
|
|
92
|
+
* Transform handoff context before passing to target mode.
|
|
93
|
+
* @property {string[]} [acceptsFrom]
|
|
94
|
+
* Modes this filter accepts transfers from. Empty = accept all.
|
|
95
|
+
*/
|
|
96
|
+
|
|
97
|
+
/**
 * Fallback filter used when a mode has no filter of its own: the context is
 * returned untouched, and the empty `acceptsFrom` list means any source mode
 * is accepted.
 * @type {HandoffFilter}
 */
export const DEFAULT_HANDOFF_FILTER = {
  transform(context) {
    return context;
  },
  acceptsFrom: [],
};
|
|
105
|
+
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
// HandoffEngine
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
/**
 * Manages agent-to-agent transfers with context filtering and cycle detection.
 *
 * The engine tracks three pieces of per-chain state: the current handoff
 * depth, the set of modes already visited, and the event timeline. Mode names
 * are treated case-insensitively: every entry point upper-cases them before
 * storing or comparing.
 */
export class HandoffEngine {
  /**
   * @param {object} opts
   * @param {number} [opts.maxDepth] - Maximum handoff depth
   * @param {Map<string, HandoffFilter>} [opts.filters] - Per-mode handoff filters
   */
  constructor(opts = {}) {
    this._maxDepth = opts.maxDepth ?? DEFAULT_MAX_DEPTH;

    /** @type {Map<string, HandoffFilter>} */
    this._filters = new Map();
    // Copy the caller's filters instead of aliasing their Map, and upper-case
    // the keys on the way in. getFilter()/setFilter() always look up the
    // upper-cased name, so a filter supplied under 'blue' would otherwise be
    // silently ignored, and setFilter() would mutate the caller's Map.
    if (opts.filters) {
      for (const [mode, filter] of opts.filters) {
        this._filters.set(String(mode).toUpperCase(), filter);
      }
    }

    /** @type {HandoffEvent[]} */
    this._events = [];
    this._currentDepth = 0;
    /** @type {Set<string>} */
    this._visited = new Set();
  }

  /** Current handoff depth. */
  get depth() { return this._currentDepth; }

  /** Max depth. */
  get maxDepth() { return this._maxDepth; }

  /** Full event timeline (a shallow copy; the events themselves are shared). */
  get events() { return [...this._events]; }

  /**
   * Register a handoff filter for a mode, replacing any existing one.
   * @param {string} mode
   * @param {HandoffFilter} filter
   */
  setFilter(mode, filter) {
    this._filters.set(mode.toUpperCase(), filter);
  }

  /**
   * Get the filter for a mode (returns default if none registered).
   * @param {string} mode
   * @returns {HandoffFilter}
   */
  getFilter(mode) {
    return this._filters.get(mode.toUpperCase()) || DEFAULT_HANDOFF_FILTER;
  }

  /**
   * Check whether a transfer from sourceMode to targetMode is allowed.
   *
   * Checks run in this order, and the first failure wins: depth limit,
   * cycle detection, then the target filter's `acceptsFrom` list.
   *
   * @param {string} sourceMode
   * @param {string} targetMode
   * @returns {{ allowed: boolean, reason: string }} `reason` is empty when allowed
   */
  canTransfer(sourceMode, targetMode) {
    const source = sourceMode.toUpperCase();
    const target = targetMode.toUpperCase();

    // Depth check
    if (this._currentDepth >= this._maxDepth) {
      return { allowed: false, reason: `Max handoff depth (${this._maxDepth}) reached` };
    }

    // Cycle check — any mode already visited in this chain is refused,
    // not only the immediately preceding one.
    if (this._visited.has(target)) {
      return { allowed: false, reason: `Cycle detected: ${target} already visited in chain [${[...this._visited].join('→')}]` };
    }

    // Filter acceptance check. An empty or missing acceptsFrom accepts all
    // sources. Entries are compared case-insensitively because `source` has
    // been upper-cased above.
    const accepted = this.getFilter(target).acceptsFrom;
    if (accepted && accepted.length > 0) {
      const isAccepted = accepted.some((name) => String(name).toUpperCase() === source);
      if (!isAccepted) {
        return { allowed: false, reason: `${target} does not accept transfers from ${source}` };
      }
    }

    return { allowed: true, reason: '' };
  }

  /**
   * Record a transfer event and update tracking state.
   *
   * The event is always appended to the timeline. Only a 'completed' transfer
   * marks both modes as visited and increases the depth; blocked attempts
   * leave the tracking state unchanged.
   *
   * @param {string} sourceMode
   * @param {string} targetMode
   * @param {object} context - Raw context before filtering
   * @param {string} [status='completed']
   * @returns {HandoffEvent}
   */
  recordTransfer(sourceMode, targetMode, context = {}, status = 'completed') {
    const source = sourceMode.toUpperCase();
    const target = targetMode.toUpperCase();

    const event = new HandoffEvent({
      sourceMode: source,
      targetMode: target,
      timestamp: Date.now() / 1000,
      depth: this._currentDepth,
      // The default parameter only covers `undefined`; guard an explicit null.
      reason: context?.reason || '',
      contextSummary: this._summarizeContext(context),
      status,
    });

    this._events.push(event);

    if (status === 'completed') {
      this._visited.add(source);
      this._visited.add(target);
      this._currentDepth += 1;
    }

    return event;
  }

  /**
   * Filter context for the target mode using registered filters.
   *
   * @param {object} context
   * @param {string} sourceMode
   * @param {string} targetMode
   * @returns {object} Whatever the target mode's `transform` returns
   */
  filterContext(context, sourceMode, targetMode) {
    const filter = this.getFilter(targetMode);
    return filter.transform(context, sourceMode.toUpperCase(), targetMode.toUpperCase());
  }

  /**
   * Generate transfer_to tool schemas for all available modes except the current one.
   *
   * Tool names use the lower-cased mode name; `targetMode` is upper-cased.
   *
   * @param {string[]} availableModes - All registered mode names
   * @param {string} currentMode - The currently active mode
   * @returns {Array<{ name: string, schema: object, targetMode: string }>}
   */
  generateTransferTools(availableModes, currentMode) {
    const current = currentMode.toUpperCase();
    return availableModes
      .filter(m => m.toUpperCase() !== current)
      .map(mode => {
        const modeKey = mode.toLowerCase();
        return {
          name: `transfer_to_${modeKey}`,
          targetMode: mode.toUpperCase(),
          schema: {
            name: `transfer_to_${modeKey}`,
            description: `Transfer execution to ${mode.toUpperCase()} mode agent. Use when the current task requires ${mode} capabilities.`,
            input_schema: {
              type: 'object',
              properties: {
                reason: {
                  type: 'string',
                  description: 'Why this transfer is needed',
                },
                context: {
                  type: 'string',
                  description: 'Key findings or context to pass to the target mode',
                },
                task: {
                  type: 'string',
                  description: 'Specific task for the target mode to accomplish',
                },
              },
              required: ['reason', 'task'],
            },
          },
        };
      });
  }

  /**
   * Reset engine state for a new chain. Registered filters are kept.
   */
  reset() {
    this._events = [];
    this._currentDepth = 0;
    this._visited.clear();
  }

  /**
   * Increment depth without recording a full transfer event.
   * Used by agent-as-tool to share depth tracking.
   */
  incrementDepth() {
    this._currentDepth += 1;
  }

  /**
   * Decrement depth (when agent-as-tool sub-agent completes).
   * Never goes below zero.
   */
  decrementDepth() {
    if (this._currentDepth > 0) this._currentDepth -= 1;
  }

  // -- internal -----------------------------------------------------------

  /**
   * Build a short description of a context object for the event log: the
   * first five key names plus a count of the remainder. Values are not included.
   *
   * @param {object} context
   * @returns {string} '' for non-objects, '(empty)' for an object with no keys
   */
  _summarizeContext(context) {
    if (!context || typeof context !== 'object') return '';
    const keys = Object.keys(context);
    if (keys.length === 0) return '(empty)';
    const preview = keys.slice(0, 5).join(', ');
    return `keys: [${preview}]${keys.length > 5 ? `, +${keys.length - 5} more` : ''}`;
  }
}
|
|
318
|
+
|
|
319
|
+
// ---------------------------------------------------------------------------
|
|
320
|
+
// runWithHandoffs — wraps runAutonomous with handoff interception
|
|
321
|
+
// ---------------------------------------------------------------------------
|
|
322
|
+
|
|
323
|
+
/**
 * Run an autonomous agent with handoff support.
 *
 * Runs the starting mode through the runner, then inspects the returned
 * result's `steps` log for a `[tool] transfer_to_<mode>` entry. When one is
 * found and the engine allows the transfer, the target mode is run with the
 * previous mode's filtered output, and the process repeats. It stops when no
 * transfer is requested, a transfer is blocked, or a runner call throws.
 *
 * Runner failures do not reject the returned promise: they are reported via
 * `HandoffResult.error` plus an error entry in `results`.
 *
 * @param {string} mode - Starting mode
 * @param {object} taskInput - Task parameters
 * @param {object} [opts]
 * @param {string|null} [opts.backend] - LLM backend override
 * @param {*} [opts.context] - Sandbox context
 * @param {number} [opts.maxDepth] - Max handoff depth
 * @param {Map<string, HandoffFilter>} [opts.filters] - Per-mode filters
 * @param {Function} [opts.agentRunner] - Injectable runner for testing (default: runAutonomous)
 * @returns {Promise<HandoffResult>}
 */
export async function runWithHandoffs(mode, taskInput, opts = {}) {
  const engine = new HandoffEngine({
    maxDepth: opts.maxDepth ?? DEFAULT_MAX_DEPTH,
    filters: opts.filters,
  });

  const startTime = performance.now() / 1000;
  const handoffResult = new HandoffResult();

  // Import runner lazily to avoid circular deps
  const runner = opts.agentRunner || (await import('./runner.js')).runAutonomous;

  // Mark starting mode as visited so a transfer back to it counts as a cycle
  engine._visited.add(mode.toUpperCase());

  /**
   * Execute a single mode, potentially recursing on handoff.
   *
   * @param {string} currentMode
   * @param {object} currentTask
   * @returns {Promise<void>}
   */
  async function executeMode(currentMode, currentTask) {
    let result;
    try {
      result = await runner(currentMode, currentTask, opts.backend, opts.context);
    } catch (e) {
      // Tolerate non-Error throwables, which have no `.message`.
      const message = e?.message ?? String(e);
      handoffResult.error = `${currentMode} execution failed: ${message}`;
      handoffResult.results.push(new ModeAgentResult({
        mode: currentMode,
        error: message,
      }));
      return;
    }

    handoffResult.results.push(result);
    // A result without token counts must not turn the totals into NaN.
    handoffResult.totalTokensIn += result.tokensIn ?? 0;
    handoffResult.totalTokensOut += result.tokensOut ?? 0;

    // Check if the agent requested a transfer (look in steps for transfer tool calls)
    const steps = Array.isArray(result.steps) ? result.steps : [];
    const transferStep = steps.find(s =>
      typeof s === 'string' && s.startsWith('[tool] transfer_to_')
    );

    if (!transferStep) return; // No handoff requested

    // Parse transfer target from step log
    const match = transferStep.match(/\[tool\] transfer_to_(\w+)/);
    if (!match) return;

    const targetMode = match[1].toUpperCase();
    const check = engine.canTransfer(currentMode, targetMode);

    if (!check.allowed) {
      // canTransfer evaluates the depth limit first, so the same comparison
      // tells a depth block apart from the rest. Other refusals (cycle or
      // filter rejection) keep the 'blocked_cycle' status; the recorded
      // reason text says which one it was.
      const status = engine.depth >= engine.maxDepth ? 'blocked_depth' : 'blocked_cycle';
      engine.recordTransfer(currentMode, targetMode, { reason: check.reason }, status);
      result.steps.push(`[handoff:blocked] ${check.reason}`);
      return;
    }

    // Filter context for target
    const handoffContext = {
      reason: `Transfer from ${currentMode}`,
      priorOutput: result.outputText,
      priorFindings: result.outputData,
    };
    const filtered = engine.filterContext(handoffContext, currentMode, targetMode);

    engine.recordTransfer(currentMode, targetMode, handoffContext);

    // Build task for target mode from the ORIGINAL task. Using the previous
    // hop's user_message would nest "[Handoff from …]" wrappers on every hop
    // and forward earlier output that was filtered for a different target.
    const targetTask = {
      ...taskInput,
      user_message: `[Handoff from ${currentMode}] ${filtered.priorOutput || ''}\n\nTask: ${taskInput.user_message || taskInput.task || 'Continue the engagement.'}`,
      handoff_context: filtered,
    };

    await executeMode(targetMode, targetTask);
  }

  await executeMode(mode.toUpperCase(), taskInput);

  handoffResult.events = engine.events;
  handoffResult.totalDurationS = performance.now() / 1000 - startTime;

  return handoffResult;
}
|
|
425
|
+
|
|
426
|
+
// ---------------------------------------------------------------------------
|
|
427
|
+
// runChain — explicit sequential multi-mode execution
|
|
428
|
+
// ---------------------------------------------------------------------------
|
|
429
|
+
|
|
430
|
+
/**
 * Execute a fixed list of modes one after another, feeding each mode the
 * output of the one before it.
 *
 * In contrast to runWithHandoffs, where a transfer tool call found in the
 * agent's step log decides the next mode, the order here is supplied by the
 * caller. The chain stops at the first mode whose runner call throws; that
 * failure is reported via `HandoffResult.error` rather than by rejecting.
 *
 * @param {string[]} modes - Ordered list of mode names to execute
 * @param {object} taskInput - Initial task parameters
 * @param {object} [opts]
 * @param {string|null} [opts.backend] - LLM backend override
 * @param {*} [opts.context] - Sandbox context
 * @param {Map<string, HandoffFilter>} [opts.filters] - Per-mode filters
 * @param {Function} [opts.agentRunner] - Injectable runner for testing
 * @returns {Promise<HandoffResult>}
 */
export async function runChain(modes, taskInput, opts = {}) {
  const nothingToRun = !modes || modes.length === 0;
  if (nothingToRun) {
    return new HandoffResult({ error: 'No modes specified for chain' });
  }

  const engine = new HandoffEngine({ filters: opts.filters });
  const runner = opts.agentRunner || (await import('./runner.js')).runAutonomous;
  const startTime = performance.now() / 1000;
  const handoffResult = new HandoffResult();

  // Output of the most recently completed step, carried into the next one.
  let carriedText = '';
  let carriedData = {};

  for (let step = 0; step < modes.length; step += 1) {
    const activeMode = modes[step].toUpperCase();
    const previousMode = step === 0 ? null : modes[step - 1].toUpperCase();

    // The first step gets a plain copy of the task; later steps get it
    // wrapped with the (filtered) results of the previous step.
    let stepTask = { ...taskInput };
    if (previousMode) {
      const rawContext = {
        reason: `Chain step ${step + 1}/${modes.length}: ${previousMode}→${activeMode}`,
        priorOutput: carriedText,
        priorFindings: carriedData,
      };
      const filtered = engine.filterContext(rawContext, previousMode, activeMode);

      // The timeline records the unfiltered context.
      engine.recordTransfer(previousMode, activeMode, rawContext);

      const priorText = filtered.priorOutput || '(none)';
      const taskText = taskInput.user_message || taskInput.task || 'Continue.';
      stepTask = {
        ...taskInput,
        user_message: `[Chain: ${previousMode}→${activeMode}, step ${step + 1}/${modes.length}]\n\nPrior results:\n${priorText}\n\nTask: ${taskText}`,
        handoff_context: filtered,
      };
    }

    let outcome;
    try {
      outcome = await runner(activeMode, stepTask, opts.backend, opts.context);
    } catch (e) {
      handoffResult.error = `Chain failed at ${activeMode} (step ${step + 1}/${modes.length}): ${e.message}`;
      const failure = new ModeAgentResult({
        mode: activeMode,
        error: e.message,
      });
      handoffResult.results.push(failure);
      break;
    }

    handoffResult.results.push(outcome);
    handoffResult.totalTokensIn += outcome.tokensIn;
    handoffResult.totalTokensOut += outcome.tokensOut;

    carriedText = outcome.outputText;
    carriedData = outcome.outputData;
  }

  handoffResult.events = engine.events;
  handoffResult.totalDurationS = performance.now() / 1000 - startTime;

  return handoffResult;
}
|
|
@@ -384,3 +384,29 @@ function _makeBlueConfig() {
|
|
|
384
384
|
/**
 * Register the BLUE mode by passing its name and config factory to the
 * supplied registrar callback.
 *
 * @param {Function} registerMode - Called with the mode name and its config factory
 */
export function register(registerMode) {
  const modeName = 'BLUE';
  registerMode(modeName, _makeBlueConfig);
}
|
|
387
|
+
|
|
388
|
+
// ---------------------------------------------------------------------------
|
|
389
|
+
// Handoff filter — strips attack specifics, keeps detection targets
|
|
390
|
+
// ---------------------------------------------------------------------------
|
|
391
|
+
|
|
392
|
+
/**
 * Handoff filter applied to context handed to BLUE mode.
 *
 * Only `reason`, `priorOutput` and `priorFindings` are carried over, and each
 * only when truthy; any other context key is dropped.
 *
 * @type {import('../handoff.js').HandoffFilter}
 */
export const BLUE_HANDOFF_FILTER = {
  transform(context) {
    const { reason, priorOutput, priorFindings } = context;
    const result = {};

    if (reason) {
      result.reason = reason;
    }

    if (priorOutput) {
      // Replace the listed attack keywords (whole words, any case) with a
      // marker; the rest of the text, such as technique IDs, is untouched.
      const attackTerms = /\b(exploit|payload|shellcode|reverse.?shell|meterpreter)\b/gi;
      result.priorOutput = priorOutput.replace(attackTerms, '[REDACTED]');
    }

    if (priorFindings) {
      // Drop the exploit-bearing fields and keep every other finding field.
      const {
        exploit: _exploit,
        payload: _payload,
        shellcode: _shellcode,
        raw_output: _rawOutput,
        ...retained
      } = priorFindings;
      result.priorFindings = retained;
    }

    return result;
  },
  acceptsFrom: [], // empty list: BLUE accepts handoffs from any mode
};
|
|
411
|
+
|
|
412
|
+
export { BLUE_HANDOFF_FILTER as _blueHandoffFilter };
|
|
@@ -530,6 +530,33 @@ export function _makeRedConfig(category = 'web') {
|
|
|
530
530
|
});
|
|
531
531
|
}
|
|
532
532
|
|
|
533
|
+
// ---------------------------------------------------------------------------
|
|
534
|
+
// Handoff filter — strips raw exploit payloads, keeps finding summaries
|
|
535
|
+
// ---------------------------------------------------------------------------
|
|
536
|
+
|
|
537
|
+
/**
 * Handoff filter applied to context handed to RED mode.
 *
 * Only `reason`, `priorOutput` and `priorFindings` are carried over, and each
 * only when truthy; any other context key is dropped.
 *
 * Declared without an inline `export` on purpose: this name is already part
 * of the module's `export { … }` list, and exporting the same name twice is
 * a SyntaxError that prevents the module from loading.
 *
 * @type {import('../handoff.js').HandoffFilter}
 */
const RED_HANDOFF_FILTER = {
  transform: (context) => {
    const filtered = {};
    // Keep structured summaries
    if (context.reason) filtered.reason = context.reason;
    if (context.priorOutput) {
      // Strip raw payloads: long hex strings (0x + 8+ hex chars) and runs of
      // three or more literal \xNN escape sequences. Then remove control
      // characters, leaving tab, newline and the other 0x09-0x0d whitespace.
      filtered.priorOutput = context.priorOutput
        .replace(/0x[0-9a-f]{8,}/gi, '[REDACTED:payload]')
        .replace(/(\\x[0-9a-f]{2}){3,}/gi, '[REDACTED:payload]')
        .replace(/[\x00-\x08\x0e-\x1f]/g, '');
    }
    if (context.priorFindings) {
      // Keep finding metadata, strip exploit details
      const { exploit, payload, shellcode, ...safe } = context.priorFindings;
      filtered.priorFindings = safe;
    }
    return filtered;
  },
  acceptsFrom: [], // RED accepts handoffs from any mode
};
|
|
559
|
+
|
|
533
560
|
// ---------------------------------------------------------------------------
|
|
534
561
|
// Registration
|
|
535
562
|
// ---------------------------------------------------------------------------
|
|
@@ -554,4 +581,5 @@ export {
|
|
|
554
581
|
_netPortScan,
|
|
555
582
|
_netConnectTcp,
|
|
556
583
|
_netSendPayload,
|
|
584
|
+
RED_HANDOFF_FILTER,
|
|
557
585
|
};
|