erosolar-cli 2.1.269 → 2.1.271
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/capabilities/index.d.ts +0 -1
- package/dist/capabilities/index.d.ts.map +1 -1
- package/dist/capabilities/index.js +1 -1
- package/dist/capabilities/index.js.map +1 -1
- package/dist/capabilities/orchestrationCapability.d.ts.map +1 -1
- package/dist/capabilities/orchestrationCapability.js +107 -62
- package/dist/capabilities/orchestrationCapability.js.map +1 -1
- package/dist/core/iMessageVerification.d.ts +1 -1
- package/dist/core/infrastructureTemplates.d.ts +1 -1
- package/dist/core/infrastructureTemplates.js +5 -5
- package/dist/core/infrastructureTemplates.js.map +1 -1
- package/dist/core/persistentObjectiveStore.d.ts +13 -1
- package/dist/core/persistentObjectiveStore.d.ts.map +1 -1
- package/dist/core/persistentObjectiveStore.js.map +1 -1
- package/dist/core/securityDeliverableGenerator.d.ts +1 -1
- package/dist/core/securityDeliverableGenerator.d.ts.map +1 -1
- package/dist/core/securityDeliverableGenerator.js +3 -3
- package/dist/core/securityDeliverableGenerator.js.map +1 -1
- package/dist/core/toolRuntime.d.ts.map +1 -1
- package/dist/core/toolRuntime.js +11 -5
- package/dist/core/toolRuntime.js.map +1 -1
- package/dist/core/types.js.map +1 -1
- package/dist/core/unifiedOrchestrator.d.ts +265 -77
- package/dist/core/unifiedOrchestrator.d.ts.map +1 -1
- package/dist/core/unifiedOrchestrator.js +915 -254
- package/dist/core/unifiedOrchestrator.js.map +1 -1
- package/dist/providers/anthropicProvider.d.ts +1 -1
- package/dist/shell/interactiveShell.d.ts +1 -1
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +188 -207
- package/dist/shell/interactiveShell.js.map +1 -1
- package/dist/tools/tao/rl.js +1 -1
- package/dist/tools/tao/rl.js.map +1 -1
- package/dist/tools/taoTools.js +5 -5
- package/dist/tools/taoTools.js.map +1 -1
- package/package.json +1 -1
- package/dist/capabilities/unifiedInvestigationCapability.d.ts +0 -22
- package/dist/capabilities/unifiedInvestigationCapability.d.ts.map +0 -1
- package/dist/capabilities/unifiedInvestigationCapability.js +0 -41
- package/dist/capabilities/unifiedInvestigationCapability.js.map +0 -1
- package/dist/core/agentOrchestrator.d.ts +0 -400
- package/dist/core/agentOrchestrator.d.ts.map +0 -1
- package/dist/core/agentOrchestrator.js +0 -2133
- package/dist/core/agentOrchestrator.js.map +0 -1
- package/dist/core/autoExecutionOrchestrator.d.ts +0 -172
- package/dist/core/autoExecutionOrchestrator.d.ts.map +0 -1
- package/dist/core/autoExecutionOrchestrator.js +0 -591
- package/dist/core/autoExecutionOrchestrator.js.map +0 -1
- package/dist/core/dualAgentOrchestrator.d.ts +0 -34
- package/dist/core/dualAgentOrchestrator.d.ts.map +0 -1
- package/dist/core/dualAgentOrchestrator.js +0 -94
- package/dist/core/dualAgentOrchestrator.js.map +0 -1
- package/dist/core/failureRecovery.d.ts +0 -26
- package/dist/core/failureRecovery.d.ts.map +0 -1
- package/dist/core/failureRecovery.js +0 -54
- package/dist/core/failureRecovery.js.map +0 -1
- package/dist/core/intelligentTestFlows.d.ts +0 -45
- package/dist/core/intelligentTestFlows.d.ts.map +0 -1
- package/dist/core/intelligentTestFlows.js +0 -25
- package/dist/core/intelligentTestFlows.js.map +0 -1
- package/dist/core/learningPersistence.d.ts +0 -58
- package/dist/core/learningPersistence.d.ts.map +0 -1
- package/dist/core/learningPersistence.js +0 -46
- package/dist/core/learningPersistence.js.map +0 -1
- package/dist/core/metricsTracker.d.ts +0 -40
- package/dist/core/metricsTracker.d.ts.map +0 -1
- package/dist/core/metricsTracker.js +0 -83
- package/dist/core/metricsTracker.js.map +0 -1
- package/dist/core/orchestration.d.ts +0 -534
- package/dist/core/orchestration.d.ts.map +0 -1
- package/dist/core/orchestration.js +0 -2009
- package/dist/core/orchestration.js.map +0 -1
- package/dist/core/performanceMonitor.d.ts +0 -30
- package/dist/core/performanceMonitor.d.ts.map +0 -1
- package/dist/core/performanceMonitor.js +0 -39
- package/dist/core/performanceMonitor.js.map +0 -1
- package/dist/core/selfEvolution.d.ts +0 -61
- package/dist/core/selfEvolution.d.ts.map +0 -1
- package/dist/core/selfEvolution.js +0 -38
- package/dist/core/selfEvolution.js.map +0 -1
- package/dist/core/selfImprovement.d.ts +0 -82
- package/dist/core/selfImprovement.d.ts.map +0 -1
- package/dist/core/selfImprovement.js +0 -25
- package/dist/core/selfImprovement.js.map +0 -1
- package/dist/core/unifiedFraudOrchestrator.d.ts +0 -738
- package/dist/core/unifiedFraudOrchestrator.d.ts.map +0 -1
- package/dist/core/unifiedFraudOrchestrator.js +0 -3312
- package/dist/core/unifiedFraudOrchestrator.js.map +0 -1
- package/dist/core/unifiedRealOrchestrator.d.ts +0 -126
- package/dist/core/unifiedRealOrchestrator.d.ts.map +0 -1
- package/dist/core/unifiedRealOrchestrator.js +0 -475
- package/dist/core/unifiedRealOrchestrator.js.map +0 -1
- package/dist/core/userDefenseOrchestrator.d.ts +0 -202
- package/dist/core/userDefenseOrchestrator.d.ts.map +0 -1
- package/dist/core/userDefenseOrchestrator.js +0 -1006
- package/dist/core/userDefenseOrchestrator.js.map +0 -1
- package/dist/plugins/tools/unifiedInvestigation/unifiedInvestigationPlugin.d.ts +0 -3
- package/dist/plugins/tools/unifiedInvestigation/unifiedInvestigationPlugin.d.ts.map +0 -1
- package/dist/plugins/tools/unifiedInvestigation/unifiedInvestigationPlugin.js +0 -14
- package/dist/plugins/tools/unifiedInvestigation/unifiedInvestigationPlugin.js.map +0 -1
- package/dist/tools/frontendTestingTools.d.ts +0 -9
- package/dist/tools/frontendTestingTools.d.ts.map +0 -1
- package/dist/tools/frontendTestingTools.js +0 -291
- package/dist/tools/frontendTestingTools.js.map +0 -1
- package/dist/tools/unifiedInvestigationTools.d.ts +0 -19
- package/dist/tools/unifiedInvestigationTools.d.ts.map +0 -1
- package/dist/tools/unifiedInvestigationTools.js +0 -1163
- package/dist/tools/unifiedInvestigationTools.js.map +0 -1
- package/scripts/human-verification.mjs +0 -380
- package/scripts/isolated-verification-runner.mjs +0 -364
- package/scripts/isolated-verification-wrapper.mjs +0 -276
- package/scripts/verify-task-completion.mjs +0 -143
|
@@ -1,2133 +0,0 @@
|
|
|
1
|
-
import { TASK_FULLY_COMPLETE } from './constants.js';
|
|
2
|
-
import { TaskCompletionDetector, WRITE_TOOLS } from './taskCompletionDetector.js';
|
|
3
|
-
import { planAttackChain, selectNextTechnique, executeTechniqueInChain, techniqueRegistry, parseNaturalLanguageIntent, validateAuthorizationContext, getRLState, listActiveChains, } from '../tools/tao/index.js';
|
|
4
|
-
import { getInfrastructureStack, generateDeploymentScript, generateTeardownScript, generateOpsecChecklist, } from './infrastructureTemplates.js';
|
|
5
|
-
import { getAgencyContacts, getApplicableLegalFrameworks, } from './governmentProcedures.js';
|
|
6
|
-
import { createTaoTools } from '../tools/taoTools.js';
|
|
7
|
-
// Kill-chain phase definitions for full attack chain execution
|
|
8
|
-
/**
 * Phase identifiers, listed in the order this module declares them.
 * Consumers should treat the array as read-only.
 */
export const KILL_CHAIN_PHASES = [
    'reconnaissance', 'initial_access', 'persistence',
    'privilege_escalation', 'defense_evasion', 'credential_access',
    'discovery', 'lateral_movement', 'collection',
    'command_control', 'exfiltration', 'impact',
];
|
|
22
|
-
/**
|
|
23
|
-
* Single-pass orchestration: drive the agent to finish as much as possible
|
|
24
|
-
* in one go, with strong bias toward real actions over planning.
|
|
25
|
-
*/
|
|
26
|
-
export class AgentOrchestrator {
|
|
27
|
-
agent;
|
|
28
|
-
constructor(agent) {
|
|
29
|
-
this.agent = agent;
|
|
30
|
-
}
|
|
31
|
-
async runToCompletion(request, options) {
|
|
32
|
-
// Early safety check for potentially harmful requests
|
|
33
|
-
const harmfulCheck = this.checkForHarmfulRequest(request);
|
|
34
|
-
if (harmfulCheck.isHarmful) {
|
|
35
|
-
return {
|
|
36
|
-
finalResponse: harmfulCheck.response,
|
|
37
|
-
toolsUsed: [],
|
|
38
|
-
planOnly: false,
|
|
39
|
-
tookAction: false,
|
|
40
|
-
completion: {
|
|
41
|
-
isComplete: true,
|
|
42
|
-
confidence: 1.0,
|
|
43
|
-
signals: {
|
|
44
|
-
hasExplicitCompletionStatement: true,
|
|
45
|
-
hasIncompleteWorkIndicators: false,
|
|
46
|
-
hasPendingActionIndicators: false,
|
|
47
|
-
hasErrorIndicators: false,
|
|
48
|
-
hasFollowUpQuestions: false,
|
|
49
|
-
toolsUsedInLastResponse: 0,
|
|
50
|
-
lastToolWasReadOnly: false,
|
|
51
|
-
consecutiveResponsesWithoutTools: 0,
|
|
52
|
-
hasRecentFileWrites: false,
|
|
53
|
-
hasRecentCommits: false,
|
|
54
|
-
todoItemsPending: 0,
|
|
55
|
-
todoItemsCompleted: 0,
|
|
56
|
-
mentionsFutureWork: false,
|
|
57
|
-
completionConfidence: 1.0,
|
|
58
|
-
},
|
|
59
|
-
reason: 'Request declined for safety reasons',
|
|
60
|
-
shouldVerify: false,
|
|
61
|
-
},
|
|
62
|
-
exitReason: 'refusal',
|
|
63
|
-
statusSummary: null,
|
|
64
|
-
limitations: [],
|
|
65
|
-
recommendations: [],
|
|
66
|
-
};
|
|
67
|
-
}
|
|
68
|
-
const streaming = options?.streaming ?? true;
|
|
69
|
-
const enforceActions = options?.enforceActions ?? true;
|
|
70
|
-
const verificationMode = options?.verificationMode ?? 'auto';
|
|
71
|
-
const singlePassFocus = options?.singlePassFocus ?? true;
|
|
72
|
-
const maxAttempts = Math.max(1, options?.maxAttempts ?? 6);
|
|
73
|
-
const completionDetector = new TaskCompletionDetector();
|
|
74
|
-
const contextualGuidance = this.buildContextualGuidance(request);
|
|
75
|
-
const attempts = [];
|
|
76
|
-
const runAttempt = async (prompt, enforceActionsForAttempt) => {
|
|
77
|
-
const response = (await this.agent.send(prompt, streaming)).trim();
|
|
78
|
-
const toolExecutions = this.agent.drainToolExecutions();
|
|
79
|
-
toolExecutions.forEach(exec => completionDetector.recordToolCall(exec.name, exec.success, exec.hasOutput));
|
|
80
|
-
const toolsUsed = toolExecutions.filter(exec => exec.success).map(exec => exec.name);
|
|
81
|
-
const completion = completionDetector.analyzeCompletion(response, toolsUsed);
|
|
82
|
-
const analysis = this.analyzeResponse(response, toolsUsed, enforceActionsForAttempt);
|
|
83
|
-
const exitReason = this.resolveExitReason(analysis, completion, enforceActionsForAttempt, verificationMode);
|
|
84
|
-
return {
|
|
85
|
-
response,
|
|
86
|
-
toolsUsed,
|
|
87
|
-
toolExecutions,
|
|
88
|
-
completion,
|
|
89
|
-
analysis,
|
|
90
|
-
exitReason,
|
|
91
|
-
};
|
|
92
|
-
};
|
|
93
|
-
const primaryPrompt = singlePassFocus
|
|
94
|
-
? this.buildSinglePassPrompt(request, enforceActions, contextualGuidance)
|
|
95
|
-
: request.trim();
|
|
96
|
-
let nextPrompt = primaryPrompt;
|
|
97
|
-
let finalAttempt = null;
|
|
98
|
-
let exitReason = 'incomplete';
|
|
99
|
-
let maxAttemptsHit = false;
|
|
100
|
-
let consecutiveNoProgress = 0;
|
|
101
|
-
let consecutiveReadOnlyOnly = 0;
|
|
102
|
-
while (attempts.length < maxAttempts) {
|
|
103
|
-
const attempt = await runAttempt(nextPrompt, enforceActions);
|
|
104
|
-
attempts.push(attempt);
|
|
105
|
-
finalAttempt = attempt;
|
|
106
|
-
exitReason = attempt.exitReason;
|
|
107
|
-
// CRITICAL: Stop immediately on refusals - these are terminal states
|
|
108
|
-
if (attempt.exitReason === 'refusal') {
|
|
109
|
-
break;
|
|
110
|
-
}
|
|
111
|
-
// Track consecutive no-progress attempts
|
|
112
|
-
// Key insight: tool usage alone doesn't mean progress if the model is stuck
|
|
113
|
-
// or refusing. We need to detect actual forward momentum.
|
|
114
|
-
const terminalNoProgress = attempt.exitReason === 'empty-response' ||
|
|
115
|
-
attempt.exitReason === 'no-action' ||
|
|
116
|
-
attempt.exitReason === 'blocked';
|
|
117
|
-
// Detect response repetition - if we keep getting similar responses, we're stuck
|
|
118
|
-
// This catches cases where model calls tools but produces same reasoning/refusal
|
|
119
|
-
const responseFingerprint = this.computeResponseFingerprint(attempt.response);
|
|
120
|
-
const previousAttempt = attempts.length >= 2 ? attempts[attempts.length - 2] : undefined;
|
|
121
|
-
const isRepeatedResponse = previousAttempt !== undefined &&
|
|
122
|
-
this.computeResponseFingerprint(previousAttempt.response) === responseFingerprint &&
|
|
123
|
-
responseFingerprint !== '';
|
|
124
|
-
// Progress is considered made when:
|
|
125
|
-
// 1. Task is complete or needs verification
|
|
126
|
-
// 2. OR tools were used with a DIFFERENT response (model is working through the problem)
|
|
127
|
-
const isCompleting = attempt.exitReason === 'complete' ||
|
|
128
|
-
attempt.exitReason === 'verification-needed';
|
|
129
|
-
// Check if only read-only tools were used (no write/action tools)
|
|
130
|
-
const usedWriteTool = attempt.toolsUsed.some(t => WRITE_TOOLS.has(t));
|
|
131
|
-
const onlyReadOnlyTools = attempt.toolsUsed.length > 0 && !usedWriteTool;
|
|
132
|
-
// Track consecutive read-only-only iterations
|
|
133
|
-
// This catches silent refusals where model calls list_files repeatedly
|
|
134
|
-
if (onlyReadOnlyTools && !isCompleting) {
|
|
135
|
-
consecutiveReadOnlyOnly++;
|
|
136
|
-
}
|
|
137
|
-
else {
|
|
138
|
-
consecutiveReadOnlyOnly = 0;
|
|
139
|
-
}
|
|
140
|
-
const isProgressingWithTools = attempt.toolsUsed.length > 0 && !isRepeatedResponse;
|
|
141
|
-
const hasRealProgress = isCompleting || isProgressingWithTools;
|
|
142
|
-
// No progress if: terminal state, no completion AND no tool progress, OR repeated response
|
|
143
|
-
if (terminalNoProgress || (!hasRealProgress && !isCompleting) || (isRepeatedResponse && !isCompleting)) {
|
|
144
|
-
consecutiveNoProgress++;
|
|
145
|
-
}
|
|
146
|
-
else {
|
|
147
|
-
consecutiveNoProgress = 0;
|
|
148
|
-
}
|
|
149
|
-
// CRITICAL: Stop immediately after 2 consecutive attempts with no progress
|
|
150
|
-
// This prevents infinite loops when model refuses silently or returns empty responses
|
|
151
|
-
if (consecutiveNoProgress >= 2) {
|
|
152
|
-
break;
|
|
153
|
-
}
|
|
154
|
-
// CRITICAL: Stop if model keeps using only read-only tools without making task progress
|
|
155
|
-
// This catches DeepSeek/others silently refusing by calling list_files repeatedly
|
|
156
|
-
if (consecutiveReadOnlyOnly >= 3) {
|
|
157
|
-
break;
|
|
158
|
-
}
|
|
159
|
-
// Single exit check - combines completion and limit detection
|
|
160
|
-
maxAttemptsHit = attempts.length >= maxAttempts;
|
|
161
|
-
const continueRun = this.shouldContinue(attempts, maxAttempts);
|
|
162
|
-
if (!continueRun || maxAttemptsHit) {
|
|
163
|
-
break;
|
|
164
|
-
}
|
|
165
|
-
nextPrompt = this.buildContinuationPrompt(request, attempt, attempts.length + 1, enforceActions, contextualGuidance);
|
|
166
|
-
}
|
|
167
|
-
if (!finalAttempt) {
|
|
168
|
-
throw new Error('Orchestrator did not record any attempts.');
|
|
169
|
-
}
|
|
170
|
-
const toolsUsed = Array.from(new Set(attempts.flatMap((attempt) => attempt.toolExecutions.filter(exec => exec.success).map(exec => exec.name))));
|
|
171
|
-
const tookAction = attempts.some(attempt => attempt.analysis.tookAction);
|
|
172
|
-
const planOnly = attempts.every(attempt => attempt.analysis.planOnly);
|
|
173
|
-
const { summary, limitations, recommendations } = await this.buildStatusSummary(request, finalAttempt.response, finalAttempt.analysis, finalAttempt.completion, toolsUsed, exitReason, attempts.length, maxAttemptsHit, attempts);
|
|
174
|
-
return {
|
|
175
|
-
finalResponse: finalAttempt.response,
|
|
176
|
-
toolsUsed,
|
|
177
|
-
planOnly,
|
|
178
|
-
tookAction,
|
|
179
|
-
completion: finalAttempt.completion,
|
|
180
|
-
exitReason,
|
|
181
|
-
statusSummary: summary,
|
|
182
|
-
limitations,
|
|
183
|
-
recommendations,
|
|
184
|
-
};
|
|
185
|
-
}
|
|
186
|
-
resolveExitReason(analysis, completion, enforceActions, verificationMode) {
|
|
187
|
-
if (analysis.empty) {
|
|
188
|
-
return 'empty-response';
|
|
189
|
-
}
|
|
190
|
-
// AI safety refusal - terminal state, do not retry
|
|
191
|
-
if (analysis.refusal) {
|
|
192
|
-
return 'refusal';
|
|
193
|
-
}
|
|
194
|
-
if (analysis.blocked) {
|
|
195
|
-
return 'blocked';
|
|
196
|
-
}
|
|
197
|
-
if (enforceActions && !analysis.tookAction) {
|
|
198
|
-
return 'no-action';
|
|
199
|
-
}
|
|
200
|
-
const readyToStop = analysis.readyToStop || completion.isComplete;
|
|
201
|
-
if (verificationMode === 'auto' && completion.shouldVerify && readyToStop) {
|
|
202
|
-
return 'verification-needed';
|
|
203
|
-
}
|
|
204
|
-
if (readyToStop) {
|
|
205
|
-
return 'complete';
|
|
206
|
-
}
|
|
207
|
-
return 'incomplete';
|
|
208
|
-
}
|
|
209
|
-
async buildStatusSummary(request, response, _analysis, completion, toolsUsed, exitReason, attemptsCount, maxAttemptsHit, attempts) {
|
|
210
|
-
// Generate intelligent insight - the only thing users care about
|
|
211
|
-
const summary = await this.generateInsightSummary(request, response, toolsUsed, exitReason, attemptsCount, maxAttemptsHit, completion, attempts);
|
|
212
|
-
// Minimal limitations/recommendations - only surface critical issues
|
|
213
|
-
const limitations = [];
|
|
214
|
-
const recommendations = [];
|
|
215
|
-
if (toolsUsed.length === 0) {
|
|
216
|
-
limitations.push('No tools ran.');
|
|
217
|
-
}
|
|
218
|
-
if (maxAttemptsHit && exitReason !== 'complete') {
|
|
219
|
-
limitations.push('Hit attempt limit.');
|
|
220
|
-
}
|
|
221
|
-
if (completion.shouldVerify) {
|
|
222
|
-
recommendations.push('Verify the output.');
|
|
223
|
-
}
|
|
224
|
-
if (toolsUsed.length > 0) {
|
|
225
|
-
recommendations.push('Run `npm run verify-core` (lint, type-check, focused tests) before shipping.');
|
|
226
|
-
}
|
|
227
|
-
return { summary, limitations, recommendations };
|
|
228
|
-
}
|
|
229
|
-
/**
|
|
230
|
-
* Generate comprehensive insight summary for user transparency.
|
|
231
|
-
* Shows what was done, what might be incomplete, and potential concerns.
|
|
232
|
-
*/
|
|
233
|
-
async generateInsightSummary(_request, _response, toolsUsed, exitReason, attemptsCount, maxAttemptsHit, completion, attempts) {
|
|
234
|
-
const history = this.agent.getHistory();
|
|
235
|
-
const ctx = this.extractContextAndLimitations(history, toolsUsed, attempts);
|
|
236
|
-
// Build comprehensive summary sections
|
|
237
|
-
const sections = [];
|
|
238
|
-
// 1. What was done - key findings and actions
|
|
239
|
-
if (ctx.keyFindings.length > 0) {
|
|
240
|
-
const findings = ctx.keyFindings.slice(0, 3).join('; ');
|
|
241
|
-
sections.push(`Done: ${findings}`);
|
|
242
|
-
}
|
|
243
|
-
// 2. Tool execution summary
|
|
244
|
-
const allTools = attempts.flatMap(a => a.toolExecutions);
|
|
245
|
-
const successfulTools = allTools.filter(t => t.success);
|
|
246
|
-
const failedTools = allTools.filter(t => !t.success);
|
|
247
|
-
if (allTools.length > 0) {
|
|
248
|
-
const toolSummary = [];
|
|
249
|
-
if (successfulTools.length > 0) {
|
|
250
|
-
const uniqueTools = [...new Set(successfulTools.map(t => t.name))];
|
|
251
|
-
toolSummary.push(`${successfulTools.length} tool calls (${uniqueTools.slice(0, 4).join(', ')}${uniqueTools.length > 4 ? '...' : ''})`);
|
|
252
|
-
}
|
|
253
|
-
if (failedTools.length > 0) {
|
|
254
|
-
toolSummary.push(`${failedTools.length} failed`);
|
|
255
|
-
}
|
|
256
|
-
if (toolSummary.length > 0) {
|
|
257
|
-
sections.push(`Tools: ${toolSummary.join(', ')}`);
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
else if (toolsUsed.length === 0) {
|
|
261
|
-
sections.push('Tools: none used (response may be ungrounded)');
|
|
262
|
-
}
|
|
263
|
-
// Validation/telemetry
|
|
264
|
-
sections.push('Validation: non-blocking bash/tool telemetry enabled; prefer `npm run verify-core` for final checks.');
|
|
265
|
-
// 3. Verification status
|
|
266
|
-
const verifyParts = [];
|
|
267
|
-
if (ctx.ranTests)
|
|
268
|
-
verifyParts.push('tests ran');
|
|
269
|
-
if (ctx.hasVerification && !ctx.ranTests)
|
|
270
|
-
verifyParts.push('build checked');
|
|
271
|
-
if (ctx.madeEdits && !ctx.ranTests)
|
|
272
|
-
verifyParts.push('edits NOT tested');
|
|
273
|
-
if (ctx.hasErrors)
|
|
274
|
-
verifyParts.push('ERRORS detected');
|
|
275
|
-
if (ctx.hasWarnings)
|
|
276
|
-
verifyParts.push('warnings present');
|
|
277
|
-
if (verifyParts.length > 0) {
|
|
278
|
-
sections.push(`Status: ${verifyParts.join(', ')}`);
|
|
279
|
-
}
|
|
280
|
-
// 4. Completion status and concerns
|
|
281
|
-
const concerns = [];
|
|
282
|
-
if (maxAttemptsHit && exitReason !== 'complete') {
|
|
283
|
-
concerns.push(`stopped after ${attemptsCount} attempts (may be incomplete)`);
|
|
284
|
-
}
|
|
285
|
-
if (exitReason === 'incomplete') {
|
|
286
|
-
concerns.push('task may not be fully complete');
|
|
287
|
-
}
|
|
288
|
-
if (completion.shouldVerify) {
|
|
289
|
-
concerns.push('manual verification recommended');
|
|
290
|
-
}
|
|
291
|
-
// 5. Honest limitations - what AI cannot guarantee
|
|
292
|
-
if (ctx.limitations.length > 0) {
|
|
293
|
-
const limitationsStr = ctx.limitations.slice(0, 3).join(', ');
|
|
294
|
-
concerns.push(limitationsStr);
|
|
295
|
-
}
|
|
296
|
-
// Add concerns section if any
|
|
297
|
-
if (concerns.length > 0) {
|
|
298
|
-
sections.push(`Note: ${concerns.join('; ')}`);
|
|
299
|
-
}
|
|
300
|
-
// 6. Potential hallucination warning for ungrounded responses
|
|
301
|
-
if (toolsUsed.length === 0 && !ctx.hasVerification) {
|
|
302
|
-
sections.push('⚠️ No tool verification - response based on model knowledge only');
|
|
303
|
-
}
|
|
304
|
-
// Return null only for clean, verified completions
|
|
305
|
-
if (sections.length === 0 ||
|
|
306
|
-
(exitReason === 'complete' && !completion.shouldVerify &&
|
|
307
|
-
ctx.hasVerification && !ctx.hasErrors && ctx.limitations.length === 0)) {
|
|
308
|
-
return null;
|
|
309
|
-
}
|
|
310
|
-
return sections.join(' | ');
|
|
311
|
-
}
|
|
312
|
-
/**
|
|
313
|
-
* Extract context AND limitations from the run.
|
|
314
|
-
* Be honest about what we know vs don't know.
|
|
315
|
-
*/
|
|
316
|
-
extractContextAndLimitations(history, toolsUsed, attempts) {
|
|
317
|
-
const keyFindings = [];
|
|
318
|
-
let hasErrors = false;
|
|
319
|
-
let hasWarnings = false;
|
|
320
|
-
let hasVerification = false;
|
|
321
|
-
let ranTests = false;
|
|
322
|
-
let madeEdits = false;
|
|
323
|
-
const limitations = [];
|
|
324
|
-
// Analyze tool usage
|
|
325
|
-
const allTools = attempts.flatMap(a => a.toolExecutions);
|
|
326
|
-
madeEdits = allTools.some(t => t.success && this.isEditTool(t.name));
|
|
327
|
-
const validationTools = allTools.filter(t => t.success && this.isValidationTool(t.name));
|
|
328
|
-
ranTests = validationTools.length > 0 || this.hasValidationEvidence(history);
|
|
329
|
-
hasVerification = ranTests || validationTools.length > 0;
|
|
330
|
-
// Scan conversation for substance
|
|
331
|
-
const relevant = history.filter(m => m.role !== 'system').slice(-8);
|
|
332
|
-
for (const msg of relevant) {
|
|
333
|
-
const content = String(msg.content || '');
|
|
334
|
-
if (msg.role === 'tool') {
|
|
335
|
-
// Detect errors
|
|
336
|
-
if (/\b(error|failed|failure|exception|ENOENT|EACCES|denied)\b/i.test(content)) {
|
|
337
|
-
hasErrors = true;
|
|
338
|
-
const errorLine = content.split('\n').find(l => /error|fail|exception/i.test(l));
|
|
339
|
-
if (errorLine)
|
|
340
|
-
keyFindings.push(errorLine.slice(0, 80));
|
|
341
|
-
}
|
|
342
|
-
// Detect warnings
|
|
343
|
-
if (/\bwarn(ing)?\b/i.test(content)) {
|
|
344
|
-
hasWarnings = true;
|
|
345
|
-
}
|
|
346
|
-
// Detect test results
|
|
347
|
-
if (/(\d+)\s*(tests?\s*)?(passed|passing)/i.test(content)) {
|
|
348
|
-
const match = content.match(/(\d+)\s*(tests?\s*)?(passed|passing)/i);
|
|
349
|
-
if (match)
|
|
350
|
-
keyFindings.push(`${match[1]} tests passed`);
|
|
351
|
-
}
|
|
352
|
-
if (/(\d+)\s*(tests?\s*)?(failed|failing)/i.test(content)) {
|
|
353
|
-
const match = content.match(/(\d+)\s*(tests?\s*)?(failed|failing)/i);
|
|
354
|
-
if (match)
|
|
355
|
-
keyFindings.push(`${match[1]} tests failed`);
|
|
356
|
-
hasErrors = true;
|
|
357
|
-
}
|
|
358
|
-
}
|
|
359
|
-
if (msg.role === 'assistant') {
|
|
360
|
-
// Extract conclusions
|
|
361
|
-
const conclusionMatch = content.match(/(?:found|fixed|created|updated|added|removed|the issue was|the problem was|completed)[^.\n]{5,60}/i);
|
|
362
|
-
if (conclusionMatch) {
|
|
363
|
-
keyFindings.push(conclusionMatch[0].trim());
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
// Intelligent limitation analysis - context-aware reasoning about what could go wrong
|
|
368
|
-
const toolNames = allTools.map(t => t.name);
|
|
369
|
-
const assistantMessages = relevant.filter(m => m.role === 'assistant').map(m => String(m.content || ''));
|
|
370
|
-
const lastAssistant = assistantMessages[assistantMessages.length - 1] || '';
|
|
371
|
-
// 1. Verification gaps
|
|
372
|
-
if (!ranTests) {
|
|
373
|
-
if (madeEdits) {
|
|
374
|
-
limitations.push('code edits made but no tests executed - behavior unverified');
|
|
375
|
-
}
|
|
376
|
-
else {
|
|
377
|
-
limitations.push('no automated tests ran');
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
|
-
if (madeEdits && !hasVerification) {
|
|
381
|
-
limitations.push('no build or lint check performed after edits');
|
|
382
|
-
}
|
|
383
|
-
// 2. Tool execution concerns
|
|
384
|
-
if (toolsUsed.length === 0) {
|
|
385
|
-
limitations.push('response not grounded in tool execution - may contain assumptions');
|
|
386
|
-
}
|
|
387
|
-
const failedToolNames = allTools.filter(t => !t.success).map(t => t.name);
|
|
388
|
-
if (failedToolNames.length > 0) {
|
|
389
|
-
const unique = [...new Set(failedToolNames)];
|
|
390
|
-
limitations.push(`${unique.length} tool(s) failed: ${unique.slice(0, 3).join(', ')}${unique.length > 3 ? '...' : ''}`);
|
|
391
|
-
}
|
|
392
|
-
// 3. Potential hallucination indicators
|
|
393
|
-
if (lastAssistant) {
|
|
394
|
-
// Check for speculative language suggesting uncertainty
|
|
395
|
-
if (/\b(might|may|could|possibly|probably|likely|assume|guess|think)\b/i.test(lastAssistant)) {
|
|
396
|
-
if (!/\b(definitely|confirmed|verified|tested|checked)\b/i.test(lastAssistant)) {
|
|
397
|
-
limitations.push('response contains speculative language - verify claims');
|
|
398
|
-
}
|
|
399
|
-
}
|
|
400
|
-
// Check for claims without tool evidence
|
|
401
|
-
if (/\b(fixed|resolved|completed|works now)\b/i.test(lastAssistant) && !ranTests && !hasVerification) {
|
|
402
|
-
limitations.push('completion claimed without verification - manually confirm');
|
|
403
|
-
}
|
|
404
|
-
}
|
|
405
|
-
// 4. Edge case and scope concerns
|
|
406
|
-
if (madeEdits) {
|
|
407
|
-
// Analyze edit scope
|
|
408
|
-
const editCount = allTools.filter(t => ['edit', 'edit_file', 'write', 'write_file'].includes(t.name) && t.success).length;
|
|
409
|
-
if (editCount > 3) {
|
|
410
|
-
limitations.push(`${editCount} files edited - review each for correctness`);
|
|
411
|
-
}
|
|
412
|
-
if (!ranTests) {
|
|
413
|
-
limitations.push('edge cases and error handling paths untested');
|
|
414
|
-
}
|
|
415
|
-
}
|
|
416
|
-
// 5. Security and production concerns
|
|
417
|
-
if (toolNames.includes('bash')) {
|
|
418
|
-
const bashOutputs = relevant.filter(m => m.role === 'tool').map(m => String(m.content || ''));
|
|
419
|
-
const hasSensitiveOps = bashOutputs.some(o => /\b(rm -rf|chmod 777|sudo|password|secret|token|key=)\b/i.test(o));
|
|
420
|
-
if (hasSensitiveOps) {
|
|
421
|
-
limitations.push('sensitive operations detected - review security implications');
|
|
422
|
-
}
|
|
423
|
-
}
|
|
424
|
-
// 6. Incomplete task indicators
|
|
425
|
-
if (hasErrors && !keyFindings.some(f => /fixed|resolved/i.test(f))) {
|
|
426
|
-
limitations.push('errors detected but not explicitly resolved');
|
|
427
|
-
}
|
|
428
|
-
return { keyFindings, hasErrors, hasWarnings, hasVerification, ranTests, madeEdits, limitations };
|
|
429
|
-
}
|
|
430
|
-
buildSinglePassPrompt(originalRequest, enforceActions, contextualGuidance) {
|
|
431
|
-
const actionLine = enforceActions
|
|
432
|
-
? '- Use tools to complete the task. Act immediately instead of re-planning.'
|
|
433
|
-
: '- Provide your best response.';
|
|
434
|
-
const guidanceBlock = this.formatContextualGuidance(contextualGuidance);
|
|
435
|
-
const playbook = this.buildExecutionPlaybook(enforceActions);
|
|
436
|
-
return `${originalRequest.trim()}
|
|
437
|
-
|
|
438
|
-
${actionLine}
|
|
439
|
-
- Finish as much as possible now.
|
|
440
|
-
- Favor direct execution over additional planning.
|
|
441
|
-
- Run quick verification (tests/build) when claiming completion.
|
|
442
|
-
- Show outputs as evidence.
|
|
443
|
-
- End with TASK_FULLY_COMPLETE when done.${guidanceBlock}${playbook}`;
|
|
444
|
-
}
|
|
445
|
-
shouldContinue(attempts, maxAttempts) {
|
|
446
|
-
if (!attempts.length) {
|
|
447
|
-
return false;
|
|
448
|
-
}
|
|
449
|
-
const latest = attempts[attempts.length - 1];
|
|
450
|
-
const exitReason = latest.exitReason;
|
|
451
|
-
if (attempts.length >= maxAttempts) {
|
|
452
|
-
return false;
|
|
453
|
-
}
|
|
454
|
-
// Terminal states - never retry
|
|
455
|
-
if (exitReason === 'refusal' || exitReason === 'complete') {
|
|
456
|
-
return false;
|
|
457
|
-
}
|
|
458
|
-
// Detect stuck loops early - if we have 2+ consecutive attempts without completion, stop
|
|
459
|
-
// This catches cases where the model refuses silently, calls tools without progress,
|
|
460
|
-
// or returns empty/no-action repeatedly
|
|
461
|
-
if (attempts.length >= 2) {
|
|
462
|
-
const recentAttempts = attempts.slice(-2);
|
|
463
|
-
const allNoCompletion = recentAttempts.every(a => a.exitReason !== 'complete' && a.exitReason !== 'verification-needed');
|
|
464
|
-
if (allNoCompletion) {
|
|
465
|
-
// Check if responses are similar (stuck in a loop)
|
|
466
|
-
const fingerprints = recentAttempts.map(a => this.computeResponseFingerprint(a.response));
|
|
467
|
-
if (fingerprints[0] === fingerprints[1] && fingerprints[0] !== '') {
|
|
468
|
-
return false; // Stuck producing same response
|
|
469
|
-
}
|
|
470
|
-
}
|
|
471
|
-
}
|
|
472
|
-
// Allow recovery attempts for common failure modes before giving up
|
|
473
|
-
if (exitReason === 'no-action') {
|
|
474
|
-
const noActionAttempts = attempts.filter((a) => a.exitReason === 'no-action').length;
|
|
475
|
-
return noActionAttempts < 2;
|
|
476
|
-
}
|
|
477
|
-
if (exitReason === 'verification-needed') {
|
|
478
|
-
const verificationAttempts = attempts.filter((a) => a.exitReason === 'verification-needed').length;
|
|
479
|
-
return verificationAttempts < 2;
|
|
480
|
-
}
|
|
481
|
-
if (exitReason === 'empty-response') {
|
|
482
|
-
const emptyResponses = attempts.filter((a) => a.exitReason === 'empty-response').length;
|
|
483
|
-
return emptyResponses < 2;
|
|
484
|
-
}
|
|
485
|
-
if (exitReason === 'blocked') {
|
|
486
|
-
const blockedResponses = attempts.filter((a) => a.exitReason === 'blocked').length;
|
|
487
|
-
return blockedResponses < 2;
|
|
488
|
-
}
|
|
489
|
-
// Detect tool loops - if last 2 attempts used tools, are incomplete, AND produced similar responses
|
|
490
|
-
if (exitReason === 'incomplete' && attempts.length >= 2) {
|
|
491
|
-
const recentAttempts = attempts.slice(-2);
|
|
492
|
-
const allIncomplete = recentAttempts.every(a => a.exitReason === 'incomplete' && a.toolsUsed.length > 0);
|
|
493
|
-
if (allIncomplete) {
|
|
494
|
-
// Only stop if responses are also similar (model is stuck in a pattern)
|
|
495
|
-
const fingerprints = recentAttempts.map(a => this.computeResponseFingerprint(a.response));
|
|
496
|
-
if (fingerprints[0] === fingerprints[1] && fingerprints[0] !== '') {
|
|
497
|
-
return false; // Same response pattern with tools - stuck in a loop
|
|
498
|
-
}
|
|
499
|
-
}
|
|
500
|
-
}
|
|
501
|
-
return exitReason === 'incomplete';
|
|
502
|
-
}
|
|
503
|
-
buildContinuationPrompt(originalRequest, attempt, nextAttemptNumber, enforceActions, contextualGuidance) {
|
|
504
|
-
const actionLine = enforceActions
|
|
505
|
-
? '- Continue with tools.'
|
|
506
|
-
: '- Provide your best response.';
|
|
507
|
-
const reason = this.describeContinuationReason(attempt);
|
|
508
|
-
const toolContext = attempt.toolsUsed.length
|
|
509
|
-
? `Tools used: ${attempt.toolsUsed.join(', ')}.`
|
|
510
|
-
: '';
|
|
511
|
-
const guidanceBlock = this.formatContextualGuidance(contextualGuidance);
|
|
512
|
-
const recoveryGuidance = this.buildRecoveryGuidance(attempt);
|
|
513
|
-
const recoveryBlock = recoveryGuidance.length
|
|
514
|
-
? `\nRecovery focus:\n${recoveryGuidance.map((line) => `- ${line}`).join('\n')}`
|
|
515
|
-
: '';
|
|
516
|
-
const playbook = this.buildExecutionPlaybook(enforceActions);
|
|
517
|
-
return `${originalRequest.trim()}
|
|
518
|
-
|
|
519
|
-
Pass ${nextAttemptNumber}: ${reason}
|
|
520
|
-
${toolContext}
|
|
521
|
-
${actionLine}
|
|
522
|
-
- End with TASK_FULLY_COMPLETE when done.${guidanceBlock}${recoveryBlock}${playbook}`;
|
|
523
|
-
}
|
|
524
|
-
buildRecoveryGuidance(attempt) {
|
|
525
|
-
const guidance = [];
|
|
526
|
-
const madeEdits = attempt.toolExecutions.some((tool) => tool.success && this.isEditTool(tool.name));
|
|
527
|
-
const ranValidation = attempt.toolExecutions.some((tool) => tool.success && this.isValidationTool(tool.name));
|
|
528
|
-
if (attempt.exitReason === 'no-action' || attempt.analysis.planOnly || !attempt.analysis.tookAction) {
|
|
529
|
-
guidance.push('Skip more planning; call tools now (read/search -> edit/run checks).');
|
|
530
|
-
}
|
|
531
|
-
if (attempt.exitReason === 'verification-needed') {
|
|
532
|
-
guidance.push('Run verification now (tests/build) and include the output before stopping.');
|
|
533
|
-
}
|
|
534
|
-
if (madeEdits && !ranValidation) {
|
|
535
|
-
guidance.push('You edited code; run fast validation (tests/lint/build) and show the output before stopping.');
|
|
536
|
-
}
|
|
537
|
-
if (attempt.exitReason === 'empty-response') {
|
|
538
|
-
guidance.push('Provide a substantive update with tool calls and outputs; previous reply was empty.');
|
|
539
|
-
}
|
|
540
|
-
if (attempt.exitReason === 'blocked') {
|
|
541
|
-
guidance.push('Proceed with available context without asking for more input; use tools to unblock.');
|
|
542
|
-
}
|
|
543
|
-
const failedTools = attempt.toolExecutions.filter((tool) => !tool.success).map((tool) => tool.name);
|
|
544
|
-
if (failedTools.length > 0) {
|
|
545
|
-
const uniqueFailed = Array.from(new Set(failedTools));
|
|
546
|
-
guidance.push(`Retry failed tools with fixes or alternatives (${uniqueFailed.join(', ')}).`);
|
|
547
|
-
}
|
|
548
|
-
return guidance;
|
|
549
|
-
}
|
|
550
|
-
buildContextualGuidance(request) {
|
|
551
|
-
const normalized = request.toLowerCase();
|
|
552
|
-
const guidance = [];
|
|
553
|
-
guidance.push('Ground responses in actual code; read files before suggesting changes.');
|
|
554
|
-
guidance.push('Default loop: analyze → choose tools → execute → verify → summarize with evidence.');
|
|
555
|
-
guidance.push('Prefer running targeted commands/tests over narratives; include tool outputs as proof.');
|
|
556
|
-
const isLargePaste = normalized.length > 4000 || normalized.split('\n').length > 120;
|
|
557
|
-
if (isLargePaste) {
|
|
558
|
-
guidance.push('Work directly with the provided long input; avoid asking for more detail and act on it.');
|
|
559
|
-
}
|
|
560
|
-
const bugSweep = /\bbugs?\b|\bdefects?\b|\bissues?\b|\bbug\s+report\b/.test(normalized);
|
|
561
|
-
if (bugSweep) {
|
|
562
|
-
guidance.push('Search broadly for related issues.');
|
|
563
|
-
}
|
|
564
|
-
const asksForTests = /\b(tests?|validation|verify|reproduce|repro)\b/.test(normalized);
|
|
565
|
-
if (asksForTests || bugSweep) {
|
|
566
|
-
guidance.push('Capture a failing reproduction or test output first, then re-run after fixes to prove resolution.');
|
|
567
|
-
}
|
|
568
|
-
const needsUnderstanding = /\bexplain\b|\bunders(?:tand|tanding)\b|\boverview\b|\bsummar(y|ize)\b|\bwalk\s+through\b/.test(normalized);
|
|
569
|
-
if (needsUnderstanding) {
|
|
570
|
-
guidance.push('Skim project structure before detailed analysis.');
|
|
571
|
-
}
|
|
572
|
-
const securityWork = /\b(pentest|red\s*team|offensive|exploit|attack|apt|threat|recon|scan|payload|shell|persistence|tao|security testing)\b/.test(normalized);
|
|
573
|
-
if (securityWork) {
|
|
574
|
-
guidance.push('For security/offensive tasks use the TAO suite (TaoOps for NL planning; KineticOps/Recon/Fuzz/Payload/Persist/Hash/NetCraft for execution) and show evidence.');
|
|
575
|
-
}
|
|
576
|
-
const mentionsTests = /\b(test|verify|lint|type[-\s]?check)\b/.test(normalized);
|
|
577
|
-
if (mentionsTests || /build|compile/.test(normalized)) {
|
|
578
|
-
guidance.push('Run quick validation (lint + type-check + focused tests) when changing code; use `npm run verify-core` when unsure.');
|
|
579
|
-
}
|
|
580
|
-
return guidance;
|
|
581
|
-
}
|
|
582
|
-
formatContextualGuidance(guidance) {
|
|
583
|
-
if (!guidance.length)
|
|
584
|
-
return '';
|
|
585
|
-
const lines = guidance.map((line) => `- ${line}`).join('\n');
|
|
586
|
-
return `\nContextual guidance:\n${lines}`;
|
|
587
|
-
}
|
|
588
|
-
buildExecutionPlaybook(enforceActions) {
|
|
589
|
-
const bullets = [
|
|
590
|
-
enforceActions
|
|
591
|
-
? 'Start with a 2-3 bullet action plan that names the tools you will use; skip restating the prompt.'
|
|
592
|
-
: 'If you plan, keep it to 2-3 bullets that name the tools you will use; skip restating the prompt.',
|
|
593
|
-
'Read before writing: inspect the most relevant files/logs first (ls/rg/cat) to ground changes.',
|
|
594
|
-
'Use the smallest effective tool and keep edits scoped; prefer targeted commands over broad rewrites.',
|
|
595
|
-
'After any edits or fixes, run a quick validation (targeted tests/lint/build) and include the command output.',
|
|
596
|
-
'Keep narration tight; surface evidence and next steps; end with TASK_FULLY_COMPLETE only when truly done.',
|
|
597
|
-
];
|
|
598
|
-
return `\nExecution playbook:\n${bullets.map((line) => `- ${line}`).join('\n')}`;
|
|
599
|
-
}
|
|
600
|
-
isEditTool(name) {
|
|
601
|
-
const normalized = name.toLowerCase();
|
|
602
|
-
return ['edit', 'write', 'apply', 'patch', 'codemod', 'refactor', 'format'].some(keyword => normalized.includes(keyword));
|
|
603
|
-
}
|
|
604
|
-
isValidationTool(name) {
|
|
605
|
-
const normalized = name.toLowerCase();
|
|
606
|
-
return ['test', 'lint', 'build', 'check', 'verify', 'coverage', 'ci', 'type-check'].some(keyword => normalized.includes(keyword));
|
|
607
|
-
}
|
|
608
|
-
hasValidationEvidence(history) {
|
|
609
|
-
const recentTools = history.filter(m => m.role === 'tool').slice(-10);
|
|
610
|
-
return recentTools.some((message) => {
|
|
611
|
-
const content = String(message.content || '').toLowerCase();
|
|
612
|
-
return (/\b(jest|mocha|vitest|pytest|go test|npm test|pnpm test|yarn test|pytest|unittest|integration test|end-to-end)\b/.test(content) ||
|
|
613
|
-
/\b(lint|eslint|tsc|type[- ]?check|typecheck)\b/.test(content) ||
|
|
614
|
-
/\b(build|webpack|vite build|next build|pnpm build|npm run build)\b/.test(content) ||
|
|
615
|
-
(/\btests?\b/.test(content) && (/\bpass(ed)?\b/.test(content) || /\bfail(ed)?\b/.test(content))));
|
|
616
|
-
});
|
|
617
|
-
}
|
|
618
|
-
describeContinuationReason(attempt) {
|
|
619
|
-
switch (attempt.exitReason) {
|
|
620
|
-
case 'no-action':
|
|
621
|
-
return 'previous response lacked concrete actions or tool calls.';
|
|
622
|
-
case 'verification-needed':
|
|
623
|
-
return attempt.completion.reason
|
|
624
|
-
? `verification was requested (${attempt.completion.reason}).`
|
|
625
|
-
: 'verification was requested.';
|
|
626
|
-
case 'empty-response':
|
|
627
|
-
return 'previous response was empty or non-substantive.';
|
|
628
|
-
case 'blocked':
|
|
629
|
-
return 'model reported being blocked; continue with available context and tools.';
|
|
630
|
-
case 'refusal':
|
|
631
|
-
return 'request was declined.';
|
|
632
|
-
case 'incomplete':
|
|
633
|
-
default:
|
|
634
|
-
return attempt.completion.reason
|
|
635
|
-
? `previous response did not reach a completion signal (${attempt.completion.reason}).`
|
|
636
|
-
: 'no completion signal detected in the previous response.';
|
|
637
|
-
}
|
|
638
|
-
}
|
|
639
|
-
analyzeResponse(response, toolsUsed, enforceActions) {
|
|
640
|
-
const trimmed = response.trim();
|
|
641
|
-
const wordCount = this.wordCount(trimmed);
|
|
642
|
-
const empty = trimmed.length === 0;
|
|
643
|
-
const planOnly = this.isPlanOnly(trimmed);
|
|
644
|
-
const tookAction = toolsUsed.length > 0 || this.hasActionLanguage(trimmed);
|
|
645
|
-
const hasCompletionMarker = this.hasExplicitCompletion(trimmed);
|
|
646
|
-
const completionContradiction = this.hasCompletionContradiction(trimmed);
|
|
647
|
-
const continuing = this.hasContinuingSignal(trimmed);
|
|
648
|
-
const blocked = this.isBlockedResponse(trimmed);
|
|
649
|
-
const refusal = this.isRefusalResponse(trimmed);
|
|
650
|
-
const tooLight = wordCount < 80;
|
|
651
|
-
const substantiveAnswer = this.hasSubstantiveAnswer(trimmed, {
|
|
652
|
-
planOnly,
|
|
653
|
-
tookAction,
|
|
654
|
-
continuing,
|
|
655
|
-
blocked,
|
|
656
|
-
completionContradiction,
|
|
657
|
-
wordCount,
|
|
658
|
-
});
|
|
659
|
-
const readyToStop = (hasCompletionMarker &&
|
|
660
|
-
!completionContradiction &&
|
|
661
|
-
!continuing &&
|
|
662
|
-
(!enforceActions || tookAction || !tooLight)) ||
|
|
663
|
-
substantiveAnswer;
|
|
664
|
-
return {
|
|
665
|
-
empty,
|
|
666
|
-
planOnly,
|
|
667
|
-
tookAction,
|
|
668
|
-
readyToStop,
|
|
669
|
-
blocked,
|
|
670
|
-
refusal,
|
|
671
|
-
};
|
|
672
|
-
}
|
|
673
|
-
isPlanOnly(response) {
|
|
674
|
-
if (!response.trim())
|
|
675
|
-
return false;
|
|
676
|
-
const planIndicators = [
|
|
677
|
-
/\bplan\b/i,
|
|
678
|
-
/\bapproach\b/i,
|
|
679
|
-
/\bsteps?:\b/i,
|
|
680
|
-
/\bstep\s+1\b/i,
|
|
681
|
-
/\bstart by\b/i,
|
|
682
|
-
/\bfirst[, ]/i,
|
|
683
|
-
/\bthen\s+(?:we|i|run|do|take|handle|address|implement|fix)\b/i,
|
|
684
|
-
/\bnext\s+(?:we|i|up)\b/i,
|
|
685
|
-
/\bwe\s+will\b/i,
|
|
686
|
-
/\bi['\u2019]?ll\b/i,
|
|
687
|
-
/\bi\s+will\b/i,
|
|
688
|
-
/\bhere(?:'|\u2019)s\s+the\s+plan\b/i,
|
|
689
|
-
/\bplan:\b/i,
|
|
690
|
-
/\bapproach:\b/i,
|
|
691
|
-
/\bexecution\s+plan\b/i,
|
|
692
|
-
];
|
|
693
|
-
return planIndicators.some((pattern) => pattern.test(response));
|
|
694
|
-
}
|
|
695
|
-
hasExplicitCompletion(response) {
|
|
696
|
-
if (!response.trim())
|
|
697
|
-
return false;
|
|
698
|
-
if (response.includes(TASK_FULLY_COMPLETE))
|
|
699
|
-
return true;
|
|
700
|
-
const completionPatterns = [
|
|
701
|
-
/\btask(s)? (is|are)?\s*complete\b/i,
|
|
702
|
-
/\ball done\b/i,
|
|
703
|
-
/\bcleanup complete\b/i,
|
|
704
|
-
/\bnothing (else\s*)?to do\b/i,
|
|
705
|
-
/\bno junk (found|remaining)\b/i,
|
|
706
|
-
];
|
|
707
|
-
return completionPatterns.some((pattern) => pattern.test(response));
|
|
708
|
-
}
|
|
709
|
-
hasCompletionContradiction(response) {
|
|
710
|
-
// Skip contradiction check if TASK_FULLY_COMPLETE is present - explicit marker takes precedence
|
|
711
|
-
if (response.includes(TASK_FULLY_COMPLETE)) {
|
|
712
|
-
return false;
|
|
713
|
-
}
|
|
714
|
-
const contradictions = [
|
|
715
|
-
/not\s+yet\s+(done|complete|finished|integrated|implemented)/i,
|
|
716
|
-
/\bstill\s+(need|needs|pending|left)\s+to\b/i, // more specific: "still need to"
|
|
717
|
-
/\bpending\s+(work|tasks?|items?|changes?|fixes?)\b/i, // only actual pending work
|
|
718
|
-
/\bremaining\s+(work|tasks?|items?|to\s+do)\b/i, // only actual remaining work
|
|
719
|
-
/\bnot\s+(working|functional)\b/i,
|
|
720
|
-
/\btests?\s+(are\s+)?failing\b/i,
|
|
721
|
-
/\bto\s+be\s+(done|completed|fixed|implemented)\b/i,
|
|
722
|
-
/\btodo\b/i,
|
|
723
|
-
/\bfixme\b/i,
|
|
724
|
-
];
|
|
725
|
-
return contradictions.some((pattern) => pattern.test(response));
|
|
726
|
-
}
|
|
727
|
-
hasActionLanguage(response) {
|
|
728
|
-
const actionPatterns = [
|
|
729
|
-
/\bremoved\b/i,
|
|
730
|
-
/\bdeleted\b/i,
|
|
731
|
-
/\bcreated\b/i,
|
|
732
|
-
/\badded\b/i,
|
|
733
|
-
/\bupdated\b/i,
|
|
734
|
-
/\brefactored\b/i,
|
|
735
|
-
/\bcommitted\b/i,
|
|
736
|
-
/\bran\b.+\btests?\b/i,
|
|
737
|
-
/\bcleaned\b/i,
|
|
738
|
-
/\bexecuted\b/i,
|
|
739
|
-
/\bapplied\b/i,
|
|
740
|
-
/\bpatched\b/i,
|
|
741
|
-
/\bimplemented\b/i,
|
|
742
|
-
/\bfixed\b/i,
|
|
743
|
-
/\bverified\b/i,
|
|
744
|
-
/\bvalidated\b/i,
|
|
745
|
-
/\bdeployed\b/i,
|
|
746
|
-
/\bmerged\b/i,
|
|
747
|
-
];
|
|
748
|
-
return actionPatterns.some((pattern) => pattern.test(response));
|
|
749
|
-
}
|
|
750
|
-
hasContinuingSignal(response) {
|
|
751
|
-
if (!response.trim())
|
|
752
|
-
return false;
|
|
753
|
-
const patterns = [
|
|
754
|
-
/\b(i'?m|i am)\s+(continuing|working|starting|beginning)\b/i,
|
|
755
|
-
/\bwill\s+(now|next)\s+(run|do|start|take|handle|work on)\b/i,
|
|
756
|
-
/\babout\s+to\s+(run|execute|start|begin)\b/i,
|
|
757
|
-
/\bnext\s+up\b/i,
|
|
758
|
-
/\bplan\s+to\s+start\b/i,
|
|
759
|
-
];
|
|
760
|
-
return patterns.some((pattern) => pattern.test(response));
|
|
761
|
-
}
|
|
762
|
-
isBlockedResponse(response) {
|
|
763
|
-
if (!response.trim())
|
|
764
|
-
return false;
|
|
765
|
-
const patterns = [
|
|
766
|
-
/\bneed(s)?\s+(more\s+)?(info|information|details|clarification)\b/i,
|
|
767
|
-
/\bmissing\s+(api\s*key|credentials?|access|permission|token)\b/i,
|
|
768
|
-
/\b(no|not)\s+authorized\b/i,
|
|
769
|
-
/\b(i['\u2019]?m|i am|currently|still)\s+blocked\b/i,
|
|
770
|
-
/\bblocked\s+(on|by|due to|because of)\b/i,
|
|
771
|
-
/\bblocked\b.{0,40}\b(access|permission|token|credential|quota|limit)\b/i,
|
|
772
|
-
/\bcannot\s+(proceed|continue|start|run|access)\b/i,
|
|
773
|
-
];
|
|
774
|
-
return patterns.some((pattern) => pattern.test(response));
|
|
775
|
-
}
|
|
776
|
-
/**
|
|
777
|
-
* Detects AI safety refusals - when the model declines to help with a request.
|
|
778
|
-
* These are terminal states that should not be retried.
|
|
779
|
-
*/
|
|
780
|
-
isRefusalResponse(response) {
|
|
781
|
-
if (!response.trim())
|
|
782
|
-
return false;
|
|
783
|
-
const patterns = [
|
|
784
|
-
// Direct refusals
|
|
785
|
-
/\b(i\s+)?(can'?t|cannot|won'?t|will\s+not)\s+(help|assist|do\s+that|provide|comply|support)\b/i,
|
|
786
|
-
/\bi'?m\s+(not\s+able|unable)\s+to\s+(help|assist|do\s+that|provide)\b/i,
|
|
787
|
-
/\bi\s+(refuse|decline)\s+to\b/i,
|
|
788
|
-
/\bsorry[,.]?\s+(but\s+)?(i\s+)?(can'?t|cannot|won'?t|am\s+not\s+able)\b/i,
|
|
789
|
-
// Harmful/dangerous content refusals
|
|
790
|
-
/\b(harmful|dangerous|illegal|unethical|malicious|destructive)\s+(content|request|activity)\b/i,
|
|
791
|
-
/\bagainst\s+(my|the)\s+(guidelines|policies|principles|ethics)\b/i,
|
|
792
|
-
/\bviolates?\s+(my|the)?\s*(guidelines|policies|terms|safety)\b/i,
|
|
793
|
-
/\bnot\s+(designed|intended|programmed|able)\s+to\s+(help|assist)\s+with\b/i,
|
|
794
|
-
// Safety/ethics language
|
|
795
|
-
/\b(ethical|safety|responsible)\s+(concerns?|guidelines?|reasons?)\b/i,
|
|
796
|
-
/\bcould\s+(cause|lead\s+to)\s+(harm|damage|injury)\b/i,
|
|
797
|
-
// DeepSeek specific patterns
|
|
798
|
-
/\bI'm an AI assistant.{0,50}(cannot|won't|can't)\b/i,
|
|
799
|
-
];
|
|
800
|
-
return patterns.some((pattern) => pattern.test(response));
|
|
801
|
-
}
|
|
802
|
-
wordCount(text) {
|
|
803
|
-
if (!text.trim())
|
|
804
|
-
return 0;
|
|
805
|
-
return text.trim().split(/\s+/).length;
|
|
806
|
-
}
|
|
807
|
-
/**
|
|
808
|
-
* Compute a simple fingerprint of a response for repetition detection.
|
|
809
|
-
* Normalizes whitespace and takes first N significant words to detect
|
|
810
|
-
* when the model is producing the same response repeatedly.
|
|
811
|
-
*/
|
|
812
|
-
computeResponseFingerprint(response) {
|
|
813
|
-
if (!response?.trim())
|
|
814
|
-
return '';
|
|
815
|
-
// Normalize: lowercase, collapse whitespace, take first 200 chars
|
|
816
|
-
const normalized = response.toLowerCase().replace(/\s+/g, ' ').trim();
|
|
817
|
-
// Extract key content - skip common filler words
|
|
818
|
-
const words = normalized.split(' ')
|
|
819
|
-
.filter(w => w.length > 3)
|
|
820
|
-
.slice(0, 30);
|
|
821
|
-
return words.join(' ');
|
|
822
|
-
}
|
|
823
|
-
/**
|
|
824
|
-
* Detects when a response is sufficiently detailed to stand on its own,
|
|
825
|
-
* avoiding false "incomplete" flags for thorough answers without explicit
|
|
826
|
-
* completion markers.
|
|
827
|
-
*/
|
|
828
|
-
hasSubstantiveAnswer(response, options) {
|
|
829
|
-
if (!response.trim())
|
|
830
|
-
return false;
|
|
831
|
-
if (options.planOnly || options.continuing || options.blocked || options.completionContradiction) {
|
|
832
|
-
return false;
|
|
833
|
-
}
|
|
834
|
-
const threshold = options.tookAction ? 80 : 120;
|
|
835
|
-
return options.wordCount >= threshold;
|
|
836
|
-
}
|
|
837
|
-
/**
|
|
838
|
-
* Early safety check for potentially harmful requests.
|
|
839
|
-
* Returns immediately with a helpful response instead of looping.
|
|
840
|
-
*
|
|
841
|
-
* This catches obvious cases before the model even processes them,
|
|
842
|
-
* preventing wasted compute and infinite "thinking" loops.
|
|
843
|
-
*/
|
|
844
|
-
checkForHarmfulRequest(request) {
|
|
845
|
-
const lower = request.toLowerCase();
|
|
846
|
-
// Known major companies/services that should NEVER be attacked without explicit authorization
|
|
847
|
-
const protectedTargets = [
|
|
848
|
-
'google', 'microsoft', 'apple', 'amazon', 'meta', 'facebook',
|
|
849
|
-
'twitter', 'x.com', 'linkedin', 'github', 'gitlab', 'netflix',
|
|
850
|
-
'spotify', 'openai', 'anthropic', 'cloudflare', 'aws', 'azure',
|
|
851
|
-
'gcp', 'oracle', 'ibm', 'cisco', 'adobe', 'salesforce', 'slack',
|
|
852
|
-
'zoom', 'dropbox', 'paypal', 'stripe', 'visa', 'mastercard',
|
|
853
|
-
];
|
|
854
|
-
// Action words that indicate malicious intent
|
|
855
|
-
const maliciousActions = [
|
|
856
|
-
'attack', 'hack', 'pwn', 'exploit', 'compromise', 'breach',
|
|
857
|
-
'ddos', 'dos', 'flood', 'crash', 'destroy', 'deface',
|
|
858
|
-
'steal', 'exfiltrate', 'dump', 'leak',
|
|
859
|
-
];
|
|
860
|
-
// Check for combined patterns: malicious action + protected target
|
|
861
|
-
const hasMaliciousAction = maliciousActions.some(action => lower.includes(action));
|
|
862
|
-
const hasProtectedTarget = protectedTargets.some(target => lower.includes(target));
|
|
863
|
-
// Only trigger for clear cases of unauthorized attacks
|
|
864
|
-
if (hasMaliciousAction && hasProtectedTarget) {
|
|
865
|
-
// Check for legitimate contexts that override this
|
|
866
|
-
const legitimateContexts = [
|
|
867
|
-
'ctf', 'capture the flag', 'hackthebox', 'tryhackme',
|
|
868
|
-
'lab', 'sandbox', 'authorized', 'pentest', 'penetration test',
|
|
869
|
-
'bug bounty', 'scope', 'engagement', 'test environment',
|
|
870
|
-
];
|
|
871
|
-
const hasLegitimateContext = legitimateContexts.some(ctx => lower.includes(ctx));
|
|
872
|
-
if (!hasLegitimateContext) {
|
|
873
|
-
return {
|
|
874
|
-
isHarmful: true,
|
|
875
|
-
response: `I can't help with unauthorized attacks against production systems or services.
|
|
876
|
-
|
|
877
|
-
For legitimate security work, I can help with:
|
|
878
|
-
• **CTF challenges** - Specify the CTF platform or challenge name
|
|
879
|
-
• **Authorized pentests** - Share the scope document or confirm you have written authorization
|
|
880
|
-
• **Bug bounty** - Confirm the target is in scope for a bug bounty program
|
|
881
|
-
• **Lab environments** - Specify it's a personal lab, HackTheBox, TryHackMe, etc.
|
|
882
|
-
• **Security research** - Describe the defensive or educational purpose
|
|
883
|
-
|
|
884
|
-
What authorized security work can I help you with?`,
|
|
885
|
-
};
|
|
886
|
-
}
|
|
887
|
-
}
|
|
888
|
-
// Also catch generic "attack everything" requests without any target
|
|
889
|
-
if (hasMaliciousAction && !lower.includes('ctf') && !lower.includes('lab')) {
|
|
890
|
-
const veryGenericPatterns = [
|
|
891
|
-
/attack\s+(any|random|all)/i,
|
|
892
|
-
/hack\s+(into|the|any)/i,
|
|
893
|
-
/ddos\s+(the|any|all)/i,
|
|
894
|
-
];
|
|
895
|
-
if (veryGenericPatterns.some(p => p.test(request))) {
|
|
896
|
-
return {
|
|
897
|
-
isHarmful: true,
|
|
898
|
-
response: `I need more context about what you're trying to accomplish.
|
|
899
|
-
|
|
900
|
-
If this is for:
|
|
901
|
-
• **CTF/Competition** - What platform or challenge?
|
|
902
|
-
• **Learning** - What concept are you trying to understand?
|
|
903
|
-
• **Authorized testing** - What's the scope and authorization?
|
|
904
|
-
|
|
905
|
-
Please provide context so I can help appropriately.`,
|
|
906
|
-
};
|
|
907
|
-
}
|
|
908
|
-
}
|
|
909
|
-
return { isHarmful: false, response: '' };
|
|
910
|
-
}
|
|
911
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
912
|
-
// ATTACK CHAIN EXECUTION
|
|
913
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
914
|
-
/**
|
|
915
|
-
* Detect if a request should trigger attack chain execution.
|
|
916
|
-
* Uses natural language parsing to identify security operation intents.
|
|
917
|
-
*/
|
|
918
|
-
shouldExecuteAttackChain(request, mode = 'auto') {
|
|
919
|
-
if (mode === 'disabled') {
|
|
920
|
-
return { shouldExecute: false, intent: null };
|
|
921
|
-
}
|
|
922
|
-
if (mode === 'forced') {
|
|
923
|
-
const intent = parseNaturalLanguageIntent(request);
|
|
924
|
-
return { shouldExecute: true, intent };
|
|
925
|
-
}
|
|
926
|
-
// Auto mode: parse intent and check for security operations
|
|
927
|
-
const intent = parseNaturalLanguageIntent(request);
|
|
928
|
-
const securityActions = ['recon', 'scan', 'enumerate', 'exploit', 'extract', 'test', 'monitor'];
|
|
929
|
-
const isSecurityOp = securityActions.includes(intent.action);
|
|
930
|
-
const hasTargets = intent.targets.length > 0;
|
|
931
|
-
return {
|
|
932
|
-
shouldExecute: isSecurityOp && hasTargets,
|
|
933
|
-
intent: isSecurityOp ? intent : null,
|
|
934
|
-
};
|
|
935
|
-
}
|
|
936
|
-
/**
|
|
937
|
-
* Validate authorization context for attack chain execution.
|
|
938
|
-
* Ensures proper authorization before executing security techniques.
|
|
939
|
-
*/
|
|
940
|
-
validateAttackAuthorization(intent, authContext) {
|
|
941
|
-
// Use the TAO authorization validator
|
|
942
|
-
const validation = validateAuthorizationContext(intent);
|
|
943
|
-
// Additional checks for protected targets
|
|
944
|
-
const protectedTargets = [
|
|
945
|
-
'google.com', 'microsoft.com', 'apple.com', 'amazon.com',
|
|
946
|
-
'facebook.com', 'twitter.com', 'github.com',
|
|
947
|
-
];
|
|
948
|
-
const targetingProtected = intent.targets.some(t => protectedTargets.some(pt => t.toLowerCase().includes(pt)));
|
|
949
|
-
if (targetingProtected && !validation.valid) {
|
|
950
|
-
return {
|
|
951
|
-
authorized: false,
|
|
952
|
-
reason: 'Targeting protected production systems without explicit authorization',
|
|
953
|
-
warnings: [
|
|
954
|
-
'Protected targets detected. Provide CTF/lab context or authorization scope.',
|
|
955
|
-
...validation.warnings,
|
|
956
|
-
],
|
|
957
|
-
};
|
|
958
|
-
}
|
|
959
|
-
// Check for explicit authorization context
|
|
960
|
-
if (authContext) {
|
|
961
|
-
const authorizedContexts = ['ctf', 'lab', 'pentest', 'bug-bounty', 'authorized'];
|
|
962
|
-
const hasExplicitAuth = authorizedContexts.some(ctx => authContext.toLowerCase().includes(ctx));
|
|
963
|
-
if (hasExplicitAuth) {
|
|
964
|
-
return {
|
|
965
|
-
authorized: true,
|
|
966
|
-
reason: `Explicit authorization: ${authContext}`,
|
|
967
|
-
warnings: validation.warnings,
|
|
968
|
-
};
|
|
969
|
-
}
|
|
970
|
-
}
|
|
971
|
-
return {
|
|
972
|
-
authorized: validation.valid,
|
|
973
|
-
reason: validation.type,
|
|
974
|
-
warnings: validation.warnings,
|
|
975
|
-
};
|
|
976
|
-
}
|
|
977
|
-
/**
|
|
978
|
-
* Execute an attack chain with TAO techniques.
|
|
979
|
-
* Returns results including all executed techniques and artifacts.
|
|
980
|
-
*/
|
|
981
|
-
async executeAttackChain(request, options = {}) {
|
|
982
|
-
const intent = parseNaturalLanguageIntent(request);
|
|
983
|
-
const targets = options.targets ?? intent.targets;
|
|
984
|
-
// Normalize depth - map 'comprehensive' to 'deep'
|
|
985
|
-
const rawDepth = options.depth ?? intent.depth;
|
|
986
|
-
const depth = rawDepth === 'comprehensive' ? 'deep' : rawDepth;
|
|
987
|
-
const stealth = options.stealth ?? intent.constraints.includes('stealth');
|
|
988
|
-
// Validate authorization
|
|
989
|
-
const auth = this.validateAttackAuthorization(intent, options.authContext);
|
|
990
|
-
if (!auth.authorized) {
|
|
991
|
-
throw new Error(`Attack chain execution not authorized: ${auth.reason}`);
|
|
992
|
-
}
|
|
993
|
-
const executedTechniques = [];
|
|
994
|
-
const phasesCompleted = new Set();
|
|
995
|
-
const startTime = Date.now();
|
|
996
|
-
// Execute chain for each target
|
|
997
|
-
for (const target of targets) {
|
|
998
|
-
const chain = planAttackChain(intent, `Attack chain: ${target}`);
|
|
999
|
-
while (chain.state === 'planning' || chain.state === 'executing') {
|
|
1000
|
-
const action = selectNextTechnique(chain);
|
|
1001
|
-
if (!action)
|
|
1002
|
-
break;
|
|
1003
|
-
const technique = techniqueRegistry.get(action.id);
|
|
1004
|
-
if (!technique)
|
|
1005
|
-
continue;
|
|
1006
|
-
const params = {
|
|
1007
|
-
target,
|
|
1008
|
-
depth,
|
|
1009
|
-
stealth,
|
|
1010
|
-
timeout: depth === 'deep' ? 60000 : depth === 'standard' ? 30000 : 10000,
|
|
1011
|
-
context: {
|
|
1012
|
-
chainId: chain.id,
|
|
1013
|
-
phase: technique.phase,
|
|
1014
|
-
previousArtifacts: executedTechniques
|
|
1015
|
-
.filter(t => t.success)
|
|
1016
|
-
.flatMap(t => t.artifacts),
|
|
1017
|
-
},
|
|
1018
|
-
};
|
|
1019
|
-
try {
|
|
1020
|
-
const { result } = await executeTechniqueInChain(chain, action, params);
|
|
1021
|
-
executedTechniques.push({
|
|
1022
|
-
id: technique.id,
|
|
1023
|
-
name: technique.name,
|
|
1024
|
-
phase: technique.phase,
|
|
1025
|
-
success: result.success,
|
|
1026
|
-
duration: result.duration,
|
|
1027
|
-
artifacts: result.artifacts,
|
|
1028
|
-
});
|
|
1029
|
-
if (result.success) {
|
|
1030
|
-
phasesCompleted.add(technique.phase);
|
|
1031
|
-
}
|
|
1032
|
-
options.onProgress?.(chain, technique.id, result);
|
|
1033
|
-
}
|
|
1034
|
-
catch (err) {
|
|
1035
|
-
// Log but continue chain execution
|
|
1036
|
-
executedTechniques.push({
|
|
1037
|
-
id: technique.id,
|
|
1038
|
-
name: technique.name,
|
|
1039
|
-
phase: technique.phase,
|
|
1040
|
-
success: false,
|
|
1041
|
-
duration: 0,
|
|
1042
|
-
artifacts: [{ type: 'error', data: String(err) }],
|
|
1043
|
-
});
|
|
1044
|
-
}
|
|
1045
|
-
}
|
|
1046
|
-
}
|
|
1047
|
-
const successCount = executedTechniques.filter(t => t.success).length;
|
|
1048
|
-
return {
|
|
1049
|
-
chain: planAttackChain(intent, request), // Return final chain state
|
|
1050
|
-
techniques: executedTechniques,
|
|
1051
|
-
totalDuration: Date.now() - startTime,
|
|
1052
|
-
successRate: executedTechniques.length > 0
|
|
1053
|
-
? successCount / executedTechniques.length
|
|
1054
|
-
: 0,
|
|
1055
|
-
phasesCompleted: Array.from(phasesCompleted),
|
|
1056
|
-
};
|
|
1057
|
-
}
|
|
1058
|
-
/**
|
|
1059
|
-
* Run orchestration with optional attack chain integration.
|
|
1060
|
-
* When attack chain mode is enabled, security operations are executed
|
|
1061
|
-
* directly through TAO techniques rather than relying on LLM tool calls.
|
|
1062
|
-
*/
|
|
1063
|
-
async runWithAttackChain(request, options = {}) {
|
|
1064
|
-
const attackChainMode = options.attackChainMode ?? 'auto';
|
|
1065
|
-
// Check if we should execute attack chain
|
|
1066
|
-
const { shouldExecute, intent } = this.shouldExecuteAttackChain(request, attackChainMode);
|
|
1067
|
-
if (!shouldExecute || !intent) {
|
|
1068
|
-
// Fall back to normal orchestration
|
|
1069
|
-
return this.runToCompletion(request, options);
|
|
1070
|
-
}
|
|
1071
|
-
// Validate authorization
|
|
1072
|
-
const auth = this.validateAttackAuthorization(intent, options.authorizationContext);
|
|
1073
|
-
if (!auth.authorized) {
|
|
1074
|
-
return {
|
|
1075
|
-
finalResponse: `Cannot execute security operation: ${auth.reason}\n\n${auth.warnings.join('\n')}`,
|
|
1076
|
-
toolsUsed: [],
|
|
1077
|
-
planOnly: false,
|
|
1078
|
-
tookAction: false,
|
|
1079
|
-
completion: {
|
|
1080
|
-
isComplete: true,
|
|
1081
|
-
confidence: 1.0,
|
|
1082
|
-
signals: {
|
|
1083
|
-
hasExplicitCompletionStatement: true,
|
|
1084
|
-
hasIncompleteWorkIndicators: false,
|
|
1085
|
-
hasPendingActionIndicators: false,
|
|
1086
|
-
hasErrorIndicators: false,
|
|
1087
|
-
hasFollowUpQuestions: false,
|
|
1088
|
-
toolsUsedInLastResponse: 0,
|
|
1089
|
-
lastToolWasReadOnly: false,
|
|
1090
|
-
consecutiveResponsesWithoutTools: 0,
|
|
1091
|
-
hasRecentFileWrites: false,
|
|
1092
|
-
hasRecentCommits: false,
|
|
1093
|
-
todoItemsPending: 0,
|
|
1094
|
-
todoItemsCompleted: 0,
|
|
1095
|
-
mentionsFutureWork: false,
|
|
1096
|
-
completionConfidence: 1.0,
|
|
1097
|
-
},
|
|
1098
|
-
reason: 'Authorization required',
|
|
1099
|
-
shouldVerify: false,
|
|
1100
|
-
},
|
|
1101
|
-
exitReason: 'attack-chain-aborted',
|
|
1102
|
-
statusSummary: `Authorization required: ${auth.reason}`,
|
|
1103
|
-
limitations: auth.warnings,
|
|
1104
|
-
recommendations: [
|
|
1105
|
-
'Provide explicit authorization context (CTF, lab, pentest scope)',
|
|
1106
|
-
'Use --auth-context flag to specify authorization',
|
|
1107
|
-
],
|
|
1108
|
-
};
|
|
1109
|
-
}
|
|
1110
|
-
// Execute attack chain
|
|
1111
|
-
try {
|
|
1112
|
-
// Normalize depth for attack chain
|
|
1113
|
-
const attackRawDepth = options.attackDepth ?? intent.depth;
|
|
1114
|
-
const attackDepth = attackRawDepth === 'comprehensive' ? 'deep' : attackRawDepth;
|
|
1115
|
-
const chainResult = await this.executeAttackChain(request, {
|
|
1116
|
-
targets: options.attackTargets ?? intent.targets,
|
|
1117
|
-
depth: attackDepth,
|
|
1118
|
-
stealth: options.stealthMode ?? intent.constraints.includes('stealth'),
|
|
1119
|
-
authContext: options.authorizationContext,
|
|
1120
|
-
onProgress: options.onAttackChainProgress,
|
|
1121
|
-
});
|
|
1122
|
-
// Build response summary
|
|
1123
|
-
const summary = this.buildAttackChainSummary(chainResult);
|
|
1124
|
-
return {
|
|
1125
|
-
finalResponse: summary,
|
|
1126
|
-
toolsUsed: chainResult.techniques.map(t => t.id),
|
|
1127
|
-
planOnly: false,
|
|
1128
|
-
tookAction: true,
|
|
1129
|
-
completion: {
|
|
1130
|
-
isComplete: true,
|
|
1131
|
-
confidence: chainResult.successRate,
|
|
1132
|
-
signals: {
|
|
1133
|
-
hasExplicitCompletionStatement: true,
|
|
1134
|
-
hasIncompleteWorkIndicators: false,
|
|
1135
|
-
hasPendingActionIndicators: false,
|
|
1136
|
-
hasErrorIndicators: chainResult.successRate < 0.5,
|
|
1137
|
-
hasFollowUpQuestions: false,
|
|
1138
|
-
toolsUsedInLastResponse: chainResult.techniques.length,
|
|
1139
|
-
lastToolWasReadOnly: false,
|
|
1140
|
-
consecutiveResponsesWithoutTools: 0,
|
|
1141
|
-
hasRecentFileWrites: false,
|
|
1142
|
-
hasRecentCommits: false,
|
|
1143
|
-
todoItemsPending: 0,
|
|
1144
|
-
todoItemsCompleted: chainResult.techniques.length,
|
|
1145
|
-
mentionsFutureWork: false,
|
|
1146
|
-
completionConfidence: chainResult.successRate,
|
|
1147
|
-
},
|
|
1148
|
-
reason: 'Attack chain completed',
|
|
1149
|
-
shouldVerify: chainResult.successRate < 1.0,
|
|
1150
|
-
},
|
|
1151
|
-
exitReason: 'attack-chain-complete',
|
|
1152
|
-
statusSummary: `Attack chain: ${chainResult.techniques.length} techniques, ${Math.round(chainResult.successRate * 100)}% success`,
|
|
1153
|
-
limitations: [],
|
|
1154
|
-
recommendations: chainResult.successRate < 1.0
|
|
1155
|
-
? ['Review failed techniques and adjust approach']
|
|
1156
|
-
: [],
|
|
1157
|
-
attackChainResult: chainResult,
|
|
1158
|
-
};
|
|
1159
|
-
}
|
|
1160
|
-
catch (err) {
|
|
1161
|
-
return {
|
|
1162
|
-
finalResponse: `Attack chain execution failed: ${String(err)}`,
|
|
1163
|
-
toolsUsed: [],
|
|
1164
|
-
planOnly: false,
|
|
1165
|
-
tookAction: false,
|
|
1166
|
-
completion: {
|
|
1167
|
-
isComplete: true,
|
|
1168
|
-
confidence: 0,
|
|
1169
|
-
signals: {
|
|
1170
|
-
hasExplicitCompletionStatement: true,
|
|
1171
|
-
hasIncompleteWorkIndicators: false,
|
|
1172
|
-
hasPendingActionIndicators: false,
|
|
1173
|
-
hasErrorIndicators: true,
|
|
1174
|
-
hasFollowUpQuestions: false,
|
|
1175
|
-
toolsUsedInLastResponse: 0,
|
|
1176
|
-
lastToolWasReadOnly: false,
|
|
1177
|
-
consecutiveResponsesWithoutTools: 0,
|
|
1178
|
-
hasRecentFileWrites: false,
|
|
1179
|
-
hasRecentCommits: false,
|
|
1180
|
-
todoItemsPending: 0,
|
|
1181
|
-
todoItemsCompleted: 0,
|
|
1182
|
-
mentionsFutureWork: false,
|
|
1183
|
-
completionConfidence: 0,
|
|
1184
|
-
},
|
|
1185
|
-
reason: `Error: ${String(err)}`,
|
|
1186
|
-
shouldVerify: false,
|
|
1187
|
-
},
|
|
1188
|
-
exitReason: 'attack-chain-aborted',
|
|
1189
|
-
statusSummary: `Attack chain failed: ${String(err)}`,
|
|
1190
|
-
limitations: [String(err)],
|
|
1191
|
-
recommendations: ['Check target connectivity', 'Verify authorization context'],
|
|
1192
|
-
};
|
|
1193
|
-
}
|
|
1194
|
-
}
|
|
1195
|
-
/**
|
|
1196
|
-
* Build a human-readable summary of attack chain execution.
|
|
1197
|
-
*/
|
|
1198
|
-
buildAttackChainSummary(result) {
|
|
1199
|
-
const lines = [];
|
|
1200
|
-
lines.push('## Attack Chain Execution Summary\n');
|
|
1201
|
-
// Overall stats
|
|
1202
|
-
lines.push(`**Duration:** ${Math.round(result.totalDuration / 1000)}s`);
|
|
1203
|
-
lines.push(`**Success Rate:** ${Math.round(result.successRate * 100)}%`);
|
|
1204
|
-
lines.push(`**Phases Completed:** ${result.phasesCompleted.join(', ')}\n`);
|
|
1205
|
-
// Technique breakdown
|
|
1206
|
-
lines.push('### Techniques Executed\n');
|
|
1207
|
-
const byPhase = new Map();
|
|
1208
|
-
for (const tech of result.techniques) {
|
|
1209
|
-
const list = byPhase.get(tech.phase) || [];
|
|
1210
|
-
list.push(tech);
|
|
1211
|
-
byPhase.set(tech.phase, list);
|
|
1212
|
-
}
|
|
1213
|
-
for (const [phase, techniques] of byPhase) {
|
|
1214
|
-
lines.push(`#### ${phase}`);
|
|
1215
|
-
for (const tech of techniques) {
|
|
1216
|
-
const status = tech.success ? '✓' : '✗';
|
|
1217
|
-
lines.push(`- ${status} **${tech.name}** (${Math.round(tech.duration / 1000)}s)`);
|
|
1218
|
-
if (tech.artifacts.length > 0) {
|
|
1219
|
-
lines.push(` - Artifacts: ${tech.artifacts.length} collected`);
|
|
1220
|
-
}
|
|
1221
|
-
}
|
|
1222
|
-
lines.push('');
|
|
1223
|
-
}
|
|
1224
|
-
// Artifacts summary
|
|
1225
|
-
const allArtifacts = result.techniques.flatMap(t => t.artifacts);
|
|
1226
|
-
if (allArtifacts.length > 0) {
|
|
1227
|
-
lines.push('### Collected Artifacts\n');
|
|
1228
|
-
const artifactsByType = new Map();
|
|
1229
|
-
for (const artifact of allArtifacts) {
|
|
1230
|
-
artifactsByType.set(artifact.type, (artifactsByType.get(artifact.type) || 0) + 1);
|
|
1231
|
-
}
|
|
1232
|
-
for (const [type, count] of artifactsByType) {
|
|
1233
|
-
lines.push(`- **${type}:** ${count}`);
|
|
1234
|
-
}
|
|
1235
|
-
}
|
|
1236
|
-
return lines.join('\n');
|
|
1237
|
-
}
|
|
1238
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1239
|
-
// UNIFIED ORCHESTRATION - FULL STACK INTEGRATION
|
|
1240
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1241
|
-
/** Goal-to-technique mapping for goal-oriented execution */
|
|
1242
|
-
goalTechniques = {
|
|
1243
|
-
reconnaissance: [
|
|
1244
|
-
'dns_enum', 'whois_recon', 'asn_lookup', 'subdomain_enum', 'dns_zone_transfer',
|
|
1245
|
-
'ssl_analysis', 'ct_search', 'web_fingerprint', 'tech_fingerprint',
|
|
1246
|
-
'cloud_detect', 's3_enum', 'gcs_enum', 'azure_enum', 'email_security',
|
|
1247
|
-
'port_scan', 'service_enum', 'api_discovery', 'git_analysis',
|
|
1248
|
-
],
|
|
1249
|
-
access: [
|
|
1250
|
-
'credential_spray', 'exploit_attempt', 'cloud_metadata', 'container_escape',
|
|
1251
|
-
],
|
|
1252
|
-
persistence: [
|
|
1253
|
-
'persistence', 'sched_persist', 'ssh_persist', 'c2_beacon',
|
|
1254
|
-
],
|
|
1255
|
-
privilege: [
|
|
1256
|
-
'privesc', 'credential_harvest', 'memory_dump', 'cloud_creds',
|
|
1257
|
-
],
|
|
1258
|
-
lateral: [
|
|
1259
|
-
'lateral_move', 'network_discovery', 'smb_enum', 'ssh_enum',
|
|
1260
|
-
],
|
|
1261
|
-
collection: [
|
|
1262
|
-
'file_discovery', 'data_discovery', 'process_enum', 'cloud_enum',
|
|
1263
|
-
'credential_harvest', 'git_analysis', 'api_surface_map',
|
|
1264
|
-
],
|
|
1265
|
-
exfiltration: [
|
|
1266
|
-
'data_staging', 'exfil', 'c2_beacon',
|
|
1267
|
-
],
|
|
1268
|
-
impact: [
|
|
1269
|
-
'service_disruption', 'resource_exhaustion', 'cleanup',
|
|
1270
|
-
],
|
|
1271
|
-
stealth: [
|
|
1272
|
-
'cleanup', 'process_enum', 'network_discovery',
|
|
1273
|
-
],
|
|
1274
|
-
all: [], // Dynamically populated
|
|
1275
|
-
};
|
|
1276
|
-
/**
|
|
1277
|
-
* Unified orchestration entry point.
|
|
1278
|
-
* Routes to appropriate execution strategy based on mode and options.
|
|
1279
|
-
*
|
|
1280
|
-
* This is the primary method for full-stack unified orchestration that
|
|
1281
|
-
* combines LLM-driven orchestration with direct TAO technique execution,
|
|
1282
|
-
* infrastructure deployment, and goal-oriented APT simulation.
|
|
1283
|
-
*/
|
|
1284
|
-
async runUnified(request, options = {}) {
|
|
1285
|
-
const mode = options.unifiedMode || this.detectMode(request, options);
|
|
1286
|
-
switch (mode) {
|
|
1287
|
-
case 'attack-chain':
|
|
1288
|
-
return this.runWithAttackChain(request, options);
|
|
1289
|
-
case 'apt-simulation':
|
|
1290
|
-
return this.runAPTSimulation(request, options);
|
|
1291
|
-
case 'infrastructure':
|
|
1292
|
-
return this.runInfrastructureMode(request, options);
|
|
1293
|
-
case 'red-team':
|
|
1294
|
-
return this.runRedTeamMode(request, options);
|
|
1295
|
-
case 'intel-collection':
|
|
1296
|
-
return this.runIntelCollectionMode(request, options);
|
|
1297
|
-
case 'compliance':
|
|
1298
|
-
return this.runComplianceMode(request, options);
|
|
1299
|
-
case 'standard':
|
|
1300
|
-
default:
|
|
1301
|
-
// Check for full kill-chain mode
|
|
1302
|
-
if (options.fullKillChain) {
|
|
1303
|
-
return this.runFullOffensiveChain(request, options);
|
|
1304
|
-
}
|
|
1305
|
-
// Check for goal-oriented execution
|
|
1306
|
-
if (options.goal || options.goals) {
|
|
1307
|
-
return this.runGoalOriented(request, options);
|
|
1308
|
-
}
|
|
1309
|
-
return this.runToCompletion(request, options);
|
|
1310
|
-
}
|
|
1311
|
-
}
|
|
1312
|
-
/**
|
|
1313
|
-
* Auto-detect the appropriate orchestration mode from request content.
|
|
1314
|
-
*/
|
|
1315
|
-
detectMode(request, options) {
|
|
1316
|
-
const lower = request.toLowerCase();
|
|
1317
|
-
// Check for explicit goals first
|
|
1318
|
-
if (options.goal || options.goals) {
|
|
1319
|
-
return 'attack-chain';
|
|
1320
|
-
}
|
|
1321
|
-
// Infrastructure keywords
|
|
1322
|
-
if (lower.includes('infrastructure') || lower.includes('deploy') ||
|
|
1323
|
-
lower.includes('c2 server') || lower.includes('redirector')) {
|
|
1324
|
-
return 'infrastructure';
|
|
1325
|
-
}
|
|
1326
|
-
// Compliance keywords
|
|
1327
|
-
if (lower.includes('compliance') || lower.includes('audit') ||
|
|
1328
|
-
lower.includes('legal') || lower.includes('framework')) {
|
|
1329
|
-
return 'compliance';
|
|
1330
|
-
}
|
|
1331
|
-
// Intel collection keywords
|
|
1332
|
-
if (lower.includes('intelligence') || lower.includes('osint') ||
|
|
1333
|
-
lower.includes('gather') || lower.includes('collect')) {
|
|
1334
|
-
return 'intel-collection';
|
|
1335
|
-
}
|
|
1336
|
-
// Red team keywords
|
|
1337
|
-
if (lower.includes('red team') || lower.includes('adversary') ||
|
|
1338
|
-
lower.includes('full simulation')) {
|
|
1339
|
-
return 'red-team';
|
|
1340
|
-
}
|
|
1341
|
-
// APT simulation keywords
|
|
1342
|
-
if (lower.includes('apt') || lower.includes('kill chain') ||
|
|
1343
|
-
lower.includes('kill-chain') || lower.includes('attack chain')) {
|
|
1344
|
-
return 'apt-simulation';
|
|
1345
|
-
}
|
|
1346
|
-
// Security operation keywords (attack chain)
|
|
1347
|
-
const securityKeywords = ['scan', 'recon', 'enumerate', 'exploit', 'pentest', 'hack'];
|
|
1348
|
-
if (securityKeywords.some(kw => lower.includes(kw))) {
|
|
1349
|
-
return 'attack-chain';
|
|
1350
|
-
}
|
|
1351
|
-
return 'standard';
|
|
1352
|
-
}
|
|
1353
|
-
/**
|
|
1354
|
-
* Run full APT simulation with all kill-chain phases.
|
|
1355
|
-
*/
|
|
1356
|
-
async runAPTSimulation(request, options) {
|
|
1357
|
-
const startTime = Date.now();
|
|
1358
|
-
const intent = parseNaturalLanguageIntent(request);
|
|
1359
|
-
const targets = options.attackTargets ?? intent.targets;
|
|
1360
|
-
const depth = this.normalizeDepth(options.attackDepth ?? intent.depth);
|
|
1361
|
-
const stealth = options.stealthMode ?? intent.constraints.includes('stealth');
|
|
1362
|
-
// Validate authorization
|
|
1363
|
-
const auth = this.validateAttackAuthorization(intent, options.authorizationContext);
|
|
1364
|
-
if (!auth.authorized) {
|
|
1365
|
-
return this.buildAuthorizationError(auth);
|
|
1366
|
-
}
|
|
1367
|
-
const realExecutions = [];
|
|
1368
|
-
const completedPhases = new Set();
|
|
1369
|
-
const allArtifacts = [];
|
|
1370
|
-
// APT Kill Chain Phases
|
|
1371
|
-
const killChainPhases = [
|
|
1372
|
-
'reconnaissance',
|
|
1373
|
-
'weaponization',
|
|
1374
|
-
'delivery',
|
|
1375
|
-
'exploitation',
|
|
1376
|
-
'installation',
|
|
1377
|
-
'command-control',
|
|
1378
|
-
'actions-on-objectives',
|
|
1379
|
-
];
|
|
1380
|
-
// Execute techniques for each target
|
|
1381
|
-
for (const target of targets) {
|
|
1382
|
-
const chain = planAttackChain(intent, `APT Simulation: ${target}`);
|
|
1383
|
-
for (const phase of killChainPhases) {
|
|
1384
|
-
const phaseTechniques = techniqueRegistry.getByPhase(phase);
|
|
1385
|
-
const techniqueLimit = depth === 'quick' ? 2 : depth === 'standard' ? 4 : phaseTechniques.length;
|
|
1386
|
-
for (const technique of phaseTechniques.slice(0, techniqueLimit)) {
|
|
1387
|
-
if (stealth && technique.stealthRating < 0.3)
|
|
1388
|
-
continue;
|
|
1389
|
-
options.onTechniqueStart?.(technique.id, target);
|
|
1390
|
-
try {
|
|
1391
|
-
const result = await technique.execute({
|
|
1392
|
-
target,
|
|
1393
|
-
depth,
|
|
1394
|
-
stealth,
|
|
1395
|
-
timeout: depth === 'deep' ? 60000 : 30000,
|
|
1396
|
-
context: { chainId: chain.id, phase },
|
|
1397
|
-
});
|
|
1398
|
-
const execResult = {
|
|
1399
|
-
techniqueId: technique.id,
|
|
1400
|
-
techniqueName: technique.name,
|
|
1401
|
-
phase,
|
|
1402
|
-
success: result.success,
|
|
1403
|
-
duration: result.duration,
|
|
1404
|
-
artifacts: result.artifacts,
|
|
1405
|
-
detectionRisk: result.detectionRisk,
|
|
1406
|
-
nextTechniques: result.nextTechniques,
|
|
1407
|
-
};
|
|
1408
|
-
realExecutions.push(execResult);
|
|
1409
|
-
allArtifacts.push(...result.artifacts);
|
|
1410
|
-
options.onTechniqueComplete?.(execResult);
|
|
1411
|
-
if (result.success) {
|
|
1412
|
-
completedPhases.add(phase);
|
|
1413
|
-
}
|
|
1414
|
-
// Stealth abort if detection risk too high
|
|
1415
|
-
if (stealth && result.detectionRisk > 0.7)
|
|
1416
|
-
break;
|
|
1417
|
-
}
|
|
1418
|
-
catch {
|
|
1419
|
-
realExecutions.push({
|
|
1420
|
-
techniqueId: technique.id,
|
|
1421
|
-
techniqueName: technique.name,
|
|
1422
|
-
phase,
|
|
1423
|
-
success: false,
|
|
1424
|
-
duration: 0,
|
|
1425
|
-
artifacts: [],
|
|
1426
|
-
detectionRisk: 0.5,
|
|
1427
|
-
nextTechniques: [],
|
|
1428
|
-
});
|
|
1429
|
-
}
|
|
1430
|
-
}
|
|
1431
|
-
}
|
|
1432
|
-
}
|
|
1433
|
-
const metrics = this.computeMetrics(realExecutions, startTime);
|
|
1434
|
-
return {
|
|
1435
|
-
finalResponse: this.buildExecutionSummary(realExecutions, metrics, 'APT Simulation'),
|
|
1436
|
-
toolsUsed: realExecutions.map(r => `tao:${r.techniqueId}`),
|
|
1437
|
-
planOnly: false,
|
|
1438
|
-
tookAction: true,
|
|
1439
|
-
completion: this.buildCompletionAnalysis(metrics),
|
|
1440
|
-
exitReason: metrics.successfulTechniques > 0 ? 'attack-chain-complete' : 'attack-chain-aborted',
|
|
1441
|
-
statusSummary: `APT: ${metrics.successfulTechniques}/${metrics.totalTechniquesExecuted} techniques`,
|
|
1442
|
-
limitations: [],
|
|
1443
|
-
recommendations: this.buildRecommendations(realExecutions),
|
|
1444
|
-
realExecutions,
|
|
1445
|
-
realMetrics: metrics,
|
|
1446
|
-
attackChains: listActiveChains(),
|
|
1447
|
-
attackChainPhases: Array.from(completedPhases),
|
|
1448
|
-
collectedArtifacts: allArtifacts,
|
|
1449
|
-
};
|
|
1450
|
-
}
|
|
1451
|
-
/**
|
|
1452
|
-
* Run infrastructure deployment mode.
|
|
1453
|
-
*/
|
|
1454
|
-
async runInfrastructureMode(request, options) {
|
|
1455
|
-
const region = options.region || 'us';
|
|
1456
|
-
const stack = getInfrastructureStack(region);
|
|
1457
|
-
if (!stack) {
|
|
1458
|
-
return {
|
|
1459
|
-
finalResponse: `No infrastructure stack available for region: ${region}`,
|
|
1460
|
-
toolsUsed: [],
|
|
1461
|
-
planOnly: false,
|
|
1462
|
-
tookAction: false,
|
|
1463
|
-
completion: this.buildCompletionAnalysis({ totalTechniquesExecuted: 0, successfulTechniques: 0, failedTechniques: 0, totalDuration: 0, averageDetectionRisk: 0, phasesCompleted: [], artifactsCollected: 0, rlRewardAverage: 0 }),
|
|
1464
|
-
exitReason: 'incomplete',
|
|
1465
|
-
statusSummary: 'No infrastructure stack',
|
|
1466
|
-
limitations: [`Region '${region}' not supported`],
|
|
1467
|
-
recommendations: ['Use us or ukraine region'],
|
|
1468
|
-
};
|
|
1469
|
-
}
|
|
1470
|
-
const deployScript = generateDeploymentScript(stack);
|
|
1471
|
-
const teardownScript = generateTeardownScript(stack);
|
|
1472
|
-
const opsecList = generateOpsecChecklist(stack);
|
|
1473
|
-
options.onInfrastructure?.(stack);
|
|
1474
|
-
return {
|
|
1475
|
-
finalResponse: `## Infrastructure Stack: ${stack.name}\n\n` +
|
|
1476
|
-
`**Region:** ${stack.region}\n` +
|
|
1477
|
-
`**Components:** ${stack.components.length}\n` +
|
|
1478
|
-
`**Persistence Mechanisms:** ${stack.persistence.length}\n` +
|
|
1479
|
-
`**Monthly Cost:** $${stack.costEstimate.monthly}\n\n` +
|
|
1480
|
-
`### Components\n${stack.components.map(c => `- ${c.name} (${c.type})`).join('\n')}\n\n` +
|
|
1481
|
-
`### Deployment Script Generated\n### Teardown Script Generated\n### OPSEC Checklist Generated`,
|
|
1482
|
-
toolsUsed: ['infrastructure-deploy', 'script-generator'],
|
|
1483
|
-
planOnly: false,
|
|
1484
|
-
tookAction: true,
|
|
1485
|
-
completion: this.buildCompletionAnalysis({ totalTechniquesExecuted: 1, successfulTechniques: 1, failedTechniques: 0, totalDuration: 0, averageDetectionRisk: 0, phasesCompleted: [], artifactsCollected: 0, rlRewardAverage: 0 }),
|
|
1486
|
-
exitReason: 'complete',
|
|
1487
|
-
statusSummary: `Infrastructure: ${stack.components.length} components`,
|
|
1488
|
-
limitations: [],
|
|
1489
|
-
recommendations: ['Review OPSEC checklist before deployment'],
|
|
1490
|
-
infrastructureStack: stack,
|
|
1491
|
-
deploymentScript: deployScript,
|
|
1492
|
-
teardownScript: teardownScript,
|
|
1493
|
-
opsecChecklist: opsecList,
|
|
1494
|
-
};
|
|
1495
|
-
}
|
|
1496
|
-
/**
|
|
1497
|
-
* Run combined red team mode.
|
|
1498
|
-
*/
|
|
1499
|
-
async runRedTeamMode(request, options) {
|
|
1500
|
-
// Red team combines infrastructure + APT simulation
|
|
1501
|
-
const infraResult = await this.runInfrastructureMode(request, options);
|
|
1502
|
-
const aptResult = await this.runAPTSimulation(request, {
|
|
1503
|
-
...options,
|
|
1504
|
-
attackDepth: 'deep',
|
|
1505
|
-
stealthMode: true,
|
|
1506
|
-
});
|
|
1507
|
-
return {
|
|
1508
|
-
...aptResult,
|
|
1509
|
-
finalResponse: `## Red Team Operation\n\n${infraResult.finalResponse}\n\n---\n\n${aptResult.finalResponse}`,
|
|
1510
|
-
toolsUsed: [...infraResult.toolsUsed, ...aptResult.toolsUsed],
|
|
1511
|
-
infrastructureStack: infraResult.infrastructureStack,
|
|
1512
|
-
deploymentScript: infraResult.deploymentScript,
|
|
1513
|
-
teardownScript: infraResult.teardownScript,
|
|
1514
|
-
opsecChecklist: infraResult.opsecChecklist,
|
|
1515
|
-
statusSummary: `Red Team: ${aptResult.realMetrics?.successfulTechniques || 0} techniques + ${infraResult.infrastructureStack?.components.length || 0} infra components`,
|
|
1516
|
-
};
|
|
1517
|
-
}
|
|
1518
|
-
/**
|
|
1519
|
-
* Run intelligence collection mode.
|
|
1520
|
-
*/
|
|
1521
|
-
async runIntelCollectionMode(request, options) {
|
|
1522
|
-
return this.runGoalOriented(request, {
|
|
1523
|
-
...options,
|
|
1524
|
-
goals: ['reconnaissance', 'collection'],
|
|
1525
|
-
collectIntel: true,
|
|
1526
|
-
minimizeFootprint: true,
|
|
1527
|
-
});
|
|
1528
|
-
}
|
|
1529
|
-
/**
|
|
1530
|
-
* Run compliance mode with legal frameworks.
|
|
1531
|
-
*/
|
|
1532
|
-
async runComplianceMode(request, options) {
|
|
1533
|
-
const agencyTypes = options.agencyTypes || ['federal-le', 'regulatory'];
|
|
1534
|
-
const contacts = agencyTypes.flatMap(at => getAgencyContacts(at));
|
|
1535
|
-
const frameworks = getApplicableLegalFrameworks(agencyTypes);
|
|
1536
|
-
const response = `## Compliance Analysis\n\n` +
|
|
1537
|
-
`### Applicable Legal Frameworks\n` +
|
|
1538
|
-
frameworks.map(f => `- **${f.name}** (${f.jurisdiction})\n Requirements: ${f.requirements.length}`).join('\n') +
|
|
1539
|
-
`\n\n### Agency Contacts\n` +
|
|
1540
|
-
contacts.map(c => `- **${c.agency}** - ${c.division}\n Contact: ${c.contact.email}`).join('\n');
|
|
1541
|
-
return {
|
|
1542
|
-
finalResponse: response,
|
|
1543
|
-
toolsUsed: ['compliance-check'],
|
|
1544
|
-
planOnly: false,
|
|
1545
|
-
tookAction: true,
|
|
1546
|
-
completion: this.buildCompletionAnalysis({ totalTechniquesExecuted: 1, successfulTechniques: 1, failedTechniques: 0, totalDuration: 0, averageDetectionRisk: 0, phasesCompleted: [], artifactsCollected: 0, rlRewardAverage: 0 }),
|
|
1547
|
-
exitReason: 'complete',
|
|
1548
|
-
statusSummary: `Compliance: ${frameworks.length} frameworks`,
|
|
1549
|
-
limitations: [],
|
|
1550
|
-
recommendations: frameworks.flatMap(f => f.requirements.map(r => r.requirement)),
|
|
1551
|
-
};
|
|
1552
|
-
}
|
|
1553
|
-
/**
|
|
1554
|
-
* Run goal-oriented APT execution.
|
|
1555
|
-
*/
|
|
1556
|
-
async runGoalOriented(request, options) {
|
|
1557
|
-
const startTime = Date.now();
|
|
1558
|
-
const goals = options.goals || (options.goal ? [options.goal] : ['reconnaissance']);
|
|
1559
|
-
const targets = options.attackTargets || [];
|
|
1560
|
-
const depth = this.normalizeDepth(options.attackDepth ?? 'standard');
|
|
1561
|
-
const stealth = options.stealthMode ?? options.minimizeFootprint ?? false;
|
|
1562
|
-
// Build technique list from goals
|
|
1563
|
-
let techniques = [];
|
|
1564
|
-
for (const goal of goals) {
|
|
1565
|
-
if (goal === 'all') {
|
|
1566
|
-
techniques = techniqueRegistry.list().map(t => t.id);
|
|
1567
|
-
break;
|
|
1568
|
-
}
|
|
1569
|
-
techniques.push(...(this.goalTechniques[goal] || []));
|
|
1570
|
-
}
|
|
1571
|
-
techniques = [...new Set(techniques)];
|
|
1572
|
-
const realExecutions = [];
|
|
1573
|
-
const achievedGoals = [];
|
|
1574
|
-
const allArtifacts = [];
|
|
1575
|
-
// Execute techniques
|
|
1576
|
-
for (const target of targets) {
|
|
1577
|
-
for (const techniqueId of techniques) {
|
|
1578
|
-
const technique = techniqueRegistry.get(techniqueId);
|
|
1579
|
-
if (!technique)
|
|
1580
|
-
continue;
|
|
1581
|
-
if (stealth && technique.stealthRating < 0.3)
|
|
1582
|
-
continue;
|
|
1583
|
-
options.onTechniqueStart?.(techniqueId, target);
|
|
1584
|
-
try {
|
|
1585
|
-
const result = await technique.execute({
|
|
1586
|
-
target,
|
|
1587
|
-
depth,
|
|
1588
|
-
stealth,
|
|
1589
|
-
timeout: 60000,
|
|
1590
|
-
context: {},
|
|
1591
|
-
});
|
|
1592
|
-
const execResult = {
|
|
1593
|
-
techniqueId: technique.id,
|
|
1594
|
-
techniqueName: technique.name,
|
|
1595
|
-
phase: technique.phase,
|
|
1596
|
-
success: result.success,
|
|
1597
|
-
duration: result.duration,
|
|
1598
|
-
artifacts: result.artifacts,
|
|
1599
|
-
detectionRisk: result.detectionRisk,
|
|
1600
|
-
nextTechniques: result.nextTechniques,
|
|
1601
|
-
};
|
|
1602
|
-
realExecutions.push(execResult);
|
|
1603
|
-
allArtifacts.push(...result.artifacts);
|
|
1604
|
-
options.onTechniqueComplete?.(execResult);
|
|
1605
|
-
// Check goal achievement
|
|
1606
|
-
for (const goal of goals) {
|
|
1607
|
-
if (!achievedGoals.includes(goal)) {
|
|
1608
|
-
const goalTechs = this.goalTechniques[goal] || [];
|
|
1609
|
-
if (goalTechs.includes(techniqueId) && result.success) {
|
|
1610
|
-
achievedGoals.push(goal);
|
|
1611
|
-
options.onGoalAchieved?.(goal, result.artifacts);
|
|
1612
|
-
}
|
|
1613
|
-
}
|
|
1614
|
-
}
|
|
1615
|
-
if (stealth && result.detectionRisk > 0.7)
|
|
1616
|
-
break;
|
|
1617
|
-
}
|
|
1618
|
-
catch {
|
|
1619
|
-
realExecutions.push({
|
|
1620
|
-
techniqueId: technique.id,
|
|
1621
|
-
techniqueName: technique.name,
|
|
1622
|
-
phase: technique.phase,
|
|
1623
|
-
success: false,
|
|
1624
|
-
duration: 0,
|
|
1625
|
-
artifacts: [],
|
|
1626
|
-
detectionRisk: 0.5,
|
|
1627
|
-
nextTechniques: [],
|
|
1628
|
-
});
|
|
1629
|
-
}
|
|
1630
|
-
}
|
|
1631
|
-
// Maintain persistence if requested
|
|
1632
|
-
if (options.maintainAccess) {
|
|
1633
|
-
for (const persistTech of ['persistence', 'sched_persist', 'ssh_persist']) {
|
|
1634
|
-
const technique = techniqueRegistry.get(persistTech);
|
|
1635
|
-
if (technique) {
|
|
1636
|
-
try {
|
|
1637
|
-
await technique.execute({ target, depth, stealth, timeout: 30000, context: {} });
|
|
1638
|
-
}
|
|
1639
|
-
catch { /* ignore */ }
|
|
1640
|
-
}
|
|
1641
|
-
}
|
|
1642
|
-
}
|
|
1643
|
-
}
|
|
1644
|
-
const metrics = this.computeMetrics(realExecutions, startTime);
|
|
1645
|
-
return {
|
|
1646
|
-
finalResponse: this.buildExecutionSummary(realExecutions, metrics, `Goal-Oriented: ${goals.join(', ')}`),
|
|
1647
|
-
toolsUsed: realExecutions.map(r => r.techniqueId),
|
|
1648
|
-
planOnly: false,
|
|
1649
|
-
tookAction: true,
|
|
1650
|
-
completion: this.buildCompletionAnalysis(metrics),
|
|
1651
|
-
exitReason: metrics.successfulTechniques > 0 ? 'attack-chain-complete' : 'attack-chain-aborted',
|
|
1652
|
-
statusSummary: `Goals: ${achievedGoals.length}/${goals.length} achieved`,
|
|
1653
|
-
limitations: [],
|
|
1654
|
-
recommendations: this.buildRecommendations(realExecutions),
|
|
1655
|
-
realExecutions,
|
|
1656
|
-
realMetrics: metrics,
|
|
1657
|
-
attackChains: listActiveChains(),
|
|
1658
|
-
achievedGoals,
|
|
1659
|
-
collectedArtifacts: allArtifacts,
|
|
1660
|
-
};
|
|
1661
|
-
}
|
|
1662
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1663
|
-
// HELPER METHODS
|
|
1664
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1665
|
-
normalizeDepth(depth) {
|
|
1666
|
-
if (depth === 'quick' || depth === 'standard' || depth === 'deep') {
|
|
1667
|
-
return depth;
|
|
1668
|
-
}
|
|
1669
|
-
return 'standard';
|
|
1670
|
-
}
|
|
1671
|
-
computeMetrics(executions, startTime) {
|
|
1672
|
-
const successful = executions.filter(e => e.success).length;
|
|
1673
|
-
const rlState = getRLState();
|
|
1674
|
-
return {
|
|
1675
|
-
totalTechniquesExecuted: executions.length,
|
|
1676
|
-
successfulTechniques: successful,
|
|
1677
|
-
failedTechniques: executions.length - successful,
|
|
1678
|
-
totalDuration: Date.now() - startTime,
|
|
1679
|
-
averageDetectionRisk: executions.length > 0
|
|
1680
|
-
? executions.reduce((s, e) => s + e.detectionRisk, 0) / executions.length
|
|
1681
|
-
: 0,
|
|
1682
|
-
phasesCompleted: [...new Set(executions.filter(e => e.success).map(e => e.phase))],
|
|
1683
|
-
artifactsCollected: executions.reduce((s, e) => s + e.artifacts.length, 0),
|
|
1684
|
-
rlRewardAverage: rlState.avgReward,
|
|
1685
|
-
};
|
|
1686
|
-
}
|
|
1687
|
-
buildCompletionAnalysis(metrics) {
|
|
1688
|
-
const confidence = metrics.totalTechniquesExecuted > 0
|
|
1689
|
-
? metrics.successfulTechniques / metrics.totalTechniquesExecuted
|
|
1690
|
-
: 0;
|
|
1691
|
-
return {
|
|
1692
|
-
isComplete: true,
|
|
1693
|
-
confidence,
|
|
1694
|
-
signals: {
|
|
1695
|
-
hasExplicitCompletionStatement: true,
|
|
1696
|
-
hasIncompleteWorkIndicators: false,
|
|
1697
|
-
hasPendingActionIndicators: false,
|
|
1698
|
-
hasErrorIndicators: metrics.failedTechniques > metrics.successfulTechniques,
|
|
1699
|
-
hasFollowUpQuestions: false,
|
|
1700
|
-
toolsUsedInLastResponse: metrics.totalTechniquesExecuted,
|
|
1701
|
-
lastToolWasReadOnly: false,
|
|
1702
|
-
consecutiveResponsesWithoutTools: 0,
|
|
1703
|
-
hasRecentFileWrites: false,
|
|
1704
|
-
hasRecentCommits: false,
|
|
1705
|
-
todoItemsPending: 0,
|
|
1706
|
-
todoItemsCompleted: metrics.successfulTechniques,
|
|
1707
|
-
mentionsFutureWork: false,
|
|
1708
|
-
completionConfidence: confidence,
|
|
1709
|
-
},
|
|
1710
|
-
reason: 'Unified execution complete',
|
|
1711
|
-
shouldVerify: confidence < 0.8,
|
|
1712
|
-
};
|
|
1713
|
-
}
|
|
1714
|
-
buildAuthorizationError(auth) {
|
|
1715
|
-
return {
|
|
1716
|
-
finalResponse: `## Authorization Required\n\n${auth.reason}\n\n### Warnings\n${auth.warnings.map(w => `- ${w}`).join('\n')}`,
|
|
1717
|
-
toolsUsed: [],
|
|
1718
|
-
planOnly: false,
|
|
1719
|
-
tookAction: false,
|
|
1720
|
-
completion: {
|
|
1721
|
-
isComplete: true,
|
|
1722
|
-
confidence: 1.0,
|
|
1723
|
-
signals: {
|
|
1724
|
-
hasExplicitCompletionStatement: true,
|
|
1725
|
-
hasIncompleteWorkIndicators: false,
|
|
1726
|
-
hasPendingActionIndicators: false,
|
|
1727
|
-
hasErrorIndicators: false,
|
|
1728
|
-
hasFollowUpQuestions: true,
|
|
1729
|
-
toolsUsedInLastResponse: 0,
|
|
1730
|
-
lastToolWasReadOnly: false,
|
|
1731
|
-
consecutiveResponsesWithoutTools: 0,
|
|
1732
|
-
hasRecentFileWrites: false,
|
|
1733
|
-
hasRecentCommits: false,
|
|
1734
|
-
todoItemsPending: 0,
|
|
1735
|
-
todoItemsCompleted: 0,
|
|
1736
|
-
mentionsFutureWork: false,
|
|
1737
|
-
completionConfidence: 1.0,
|
|
1738
|
-
},
|
|
1739
|
-
reason: 'Authorization required',
|
|
1740
|
-
shouldVerify: false,
|
|
1741
|
-
},
|
|
1742
|
-
exitReason: 'attack-chain-aborted',
|
|
1743
|
-
statusSummary: `Authorization required: ${auth.reason}`,
|
|
1744
|
-
limitations: auth.warnings,
|
|
1745
|
-
recommendations: [
|
|
1746
|
-
'Provide CTF/lab context',
|
|
1747
|
-
'Specify pentest authorization',
|
|
1748
|
-
'Use --auth-context flag',
|
|
1749
|
-
],
|
|
1750
|
-
};
|
|
1751
|
-
}
|
|
1752
|
-
buildExecutionSummary(executions, metrics, title) {
|
|
1753
|
-
const lines = [];
|
|
1754
|
-
lines.push(`## ${title} Execution Summary\n`);
|
|
1755
|
-
lines.push(`**Total Techniques:** ${metrics.totalTechniquesExecuted}`);
|
|
1756
|
-
lines.push(`**Successful:** ${metrics.successfulTechniques}`);
|
|
1757
|
-
lines.push(`**Failed:** ${metrics.failedTechniques}`);
|
|
1758
|
-
lines.push(`**Duration:** ${(metrics.totalDuration / 1000).toFixed(1)}s`);
|
|
1759
|
-
lines.push(`**Detection Risk:** ${(metrics.averageDetectionRisk * 100).toFixed(1)}%`);
|
|
1760
|
-
lines.push(`**Artifacts:** ${metrics.artifactsCollected}`);
|
|
1761
|
-
lines.push(`**Phases:** ${metrics.phasesCompleted.join(', ') || 'none'}`);
|
|
1762
|
-
lines.push('');
|
|
1763
|
-
lines.push(`### Technique Results\n`);
|
|
1764
|
-
for (const exec of executions) {
|
|
1765
|
-
const status = exec.success ? '✓' : '✗';
|
|
1766
|
-
lines.push(`**${status} ${exec.techniqueName}** (${exec.phase})`);
|
|
1767
|
-
lines.push(`- Duration: ${exec.duration}ms`);
|
|
1768
|
-
lines.push(`- Risk: ${(exec.detectionRisk * 100).toFixed(0)}%`);
|
|
1769
|
-
if (exec.artifacts.length > 0) {
|
|
1770
|
-
lines.push(`- Artifacts: ${exec.artifacts.length}`);
|
|
1771
|
-
}
|
|
1772
|
-
lines.push('');
|
|
1773
|
-
}
|
|
1774
|
-
return lines.join('\n');
|
|
1775
|
-
}
|
|
1776
|
-
buildRecommendations(executions) {
|
|
1777
|
-
const recommendations = [];
|
|
1778
|
-
const failed = executions.filter(e => !e.success);
|
|
1779
|
-
if (failed.length > 0) {
|
|
1780
|
-
recommendations.push(`Review ${failed.length} failed technique(s)`);
|
|
1781
|
-
}
|
|
1782
|
-
const highRisk = executions.filter(e => e.detectionRisk > 0.7);
|
|
1783
|
-
if (highRisk.length > 0) {
|
|
1784
|
-
recommendations.push(`${highRisk.length} technique(s) have high detection risk`);
|
|
1785
|
-
}
|
|
1786
|
-
const nextTechniques = [...new Set(executions.flatMap(e => e.nextTechniques))];
|
|
1787
|
-
if (nextTechniques.length > 0) {
|
|
1788
|
-
recommendations.push(`Consider: ${nextTechniques.slice(0, 5).join(', ')}`);
|
|
1789
|
-
}
|
|
1790
|
-
return recommendations;
|
|
1791
|
-
}
|
|
1792
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1793
|
-
// FULL OFFENSIVE ATTACK CHAIN EXECUTION
|
|
1794
|
-
// Direct integration with UnifiedAttackChain tool
|
|
1795
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1796
|
-
/** TAO tool suite instance for direct tool invocation */
|
|
1797
|
-
taoTools = null;
|
|
1798
|
-
/**
|
|
1799
|
-
* Get or create the TAO tools instance.
|
|
1800
|
-
*/
|
|
1801
|
-
getTaoTools() {
|
|
1802
|
-
if (!this.taoTools) {
|
|
1803
|
-
this.taoTools = createTaoTools(process.cwd());
|
|
1804
|
-
}
|
|
1805
|
-
return this.taoTools;
|
|
1806
|
-
}
|
|
1807
|
-
/**
|
|
1808
|
-
* Execute full offensive attack chain using the UnifiedAttackChain tool.
|
|
1809
|
-
* This integrates all 12 kill-chain phases with configurable stealth and objectives.
|
|
1810
|
-
*/
|
|
1811
|
-
async executeFullAttackChain(targets, options = {}) {
|
|
1812
|
-
const startTime = Date.now();
|
|
1813
|
-
const chainId = `chain_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
|
1814
|
-
// Validate authorization
|
|
1815
|
-
const intent = parseNaturalLanguageIntent(`attack ${targets.join(' ')}`);
|
|
1816
|
-
const auth = this.validateAttackAuthorization(intent, options.authorizationContext);
|
|
1817
|
-
if (!auth.authorized) {
|
|
1818
|
-
return this.buildAuthorizationError(auth);
|
|
1819
|
-
}
|
|
1820
|
-
const taoTools = this.getTaoTools();
|
|
1821
|
-
const unifiedAttackChainTool = taoTools.tools.find(t => t.name === 'UnifiedAttackChain');
|
|
1822
|
-
if (!unifiedAttackChainTool) {
|
|
1823
|
-
return {
|
|
1824
|
-
finalResponse: 'UnifiedAttackChain tool not available',
|
|
1825
|
-
toolsUsed: [],
|
|
1826
|
-
planOnly: false,
|
|
1827
|
-
tookAction: false,
|
|
1828
|
-
completion: this.buildCompletionAnalysis({
|
|
1829
|
-
totalTechniquesExecuted: 0,
|
|
1830
|
-
successfulTechniques: 0,
|
|
1831
|
-
failedTechniques: 0,
|
|
1832
|
-
totalDuration: 0,
|
|
1833
|
-
averageDetectionRisk: 0,
|
|
1834
|
-
phasesCompleted: [],
|
|
1835
|
-
artifactsCollected: 0,
|
|
1836
|
-
rlRewardAverage: 0,
|
|
1837
|
-
}),
|
|
1838
|
-
exitReason: 'attack-chain-aborted',
|
|
1839
|
-
statusSummary: 'Tool unavailable',
|
|
1840
|
-
limitations: ['UnifiedAttackChain tool not loaded'],
|
|
1841
|
-
recommendations: ['Check TAO tools configuration'],
|
|
1842
|
-
};
|
|
1843
|
-
}
|
|
1844
|
-
const result = {
|
|
1845
|
-
chainId,
|
|
1846
|
-
targets,
|
|
1847
|
-
startTime,
|
|
1848
|
-
endTime: 0,
|
|
1849
|
-
duration: 0,
|
|
1850
|
-
phases: [],
|
|
1851
|
-
artifacts: [],
|
|
1852
|
-
credentials: [],
|
|
1853
|
-
persistence: [],
|
|
1854
|
-
c2Channels: [],
|
|
1855
|
-
exfilData: [],
|
|
1856
|
-
detectionEvents: [],
|
|
1857
|
-
overallSuccess: false,
|
|
1858
|
-
successRate: 0,
|
|
1859
|
-
stealthScore: 1.0,
|
|
1860
|
-
};
|
|
1861
|
-
const phasesToExecute = options.killChainPhases ?? KILL_CHAIN_PHASES;
|
|
1862
|
-
const stealthLevel = options.stealthLevel ?? 'moderate';
|
|
1863
|
-
const continueOnFailure = options.continueOnFailure ?? true;
|
|
1864
|
-
let successfulPhases = 0;
|
|
1865
|
-
let totalTechniques = 0;
|
|
1866
|
-
let successfulTechniques = 0;
|
|
1867
|
-
for (const phase of phasesToExecute) {
|
|
1868
|
-
// Check time limit
|
|
1869
|
-
if (options.chainTimeLimit) {
|
|
1870
|
-
const elapsed = (Date.now() - startTime) / 1000;
|
|
1871
|
-
if (elapsed >= options.chainTimeLimit) {
|
|
1872
|
-
result.detectionEvents.push({
|
|
1873
|
-
time: Date.now(),
|
|
1874
|
-
type: 'timeout',
|
|
1875
|
-
severity: 'info',
|
|
1876
|
-
});
|
|
1877
|
-
break;
|
|
1878
|
-
}
|
|
1879
|
-
}
|
|
1880
|
-
try {
|
|
1881
|
-
// Execute phase using UnifiedAttackChain tool
|
|
1882
|
-
const phaseResult = await unifiedAttackChainTool.handler({
|
|
1883
|
-
operation: 'execute_phase',
|
|
1884
|
-
targets,
|
|
1885
|
-
phase,
|
|
1886
|
-
chain_id: chainId,
|
|
1887
|
-
depth: options.attackDepth ?? 'standard',
|
|
1888
|
-
stealth_level: stealthLevel,
|
|
1889
|
-
objectives: options.chainObjectives ?? [],
|
|
1890
|
-
constraints: options.operationalConstraints ?? [],
|
|
1891
|
-
c2_config: options.c2Config,
|
|
1892
|
-
exfil_config: options.exfilConfig,
|
|
1893
|
-
});
|
|
1894
|
-
const parsed = JSON.parse(phaseResult);
|
|
1895
|
-
const phaseData = {
|
|
1896
|
-
phase,
|
|
1897
|
-
status: parsed.success ? 'success' : parsed.partial ? 'partial' : 'failed',
|
|
1898
|
-
techniques: parsed.techniques?.map((t) => ({
|
|
1899
|
-
id: t['id'],
|
|
1900
|
-
name: t['name'],
|
|
1901
|
-
success: t['success'],
|
|
1902
|
-
duration: t['duration'],
|
|
1903
|
-
risk: t['risk'],
|
|
1904
|
-
outputs: t['outputs'] ?? [],
|
|
1905
|
-
})) ?? [],
|
|
1906
|
-
outputs: parsed.outputs ?? [],
|
|
1907
|
-
};
|
|
1908
|
-
result.phases.push(phaseData);
|
|
1909
|
-
// Collect artifacts
|
|
1910
|
-
if (parsed.artifacts) {
|
|
1911
|
-
for (const artifact of parsed.artifacts) {
|
|
1912
|
-
result.artifacts.push({ ...artifact, phase });
|
|
1913
|
-
}
|
|
1914
|
-
}
|
|
1915
|
-
// Collect credentials
|
|
1916
|
-
if (parsed.credentials) {
|
|
1917
|
-
result.credentials.push(...parsed.credentials);
|
|
1918
|
-
}
|
|
1919
|
-
// Collect persistence mechanisms
|
|
1920
|
-
if (parsed.persistence) {
|
|
1921
|
-
result.persistence.push(...parsed.persistence);
|
|
1922
|
-
}
|
|
1923
|
-
// Collect C2 channels
|
|
1924
|
-
if (parsed.c2_channels) {
|
|
1925
|
-
result.c2Channels.push(...parsed.c2_channels);
|
|
1926
|
-
}
|
|
1927
|
-
// Collect exfil data
|
|
1928
|
-
if (parsed.exfil_data) {
|
|
1929
|
-
result.exfilData.push(...parsed.exfil_data);
|
|
1930
|
-
}
|
|
1931
|
-
// Update metrics
|
|
1932
|
-
totalTechniques += phaseData.techniques.length;
|
|
1933
|
-
successfulTechniques += phaseData.techniques.filter(t => t.success).length;
|
|
1934
|
-
if (phaseData.status === 'success') {
|
|
1935
|
-
successfulPhases++;
|
|
1936
|
-
}
|
|
1937
|
-
// Update stealth score
|
|
1938
|
-
const phaseRisk = phaseData.techniques.reduce((sum, t) => sum + t.risk, 0) /
|
|
1939
|
-
Math.max(1, phaseData.techniques.length);
|
|
1940
|
-
result.stealthScore = Math.min(result.stealthScore, 1 - phaseRisk);
|
|
1941
|
-
// Callback
|
|
1942
|
-
options.onPhaseComplete?.(phase, phaseData.outputs);
|
|
1943
|
-
// Check if we should continue
|
|
1944
|
-
if (phaseData.status === 'failed' && !continueOnFailure) {
|
|
1945
|
-
result.detectionEvents.push({
|
|
1946
|
-
time: Date.now(),
|
|
1947
|
-
type: 'phase_failure',
|
|
1948
|
-
severity: 'high',
|
|
1949
|
-
});
|
|
1950
|
-
break;
|
|
1951
|
-
}
|
|
1952
|
-
}
|
|
1953
|
-
catch (error) {
|
|
1954
|
-
result.phases.push({
|
|
1955
|
-
phase,
|
|
1956
|
-
status: 'failed',
|
|
1957
|
-
techniques: [],
|
|
1958
|
-
outputs: [],
|
|
1959
|
-
});
|
|
1960
|
-
result.detectionEvents.push({
|
|
1961
|
-
time: Date.now(),
|
|
1962
|
-
type: 'execution_error',
|
|
1963
|
-
severity: 'critical',
|
|
1964
|
-
});
|
|
1965
|
-
if (!continueOnFailure)
|
|
1966
|
-
break;
|
|
1967
|
-
}
|
|
1968
|
-
}
|
|
1969
|
-
// Finalize result
|
|
1970
|
-
result.endTime = Date.now();
|
|
1971
|
-
result.duration = result.endTime - result.startTime;
|
|
1972
|
-
result.successRate = totalTechniques > 0 ? successfulTechniques / totalTechniques : 0;
|
|
1973
|
-
result.overallSuccess = result.successRate >= 0.5 && successfulPhases >= phasesToExecute.length / 2;
|
|
1974
|
-
// Callback
|
|
1975
|
-
options.onChainComplete?.(result);
|
|
1976
|
-
// Build response
|
|
1977
|
-
const summary = this.buildFullAttackChainSummary(result);
|
|
1978
|
-
return {
|
|
1979
|
-
finalResponse: summary,
|
|
1980
|
-
toolsUsed: result.phases.flatMap(p => p.techniques.map(t => `tao:${t.id}`)),
|
|
1981
|
-
planOnly: false,
|
|
1982
|
-
tookAction: true,
|
|
1983
|
-
completion: this.buildCompletionAnalysis({
|
|
1984
|
-
totalTechniquesExecuted: totalTechniques,
|
|
1985
|
-
successfulTechniques,
|
|
1986
|
-
failedTechniques: totalTechniques - successfulTechniques,
|
|
1987
|
-
totalDuration: result.duration,
|
|
1988
|
-
averageDetectionRisk: 1 - result.stealthScore,
|
|
1989
|
-
phasesCompleted: result.phases.filter(p => p.status === 'success').map(p => p.phase),
|
|
1990
|
-
artifactsCollected: result.artifacts.length,
|
|
1991
|
-
rlRewardAverage: getRLState().avgReward,
|
|
1992
|
-
}),
|
|
1993
|
-
exitReason: result.overallSuccess ? 'attack-chain-complete' : 'attack-chain-aborted',
|
|
1994
|
-
statusSummary: `Full Kill-Chain: ${successfulPhases}/${phasesToExecute.length} phases, ${Math.round(result.successRate * 100)}% success`,
|
|
1995
|
-
limitations: result.phases.filter(p => p.status === 'failed').map(p => `Phase '${p.phase}' failed`),
|
|
1996
|
-
recommendations: this.buildFullChainRecommendations(result),
|
|
1997
|
-
fullAttackChainResult: result,
|
|
1998
|
-
attackChains: listActiveChains(),
|
|
1999
|
-
};
|
|
2000
|
-
}
|
|
2001
|
-
/**
|
|
2002
|
-
* Build summary for full attack chain execution.
|
|
2003
|
-
*/
|
|
2004
|
-
buildFullAttackChainSummary(result) {
|
|
2005
|
-
const lines = [];
|
|
2006
|
-
lines.push('## Full Kill-Chain Attack Summary\n');
|
|
2007
|
-
lines.push(`**Chain ID:** ${result.chainId}`);
|
|
2008
|
-
lines.push(`**Targets:** ${result.targets.join(', ')}`);
|
|
2009
|
-
lines.push(`**Duration:** ${(result.duration / 1000).toFixed(1)}s`);
|
|
2010
|
-
lines.push(`**Success Rate:** ${Math.round(result.successRate * 100)}%`);
|
|
2011
|
-
lines.push(`**Stealth Score:** ${Math.round(result.stealthScore * 100)}%`);
|
|
2012
|
-
lines.push('');
|
|
2013
|
-
lines.push('### Phase Results\n');
|
|
2014
|
-
for (const phase of result.phases) {
|
|
2015
|
-
const statusIcon = phase.status === 'success' ? '✓' :
|
|
2016
|
-
phase.status === 'partial' ? '◐' : '✗';
|
|
2017
|
-
lines.push(`**${statusIcon} ${phase.phase.toUpperCase()}**`);
|
|
2018
|
-
lines.push(`- Status: ${phase.status}`);
|
|
2019
|
-
lines.push(`- Techniques: ${phase.techniques.filter(t => t.success).length}/${phase.techniques.length}`);
|
|
2020
|
-
if (phase.outputs.length > 0) {
|
|
2021
|
-
lines.push(`- Outputs: ${phase.outputs.join(', ')}`);
|
|
2022
|
-
}
|
|
2023
|
-
lines.push('');
|
|
2024
|
-
}
|
|
2025
|
-
if (result.credentials.length > 0) {
|
|
2026
|
-
lines.push('### Credentials Harvested\n');
|
|
2027
|
-
lines.push(`- **Total:** ${result.credentials.length}`);
|
|
2028
|
-
const byType = new Map();
|
|
2029
|
-
for (const cred of result.credentials) {
|
|
2030
|
-
byType.set(cred.type, (byType.get(cred.type) || 0) + 1);
|
|
2031
|
-
}
|
|
2032
|
-
for (const [type, count] of byType) {
|
|
2033
|
-
lines.push(`- ${type}: ${count}`);
|
|
2034
|
-
}
|
|
2035
|
-
lines.push('');
|
|
2036
|
-
}
|
|
2037
|
-
if (result.persistence.length > 0) {
|
|
2038
|
-
lines.push('### Persistence Mechanisms\n');
|
|
2039
|
-
for (const p of result.persistence) {
|
|
2040
|
-
lines.push(`- **${p.mechanism}:** ${p.status}`);
|
|
2041
|
-
}
|
|
2042
|
-
lines.push('');
|
|
2043
|
-
}
|
|
2044
|
-
if (result.c2Channels.length > 0) {
|
|
2045
|
-
lines.push('### C2 Channels\n');
|
|
2046
|
-
for (const c2 of result.c2Channels) {
|
|
2047
|
-
lines.push(`- **${c2.type}:** ${c2.status}`);
|
|
2048
|
-
}
|
|
2049
|
-
lines.push('');
|
|
2050
|
-
}
|
|
2051
|
-
if (result.exfilData.length > 0) {
|
|
2052
|
-
lines.push('### Exfiltrated Data\n');
|
|
2053
|
-
const totalSize = result.exfilData.reduce((sum, e) => sum + e.size, 0);
|
|
2054
|
-
lines.push(`- **Total:** ${result.exfilData.length} items, ${(totalSize / 1024).toFixed(1)} KB`);
|
|
2055
|
-
lines.push('');
|
|
2056
|
-
}
|
|
2057
|
-
if (result.artifacts.length > 0) {
|
|
2058
|
-
lines.push('### Artifacts Collected\n');
|
|
2059
|
-
const byPhase = new Map();
|
|
2060
|
-
for (const a of result.artifacts) {
|
|
2061
|
-
byPhase.set(a.phase, (byPhase.get(a.phase) || 0) + 1);
|
|
2062
|
-
}
|
|
2063
|
-
for (const [phase, count] of byPhase) {
|
|
2064
|
-
lines.push(`- ${phase}: ${count}`);
|
|
2065
|
-
}
|
|
2066
|
-
lines.push('');
|
|
2067
|
-
}
|
|
2068
|
-
if (result.detectionEvents.length > 0) {
|
|
2069
|
-
lines.push('### Detection Events\n');
|
|
2070
|
-
for (const event of result.detectionEvents) {
|
|
2071
|
-
lines.push(`- **${event.severity.toUpperCase()}:** ${event.type}`);
|
|
2072
|
-
}
|
|
2073
|
-
lines.push('');
|
|
2074
|
-
}
|
|
2075
|
-
return lines.join('\n');
|
|
2076
|
-
}
|
|
2077
|
-
/**
|
|
2078
|
-
* Build recommendations for full chain execution.
|
|
2079
|
-
*/
|
|
2080
|
-
buildFullChainRecommendations(result) {
|
|
2081
|
-
const recommendations = [];
|
|
2082
|
-
const failedPhases = result.phases.filter(p => p.status === 'failed');
|
|
2083
|
-
if (failedPhases.length > 0) {
|
|
2084
|
-
recommendations.push(`Review failed phases: ${failedPhases.map(p => p.phase).join(', ')}`);
|
|
2085
|
-
}
|
|
2086
|
-
if (result.stealthScore < 0.5) {
|
|
2087
|
-
recommendations.push('High detection risk detected - consider more stealthy techniques');
|
|
2088
|
-
}
|
|
2089
|
-
if (result.credentials.length === 0 && result.phases.some(p => p.phase === 'credential_access')) {
|
|
2090
|
-
recommendations.push('No credentials harvested - try alternative credential techniques');
|
|
2091
|
-
}
|
|
2092
|
-
if (result.persistence.length === 0 && result.phases.some(p => p.phase === 'persistence')) {
|
|
2093
|
-
recommendations.push('No persistence established - critical for long-term access');
|
|
2094
|
-
}
|
|
2095
|
-
if (result.c2Channels.length === 0 && result.phases.some(p => p.phase === 'command_control')) {
|
|
2096
|
-
recommendations.push('No C2 channels established - consider fallback C2 options');
|
|
2097
|
-
}
|
|
2098
|
-
return recommendations;
|
|
2099
|
-
}
|
|
2100
|
-
/**
|
|
2101
|
-
* Run full offensive attack chain mode.
|
|
2102
|
-
* This is the main entry point for full kill-chain operations.
|
|
2103
|
-
*/
|
|
2104
|
-
async runFullOffensiveChain(request, options = {}) {
|
|
2105
|
-
// Parse targets from request
|
|
2106
|
-
const intent = parseNaturalLanguageIntent(request);
|
|
2107
|
-
const targets = options.attackTargets ?? intent.targets;
|
|
2108
|
-
if (targets.length === 0) {
|
|
2109
|
-
return {
|
|
2110
|
-
finalResponse: 'No targets specified for attack chain',
|
|
2111
|
-
toolsUsed: [],
|
|
2112
|
-
planOnly: false,
|
|
2113
|
-
tookAction: false,
|
|
2114
|
-
completion: this.buildCompletionAnalysis({
|
|
2115
|
-
totalTechniquesExecuted: 0,
|
|
2116
|
-
successfulTechniques: 0,
|
|
2117
|
-
failedTechniques: 0,
|
|
2118
|
-
totalDuration: 0,
|
|
2119
|
-
averageDetectionRisk: 0,
|
|
2120
|
-
phasesCompleted: [],
|
|
2121
|
-
artifactsCollected: 0,
|
|
2122
|
-
rlRewardAverage: 0,
|
|
2123
|
-
}),
|
|
2124
|
-
exitReason: 'incomplete',
|
|
2125
|
-
statusSummary: 'No targets',
|
|
2126
|
-
limitations: ['No targets provided'],
|
|
2127
|
-
recommendations: ['Specify targets using --targets or in the request'],
|
|
2128
|
-
};
|
|
2129
|
-
}
|
|
2130
|
-
return this.executeFullAttackChain(targets, options);
|
|
2131
|
-
}
|
|
2132
|
-
}
|
|
2133
|
-
//# sourceMappingURL=agentOrchestrator.js.map
|