erosolar-cli 2.1.270 → 2.1.272

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/dist/capabilities/index.d.ts +0 -1
  2. package/dist/capabilities/index.d.ts.map +1 -1
  3. package/dist/capabilities/index.js +1 -1
  4. package/dist/capabilities/index.js.map +1 -1
  5. package/dist/capabilities/orchestrationCapability.d.ts.map +1 -1
  6. package/dist/capabilities/orchestrationCapability.js +56 -108
  7. package/dist/capabilities/orchestrationCapability.js.map +1 -1
  8. package/dist/core/iMessageVerification.d.ts +1 -1
  9. package/dist/core/infrastructureTemplates.d.ts +1 -1
  10. package/dist/core/infrastructureTemplates.js +5 -5
  11. package/dist/core/infrastructureTemplates.js.map +1 -1
  12. package/dist/core/persistentObjectiveStore.d.ts +13 -1
  13. package/dist/core/persistentObjectiveStore.d.ts.map +1 -1
  14. package/dist/core/persistentObjectiveStore.js.map +1 -1
  15. package/dist/core/securityDeliverableGenerator.d.ts +1 -1
  16. package/dist/core/securityDeliverableGenerator.d.ts.map +1 -1
  17. package/dist/core/securityDeliverableGenerator.js +3 -3
  18. package/dist/core/securityDeliverableGenerator.js.map +1 -1
  19. package/dist/core/toolRuntime.d.ts.map +1 -1
  20. package/dist/core/toolRuntime.js +11 -5
  21. package/dist/core/toolRuntime.js.map +1 -1
  22. package/dist/core/types.js.map +1 -1
  23. package/dist/core/unifiedOrchestrator.d.ts +265 -77
  24. package/dist/core/unifiedOrchestrator.d.ts.map +1 -1
  25. package/dist/core/unifiedOrchestrator.js +911 -254
  26. package/dist/core/unifiedOrchestrator.js.map +1 -1
  27. package/dist/providers/anthropicProvider.d.ts +1 -1
  28. package/dist/shell/interactiveShell.d.ts +1 -1
  29. package/dist/shell/interactiveShell.d.ts.map +1 -1
  30. package/dist/shell/interactiveShell.js +188 -207
  31. package/dist/shell/interactiveShell.js.map +1 -1
  32. package/dist/tools/tao/rl.js +1 -1
  33. package/dist/tools/tao/rl.js.map +1 -1
  34. package/dist/tools/taoTools.js +5 -5
  35. package/dist/tools/taoTools.js.map +1 -1
  36. package/package.json +1 -1
  37. package/dist/capabilities/unifiedInvestigationCapability.d.ts +0 -22
  38. package/dist/capabilities/unifiedInvestigationCapability.d.ts.map +0 -1
  39. package/dist/capabilities/unifiedInvestigationCapability.js +0 -41
  40. package/dist/capabilities/unifiedInvestigationCapability.js.map +0 -1
  41. package/dist/core/agentOrchestrator.d.ts +0 -400
  42. package/dist/core/agentOrchestrator.d.ts.map +0 -1
  43. package/dist/core/agentOrchestrator.js +0 -2133
  44. package/dist/core/agentOrchestrator.js.map +0 -1
  45. package/dist/core/autoExecutionOrchestrator.d.ts +0 -172
  46. package/dist/core/autoExecutionOrchestrator.d.ts.map +0 -1
  47. package/dist/core/autoExecutionOrchestrator.js +0 -591
  48. package/dist/core/autoExecutionOrchestrator.js.map +0 -1
  49. package/dist/core/dualAgentOrchestrator.d.ts +0 -34
  50. package/dist/core/dualAgentOrchestrator.d.ts.map +0 -1
  51. package/dist/core/dualAgentOrchestrator.js +0 -94
  52. package/dist/core/dualAgentOrchestrator.js.map +0 -1
  53. package/dist/core/failureRecovery.d.ts +0 -26
  54. package/dist/core/failureRecovery.d.ts.map +0 -1
  55. package/dist/core/failureRecovery.js +0 -54
  56. package/dist/core/failureRecovery.js.map +0 -1
  57. package/dist/core/intelligentTestFlows.d.ts +0 -45
  58. package/dist/core/intelligentTestFlows.d.ts.map +0 -1
  59. package/dist/core/intelligentTestFlows.js +0 -25
  60. package/dist/core/intelligentTestFlows.js.map +0 -1
  61. package/dist/core/learningPersistence.d.ts +0 -58
  62. package/dist/core/learningPersistence.d.ts.map +0 -1
  63. package/dist/core/learningPersistence.js +0 -46
  64. package/dist/core/learningPersistence.js.map +0 -1
  65. package/dist/core/metricsTracker.d.ts +0 -40
  66. package/dist/core/metricsTracker.d.ts.map +0 -1
  67. package/dist/core/metricsTracker.js +0 -83
  68. package/dist/core/metricsTracker.js.map +0 -1
  69. package/dist/core/orchestration.d.ts +0 -534
  70. package/dist/core/orchestration.d.ts.map +0 -1
  71. package/dist/core/orchestration.js +0 -2009
  72. package/dist/core/orchestration.js.map +0 -1
  73. package/dist/core/performanceMonitor.d.ts +0 -30
  74. package/dist/core/performanceMonitor.d.ts.map +0 -1
  75. package/dist/core/performanceMonitor.js +0 -39
  76. package/dist/core/performanceMonitor.js.map +0 -1
  77. package/dist/core/selfEvolution.d.ts +0 -61
  78. package/dist/core/selfEvolution.d.ts.map +0 -1
  79. package/dist/core/selfEvolution.js +0 -38
  80. package/dist/core/selfEvolution.js.map +0 -1
  81. package/dist/core/selfImprovement.d.ts +0 -82
  82. package/dist/core/selfImprovement.d.ts.map +0 -1
  83. package/dist/core/selfImprovement.js +0 -25
  84. package/dist/core/selfImprovement.js.map +0 -1
  85. package/dist/core/unifiedFraudOrchestrator.d.ts +0 -738
  86. package/dist/core/unifiedFraudOrchestrator.d.ts.map +0 -1
  87. package/dist/core/unifiedFraudOrchestrator.js +0 -3312
  88. package/dist/core/unifiedFraudOrchestrator.js.map +0 -1
  89. package/dist/core/unifiedRealOrchestrator.d.ts +0 -126
  90. package/dist/core/unifiedRealOrchestrator.d.ts.map +0 -1
  91. package/dist/core/unifiedRealOrchestrator.js +0 -558
  92. package/dist/core/unifiedRealOrchestrator.js.map +0 -1
  93. package/dist/core/userDefenseOrchestrator.d.ts +0 -202
  94. package/dist/core/userDefenseOrchestrator.d.ts.map +0 -1
  95. package/dist/core/userDefenseOrchestrator.js +0 -1006
  96. package/dist/core/userDefenseOrchestrator.js.map +0 -1
  97. package/dist/plugins/tools/unifiedInvestigation/unifiedInvestigationPlugin.d.ts +0 -3
  98. package/dist/plugins/tools/unifiedInvestigation/unifiedInvestigationPlugin.d.ts.map +0 -1
  99. package/dist/plugins/tools/unifiedInvestigation/unifiedInvestigationPlugin.js +0 -14
  100. package/dist/plugins/tools/unifiedInvestigation/unifiedInvestigationPlugin.js.map +0 -1
  101. package/dist/tools/frontendTestingTools.d.ts +0 -9
  102. package/dist/tools/frontendTestingTools.d.ts.map +0 -1
  103. package/dist/tools/frontendTestingTools.js +0 -291
  104. package/dist/tools/frontendTestingTools.js.map +0 -1
  105. package/dist/tools/unifiedInvestigationTools.d.ts +0 -19
  106. package/dist/tools/unifiedInvestigationTools.d.ts.map +0 -1
  107. package/dist/tools/unifiedInvestigationTools.js +0 -1163
  108. package/dist/tools/unifiedInvestigationTools.js.map +0 -1
  109. package/scripts/human-verification.mjs +0 -380
  110. package/scripts/isolated-verification-runner.mjs +0 -364
  111. package/scripts/isolated-verification-wrapper.mjs +0 -276
  112. package/scripts/verify-task-completion.mjs +0 -143
@@ -1,2133 +0,0 @@
1
- import { TASK_FULLY_COMPLETE } from './constants.js';
2
- import { TaskCompletionDetector, WRITE_TOOLS } from './taskCompletionDetector.js';
3
- import { planAttackChain, selectNextTechnique, executeTechniqueInChain, techniqueRegistry, parseNaturalLanguageIntent, validateAuthorizationContext, getRLState, listActiveChains, } from '../tools/tao/index.js';
4
- import { getInfrastructureStack, generateDeploymentScript, generateTeardownScript, generateOpsecChecklist, } from './infrastructureTemplates.js';
5
- import { getAgencyContacts, getApplicableLegalFrameworks, } from './governmentProcedures.js';
6
- import { createTaoTools } from '../tools/taoTools.js';
7
- // Kill-chain phase definitions for full attack chain execution
8
- export const KILL_CHAIN_PHASES = [
9
- 'reconnaissance',
10
- 'initial_access',
11
- 'persistence',
12
- 'privilege_escalation',
13
- 'defense_evasion',
14
- 'credential_access',
15
- 'discovery',
16
- 'lateral_movement',
17
- 'collection',
18
- 'command_control',
19
- 'exfiltration',
20
- 'impact',
21
- ];
22
- /**
23
- * Single-pass orchestration: drive the agent to finish as much as possible
24
- * in one go, with strong bias toward real actions over planning.
25
- */
26
- export class AgentOrchestrator {
27
- agent;
28
- constructor(agent) {
29
- this.agent = agent;
30
- }
31
- async runToCompletion(request, options) {
32
- // Early safety check for potentially harmful requests
33
- const harmfulCheck = this.checkForHarmfulRequest(request);
34
- if (harmfulCheck.isHarmful) {
35
- return {
36
- finalResponse: harmfulCheck.response,
37
- toolsUsed: [],
38
- planOnly: false,
39
- tookAction: false,
40
- completion: {
41
- isComplete: true,
42
- confidence: 1.0,
43
- signals: {
44
- hasExplicitCompletionStatement: true,
45
- hasIncompleteWorkIndicators: false,
46
- hasPendingActionIndicators: false,
47
- hasErrorIndicators: false,
48
- hasFollowUpQuestions: false,
49
- toolsUsedInLastResponse: 0,
50
- lastToolWasReadOnly: false,
51
- consecutiveResponsesWithoutTools: 0,
52
- hasRecentFileWrites: false,
53
- hasRecentCommits: false,
54
- todoItemsPending: 0,
55
- todoItemsCompleted: 0,
56
- mentionsFutureWork: false,
57
- completionConfidence: 1.0,
58
- },
59
- reason: 'Request declined for safety reasons',
60
- shouldVerify: false,
61
- },
62
- exitReason: 'refusal',
63
- statusSummary: null,
64
- limitations: [],
65
- recommendations: [],
66
- };
67
- }
68
- const streaming = options?.streaming ?? true;
69
- const enforceActions = options?.enforceActions ?? true;
70
- const verificationMode = options?.verificationMode ?? 'auto';
71
- const singlePassFocus = options?.singlePassFocus ?? true;
72
- const maxAttempts = Math.max(1, options?.maxAttempts ?? 6);
73
- const completionDetector = new TaskCompletionDetector();
74
- const contextualGuidance = this.buildContextualGuidance(request);
75
- const attempts = [];
76
- const runAttempt = async (prompt, enforceActionsForAttempt) => {
77
- const response = (await this.agent.send(prompt, streaming)).trim();
78
- const toolExecutions = this.agent.drainToolExecutions();
79
- toolExecutions.forEach(exec => completionDetector.recordToolCall(exec.name, exec.success, exec.hasOutput));
80
- const toolsUsed = toolExecutions.filter(exec => exec.success).map(exec => exec.name);
81
- const completion = completionDetector.analyzeCompletion(response, toolsUsed);
82
- const analysis = this.analyzeResponse(response, toolsUsed, enforceActionsForAttempt);
83
- const exitReason = this.resolveExitReason(analysis, completion, enforceActionsForAttempt, verificationMode);
84
- return {
85
- response,
86
- toolsUsed,
87
- toolExecutions,
88
- completion,
89
- analysis,
90
- exitReason,
91
- };
92
- };
93
- const primaryPrompt = singlePassFocus
94
- ? this.buildSinglePassPrompt(request, enforceActions, contextualGuidance)
95
- : request.trim();
96
- let nextPrompt = primaryPrompt;
97
- let finalAttempt = null;
98
- let exitReason = 'incomplete';
99
- let maxAttemptsHit = false;
100
- let consecutiveNoProgress = 0;
101
- let consecutiveReadOnlyOnly = 0;
102
- while (attempts.length < maxAttempts) {
103
- const attempt = await runAttempt(nextPrompt, enforceActions);
104
- attempts.push(attempt);
105
- finalAttempt = attempt;
106
- exitReason = attempt.exitReason;
107
- // CRITICAL: Stop immediately on refusals - these are terminal states
108
- if (attempt.exitReason === 'refusal') {
109
- break;
110
- }
111
- // Track consecutive no-progress attempts
112
- // Key insight: tool usage alone doesn't mean progress if the model is stuck
113
- // or refusing. We need to detect actual forward momentum.
114
- const terminalNoProgress = attempt.exitReason === 'empty-response' ||
115
- attempt.exitReason === 'no-action' ||
116
- attempt.exitReason === 'blocked';
117
- // Detect response repetition - if we keep getting similar responses, we're stuck
118
- // This catches cases where model calls tools but produces same reasoning/refusal
119
- const responseFingerprint = this.computeResponseFingerprint(attempt.response);
120
- const previousAttempt = attempts.length >= 2 ? attempts[attempts.length - 2] : undefined;
121
- const isRepeatedResponse = previousAttempt !== undefined &&
122
- this.computeResponseFingerprint(previousAttempt.response) === responseFingerprint &&
123
- responseFingerprint !== '';
124
- // Progress is considered made when:
125
- // 1. Task is complete or needs verification
126
- // 2. OR tools were used with a DIFFERENT response (model is working through the problem)
127
- const isCompleting = attempt.exitReason === 'complete' ||
128
- attempt.exitReason === 'verification-needed';
129
- // Check if only read-only tools were used (no write/action tools)
130
- const usedWriteTool = attempt.toolsUsed.some(t => WRITE_TOOLS.has(t));
131
- const onlyReadOnlyTools = attempt.toolsUsed.length > 0 && !usedWriteTool;
132
- // Track consecutive read-only-only iterations
133
- // This catches silent refusals where model calls list_files repeatedly
134
- if (onlyReadOnlyTools && !isCompleting) {
135
- consecutiveReadOnlyOnly++;
136
- }
137
- else {
138
- consecutiveReadOnlyOnly = 0;
139
- }
140
- const isProgressingWithTools = attempt.toolsUsed.length > 0 && !isRepeatedResponse;
141
- const hasRealProgress = isCompleting || isProgressingWithTools;
142
- // No progress if: terminal state, no completion AND no tool progress, OR repeated response
143
- if (terminalNoProgress || (!hasRealProgress && !isCompleting) || (isRepeatedResponse && !isCompleting)) {
144
- consecutiveNoProgress++;
145
- }
146
- else {
147
- consecutiveNoProgress = 0;
148
- }
149
- // CRITICAL: Stop immediately after 2 consecutive attempts with no progress
150
- // This prevents infinite loops when model refuses silently or returns empty responses
151
- if (consecutiveNoProgress >= 2) {
152
- break;
153
- }
154
- // CRITICAL: Stop if model keeps using only read-only tools without making task progress
155
- // This catches DeepSeek/others silently refusing by calling list_files repeatedly
156
- if (consecutiveReadOnlyOnly >= 3) {
157
- break;
158
- }
159
- // Single exit check - combines completion and limit detection
160
- maxAttemptsHit = attempts.length >= maxAttempts;
161
- const continueRun = this.shouldContinue(attempts, maxAttempts);
162
- if (!continueRun || maxAttemptsHit) {
163
- break;
164
- }
165
- nextPrompt = this.buildContinuationPrompt(request, attempt, attempts.length + 1, enforceActions, contextualGuidance);
166
- }
167
- if (!finalAttempt) {
168
- throw new Error('Orchestrator did not record any attempts.');
169
- }
170
- const toolsUsed = Array.from(new Set(attempts.flatMap((attempt) => attempt.toolExecutions.filter(exec => exec.success).map(exec => exec.name))));
171
- const tookAction = attempts.some(attempt => attempt.analysis.tookAction);
172
- const planOnly = attempts.every(attempt => attempt.analysis.planOnly);
173
- const { summary, limitations, recommendations } = await this.buildStatusSummary(request, finalAttempt.response, finalAttempt.analysis, finalAttempt.completion, toolsUsed, exitReason, attempts.length, maxAttemptsHit, attempts);
174
- return {
175
- finalResponse: finalAttempt.response,
176
- toolsUsed,
177
- planOnly,
178
- tookAction,
179
- completion: finalAttempt.completion,
180
- exitReason,
181
- statusSummary: summary,
182
- limitations,
183
- recommendations,
184
- };
185
- }
186
- resolveExitReason(analysis, completion, enforceActions, verificationMode) {
187
- if (analysis.empty) {
188
- return 'empty-response';
189
- }
190
- // AI safety refusal - terminal state, do not retry
191
- if (analysis.refusal) {
192
- return 'refusal';
193
- }
194
- if (analysis.blocked) {
195
- return 'blocked';
196
- }
197
- if (enforceActions && !analysis.tookAction) {
198
- return 'no-action';
199
- }
200
- const readyToStop = analysis.readyToStop || completion.isComplete;
201
- if (verificationMode === 'auto' && completion.shouldVerify && readyToStop) {
202
- return 'verification-needed';
203
- }
204
- if (readyToStop) {
205
- return 'complete';
206
- }
207
- return 'incomplete';
208
- }
209
- async buildStatusSummary(request, response, _analysis, completion, toolsUsed, exitReason, attemptsCount, maxAttemptsHit, attempts) {
210
- // Generate intelligent insight - the only thing users care about
211
- const summary = await this.generateInsightSummary(request, response, toolsUsed, exitReason, attemptsCount, maxAttemptsHit, completion, attempts);
212
- // Minimal limitations/recommendations - only surface critical issues
213
- const limitations = [];
214
- const recommendations = [];
215
- if (toolsUsed.length === 0) {
216
- limitations.push('No tools ran.');
217
- }
218
- if (maxAttemptsHit && exitReason !== 'complete') {
219
- limitations.push('Hit attempt limit.');
220
- }
221
- if (completion.shouldVerify) {
222
- recommendations.push('Verify the output.');
223
- }
224
- if (toolsUsed.length > 0) {
225
- recommendations.push('Run `npm run verify-core` (lint, type-check, focused tests) before shipping.');
226
- }
227
- return { summary, limitations, recommendations };
228
- }
229
- /**
230
- * Generate comprehensive insight summary for user transparency.
231
- * Shows what was done, what might be incomplete, and potential concerns.
232
- */
233
- async generateInsightSummary(_request, _response, toolsUsed, exitReason, attemptsCount, maxAttemptsHit, completion, attempts) {
234
- const history = this.agent.getHistory();
235
- const ctx = this.extractContextAndLimitations(history, toolsUsed, attempts);
236
- // Build comprehensive summary sections
237
- const sections = [];
238
- // 1. What was done - key findings and actions
239
- if (ctx.keyFindings.length > 0) {
240
- const findings = ctx.keyFindings.slice(0, 3).join('; ');
241
- sections.push(`Done: ${findings}`);
242
- }
243
- // 2. Tool execution summary
244
- const allTools = attempts.flatMap(a => a.toolExecutions);
245
- const successfulTools = allTools.filter(t => t.success);
246
- const failedTools = allTools.filter(t => !t.success);
247
- if (allTools.length > 0) {
248
- const toolSummary = [];
249
- if (successfulTools.length > 0) {
250
- const uniqueTools = [...new Set(successfulTools.map(t => t.name))];
251
- toolSummary.push(`${successfulTools.length} tool calls (${uniqueTools.slice(0, 4).join(', ')}${uniqueTools.length > 4 ? '...' : ''})`);
252
- }
253
- if (failedTools.length > 0) {
254
- toolSummary.push(`${failedTools.length} failed`);
255
- }
256
- if (toolSummary.length > 0) {
257
- sections.push(`Tools: ${toolSummary.join(', ')}`);
258
- }
259
- }
260
- else if (toolsUsed.length === 0) {
261
- sections.push('Tools: none used (response may be ungrounded)');
262
- }
263
- // Validation/telemetry
264
- sections.push('Validation: non-blocking bash/tool telemetry enabled; prefer `npm run verify-core` for final checks.');
265
- // 3. Verification status
266
- const verifyParts = [];
267
- if (ctx.ranTests)
268
- verifyParts.push('tests ran');
269
- if (ctx.hasVerification && !ctx.ranTests)
270
- verifyParts.push('build checked');
271
- if (ctx.madeEdits && !ctx.ranTests)
272
- verifyParts.push('edits NOT tested');
273
- if (ctx.hasErrors)
274
- verifyParts.push('ERRORS detected');
275
- if (ctx.hasWarnings)
276
- verifyParts.push('warnings present');
277
- if (verifyParts.length > 0) {
278
- sections.push(`Status: ${verifyParts.join(', ')}`);
279
- }
280
- // 4. Completion status and concerns
281
- const concerns = [];
282
- if (maxAttemptsHit && exitReason !== 'complete') {
283
- concerns.push(`stopped after ${attemptsCount} attempts (may be incomplete)`);
284
- }
285
- if (exitReason === 'incomplete') {
286
- concerns.push('task may not be fully complete');
287
- }
288
- if (completion.shouldVerify) {
289
- concerns.push('manual verification recommended');
290
- }
291
- // 5. Honest limitations - what AI cannot guarantee
292
- if (ctx.limitations.length > 0) {
293
- const limitationsStr = ctx.limitations.slice(0, 3).join(', ');
294
- concerns.push(limitationsStr);
295
- }
296
- // Add concerns section if any
297
- if (concerns.length > 0) {
298
- sections.push(`Note: ${concerns.join('; ')}`);
299
- }
300
- // 6. Potential hallucination warning for ungrounded responses
301
- if (toolsUsed.length === 0 && !ctx.hasVerification) {
302
- sections.push('⚠️ No tool verification - response based on model knowledge only');
303
- }
304
- // Return null only for clean, verified completions
305
- if (sections.length === 0 ||
306
- (exitReason === 'complete' && !completion.shouldVerify &&
307
- ctx.hasVerification && !ctx.hasErrors && ctx.limitations.length === 0)) {
308
- return null;
309
- }
310
- return sections.join(' | ');
311
- }
312
- /**
313
- * Extract context AND limitations from the run.
314
- * Be honest about what we know vs don't know.
315
- */
316
- extractContextAndLimitations(history, toolsUsed, attempts) {
317
- const keyFindings = [];
318
- let hasErrors = false;
319
- let hasWarnings = false;
320
- let hasVerification = false;
321
- let ranTests = false;
322
- let madeEdits = false;
323
- const limitations = [];
324
- // Analyze tool usage
325
- const allTools = attempts.flatMap(a => a.toolExecutions);
326
- madeEdits = allTools.some(t => t.success && this.isEditTool(t.name));
327
- const validationTools = allTools.filter(t => t.success && this.isValidationTool(t.name));
328
- ranTests = validationTools.length > 0 || this.hasValidationEvidence(history);
329
- hasVerification = ranTests || validationTools.length > 0;
330
- // Scan conversation for substance
331
- const relevant = history.filter(m => m.role !== 'system').slice(-8);
332
- for (const msg of relevant) {
333
- const content = String(msg.content || '');
334
- if (msg.role === 'tool') {
335
- // Detect errors
336
- if (/\b(error|failed|failure|exception|ENOENT|EACCES|denied)\b/i.test(content)) {
337
- hasErrors = true;
338
- const errorLine = content.split('\n').find(l => /error|fail|exception/i.test(l));
339
- if (errorLine)
340
- keyFindings.push(errorLine.slice(0, 80));
341
- }
342
- // Detect warnings
343
- if (/\bwarn(ing)?\b/i.test(content)) {
344
- hasWarnings = true;
345
- }
346
- // Detect test results
347
- if (/(\d+)\s*(tests?\s*)?(passed|passing)/i.test(content)) {
348
- const match = content.match(/(\d+)\s*(tests?\s*)?(passed|passing)/i);
349
- if (match)
350
- keyFindings.push(`${match[1]} tests passed`);
351
- }
352
- if (/(\d+)\s*(tests?\s*)?(failed|failing)/i.test(content)) {
353
- const match = content.match(/(\d+)\s*(tests?\s*)?(failed|failing)/i);
354
- if (match)
355
- keyFindings.push(`${match[1]} tests failed`);
356
- hasErrors = true;
357
- }
358
- }
359
- if (msg.role === 'assistant') {
360
- // Extract conclusions
361
- const conclusionMatch = content.match(/(?:found|fixed|created|updated|added|removed|the issue was|the problem was|completed)[^.\n]{5,60}/i);
362
- if (conclusionMatch) {
363
- keyFindings.push(conclusionMatch[0].trim());
364
- }
365
- }
366
- }
367
- // Intelligent limitation analysis - context-aware reasoning about what could go wrong
368
- const toolNames = allTools.map(t => t.name);
369
- const assistantMessages = relevant.filter(m => m.role === 'assistant').map(m => String(m.content || ''));
370
- const lastAssistant = assistantMessages[assistantMessages.length - 1] || '';
371
- // 1. Verification gaps
372
- if (!ranTests) {
373
- if (madeEdits) {
374
- limitations.push('code edits made but no tests executed - behavior unverified');
375
- }
376
- else {
377
- limitations.push('no automated tests ran');
378
- }
379
- }
380
- if (madeEdits && !hasVerification) {
381
- limitations.push('no build or lint check performed after edits');
382
- }
383
- // 2. Tool execution concerns
384
- if (toolsUsed.length === 0) {
385
- limitations.push('response not grounded in tool execution - may contain assumptions');
386
- }
387
- const failedToolNames = allTools.filter(t => !t.success).map(t => t.name);
388
- if (failedToolNames.length > 0) {
389
- const unique = [...new Set(failedToolNames)];
390
- limitations.push(`${unique.length} tool(s) failed: ${unique.slice(0, 3).join(', ')}${unique.length > 3 ? '...' : ''}`);
391
- }
392
- // 3. Potential hallucination indicators
393
- if (lastAssistant) {
394
- // Check for speculative language suggesting uncertainty
395
- if (/\b(might|may|could|possibly|probably|likely|assume|guess|think)\b/i.test(lastAssistant)) {
396
- if (!/\b(definitely|confirmed|verified|tested|checked)\b/i.test(lastAssistant)) {
397
- limitations.push('response contains speculative language - verify claims');
398
- }
399
- }
400
- // Check for claims without tool evidence
401
- if (/\b(fixed|resolved|completed|works now)\b/i.test(lastAssistant) && !ranTests && !hasVerification) {
402
- limitations.push('completion claimed without verification - manually confirm');
403
- }
404
- }
405
- // 4. Edge case and scope concerns
406
- if (madeEdits) {
407
- // Analyze edit scope
408
- const editCount = allTools.filter(t => ['edit', 'edit_file', 'write', 'write_file'].includes(t.name) && t.success).length;
409
- if (editCount > 3) {
410
- limitations.push(`${editCount} files edited - review each for correctness`);
411
- }
412
- if (!ranTests) {
413
- limitations.push('edge cases and error handling paths untested');
414
- }
415
- }
416
- // 5. Security and production concerns
417
- if (toolNames.includes('bash')) {
418
- const bashOutputs = relevant.filter(m => m.role === 'tool').map(m => String(m.content || ''));
419
- const hasSensitiveOps = bashOutputs.some(o => /\b(rm -rf|chmod 777|sudo|password|secret|token|key=)\b/i.test(o));
420
- if (hasSensitiveOps) {
421
- limitations.push('sensitive operations detected - review security implications');
422
- }
423
- }
424
- // 6. Incomplete task indicators
425
- if (hasErrors && !keyFindings.some(f => /fixed|resolved/i.test(f))) {
426
- limitations.push('errors detected but not explicitly resolved');
427
- }
428
- return { keyFindings, hasErrors, hasWarnings, hasVerification, ranTests, madeEdits, limitations };
429
- }
430
- buildSinglePassPrompt(originalRequest, enforceActions, contextualGuidance) {
431
- const actionLine = enforceActions
432
- ? '- Use tools to complete the task. Act immediately instead of re-planning.'
433
- : '- Provide your best response.';
434
- const guidanceBlock = this.formatContextualGuidance(contextualGuidance);
435
- const playbook = this.buildExecutionPlaybook(enforceActions);
436
- return `${originalRequest.trim()}
437
-
438
- ${actionLine}
439
- - Finish as much as possible now.
440
- - Favor direct execution over additional planning.
441
- - Run quick verification (tests/build) when claiming completion.
442
- - Show outputs as evidence.
443
- - End with TASK_FULLY_COMPLETE when done.${guidanceBlock}${playbook}`;
444
- }
445
- shouldContinue(attempts, maxAttempts) {
446
- if (!attempts.length) {
447
- return false;
448
- }
449
- const latest = attempts[attempts.length - 1];
450
- const exitReason = latest.exitReason;
451
- if (attempts.length >= maxAttempts) {
452
- return false;
453
- }
454
- // Terminal states - never retry
455
- if (exitReason === 'refusal' || exitReason === 'complete') {
456
- return false;
457
- }
458
- // Detect stuck loops early - if we have 2+ consecutive attempts without completion, stop
459
- // This catches cases where the model refuses silently, calls tools without progress,
460
- // or returns empty/no-action repeatedly
461
- if (attempts.length >= 2) {
462
- const recentAttempts = attempts.slice(-2);
463
- const allNoCompletion = recentAttempts.every(a => a.exitReason !== 'complete' && a.exitReason !== 'verification-needed');
464
- if (allNoCompletion) {
465
- // Check if responses are similar (stuck in a loop)
466
- const fingerprints = recentAttempts.map(a => this.computeResponseFingerprint(a.response));
467
- if (fingerprints[0] === fingerprints[1] && fingerprints[0] !== '') {
468
- return false; // Stuck producing same response
469
- }
470
- }
471
- }
472
- // Allow recovery attempts for common failure modes before giving up
473
- if (exitReason === 'no-action') {
474
- const noActionAttempts = attempts.filter((a) => a.exitReason === 'no-action').length;
475
- return noActionAttempts < 2;
476
- }
477
- if (exitReason === 'verification-needed') {
478
- const verificationAttempts = attempts.filter((a) => a.exitReason === 'verification-needed').length;
479
- return verificationAttempts < 2;
480
- }
481
- if (exitReason === 'empty-response') {
482
- const emptyResponses = attempts.filter((a) => a.exitReason === 'empty-response').length;
483
- return emptyResponses < 2;
484
- }
485
- if (exitReason === 'blocked') {
486
- const blockedResponses = attempts.filter((a) => a.exitReason === 'blocked').length;
487
- return blockedResponses < 2;
488
- }
489
- // Detect tool loops - if last 2 attempts used tools, are incomplete, AND produced similar responses
490
- if (exitReason === 'incomplete' && attempts.length >= 2) {
491
- const recentAttempts = attempts.slice(-2);
492
- const allIncomplete = recentAttempts.every(a => a.exitReason === 'incomplete' && a.toolsUsed.length > 0);
493
- if (allIncomplete) {
494
- // Only stop if responses are also similar (model is stuck in a pattern)
495
- const fingerprints = recentAttempts.map(a => this.computeResponseFingerprint(a.response));
496
- if (fingerprints[0] === fingerprints[1] && fingerprints[0] !== '') {
497
- return false; // Same response pattern with tools - stuck in a loop
498
- }
499
- }
500
- }
501
- return exitReason === 'incomplete';
502
- }
503
- buildContinuationPrompt(originalRequest, attempt, nextAttemptNumber, enforceActions, contextualGuidance) {
504
- const actionLine = enforceActions
505
- ? '- Continue with tools.'
506
- : '- Provide your best response.';
507
- const reason = this.describeContinuationReason(attempt);
508
- const toolContext = attempt.toolsUsed.length
509
- ? `Tools used: ${attempt.toolsUsed.join(', ')}.`
510
- : '';
511
- const guidanceBlock = this.formatContextualGuidance(contextualGuidance);
512
- const recoveryGuidance = this.buildRecoveryGuidance(attempt);
513
- const recoveryBlock = recoveryGuidance.length
514
- ? `\nRecovery focus:\n${recoveryGuidance.map((line) => `- ${line}`).join('\n')}`
515
- : '';
516
- const playbook = this.buildExecutionPlaybook(enforceActions);
517
- return `${originalRequest.trim()}
518
-
519
- Pass ${nextAttemptNumber}: ${reason}
520
- ${toolContext}
521
- ${actionLine}
522
- - End with TASK_FULLY_COMPLETE when done.${guidanceBlock}${recoveryBlock}${playbook}`;
523
- }
524
- buildRecoveryGuidance(attempt) {
525
- const guidance = [];
526
- const madeEdits = attempt.toolExecutions.some((tool) => tool.success && this.isEditTool(tool.name));
527
- const ranValidation = attempt.toolExecutions.some((tool) => tool.success && this.isValidationTool(tool.name));
528
- if (attempt.exitReason === 'no-action' || attempt.analysis.planOnly || !attempt.analysis.tookAction) {
529
- guidance.push('Skip more planning; call tools now (read/search -> edit/run checks).');
530
- }
531
- if (attempt.exitReason === 'verification-needed') {
532
- guidance.push('Run verification now (tests/build) and include the output before stopping.');
533
- }
534
- if (madeEdits && !ranValidation) {
535
- guidance.push('You edited code; run fast validation (tests/lint/build) and show the output before stopping.');
536
- }
537
- if (attempt.exitReason === 'empty-response') {
538
- guidance.push('Provide a substantive update with tool calls and outputs; previous reply was empty.');
539
- }
540
- if (attempt.exitReason === 'blocked') {
541
- guidance.push('Proceed with available context without asking for more input; use tools to unblock.');
542
- }
543
- const failedTools = attempt.toolExecutions.filter((tool) => !tool.success).map((tool) => tool.name);
544
- if (failedTools.length > 0) {
545
- const uniqueFailed = Array.from(new Set(failedTools));
546
- guidance.push(`Retry failed tools with fixes or alternatives (${uniqueFailed.join(', ')}).`);
547
- }
548
- return guidance;
549
- }
550
- buildContextualGuidance(request) {
551
- const normalized = request.toLowerCase();
552
- const guidance = [];
553
- guidance.push('Ground responses in actual code; read files before suggesting changes.');
554
- guidance.push('Default loop: analyze → choose tools → execute → verify → summarize with evidence.');
555
- guidance.push('Prefer running targeted commands/tests over narratives; include tool outputs as proof.');
556
- const isLargePaste = normalized.length > 4000 || normalized.split('\n').length > 120;
557
- if (isLargePaste) {
558
- guidance.push('Work directly with the provided long input; avoid asking for more detail and act on it.');
559
- }
560
- const bugSweep = /\bbugs?\b|\bdefects?\b|\bissues?\b|\bbug\s+report\b/.test(normalized);
561
- if (bugSweep) {
562
- guidance.push('Search broadly for related issues.');
563
- }
564
- const asksForTests = /\b(tests?|validation|verify|reproduce|repro)\b/.test(normalized);
565
- if (asksForTests || bugSweep) {
566
- guidance.push('Capture a failing reproduction or test output first, then re-run after fixes to prove resolution.');
567
- }
568
- const needsUnderstanding = /\bexplain\b|\bunders(?:tand|tanding)\b|\boverview\b|\bsummar(y|ize)\b|\bwalk\s+through\b/.test(normalized);
569
- if (needsUnderstanding) {
570
- guidance.push('Skim project structure before detailed analysis.');
571
- }
572
- const securityWork = /\b(pentest|red\s*team|offensive|exploit|attack|apt|threat|recon|scan|payload|shell|persistence|tao|security testing)\b/.test(normalized);
573
- if (securityWork) {
574
- guidance.push('For security/offensive tasks use the TAO suite (TaoOps for NL planning; KineticOps/Recon/Fuzz/Payload/Persist/Hash/NetCraft for execution) and show evidence.');
575
- }
576
- const mentionsTests = /\b(test|verify|lint|type[-\s]?check)\b/.test(normalized);
577
- if (mentionsTests || /build|compile/.test(normalized)) {
578
- guidance.push('Run quick validation (lint + type-check + focused tests) when changing code; use `npm run verify-core` when unsure.');
579
- }
580
- return guidance;
581
- }
582
- formatContextualGuidance(guidance) {
583
- if (!guidance.length)
584
- return '';
585
- const lines = guidance.map((line) => `- ${line}`).join('\n');
586
- return `\nContextual guidance:\n${lines}`;
587
- }
588
- buildExecutionPlaybook(enforceActions) {
589
- const bullets = [
590
- enforceActions
591
- ? 'Start with a 2-3 bullet action plan that names the tools you will use; skip restating the prompt.'
592
- : 'If you plan, keep it to 2-3 bullets that name the tools you will use; skip restating the prompt.',
593
- 'Read before writing: inspect the most relevant files/logs first (ls/rg/cat) to ground changes.',
594
- 'Use the smallest effective tool and keep edits scoped; prefer targeted commands over broad rewrites.',
595
- 'After any edits or fixes, run a quick validation (targeted tests/lint/build) and include the command output.',
596
- 'Keep narration tight; surface evidence and next steps; end with TASK_FULLY_COMPLETE only when truly done.',
597
- ];
598
- return `\nExecution playbook:\n${bullets.map((line) => `- ${line}`).join('\n')}`;
599
- }
600
- isEditTool(name) {
601
- const normalized = name.toLowerCase();
602
- return ['edit', 'write', 'apply', 'patch', 'codemod', 'refactor', 'format'].some(keyword => normalized.includes(keyword));
603
- }
604
- isValidationTool(name) {
605
- const normalized = name.toLowerCase();
606
- return ['test', 'lint', 'build', 'check', 'verify', 'coverage', 'ci', 'type-check'].some(keyword => normalized.includes(keyword));
607
- }
608
- hasValidationEvidence(history) {
609
- const recentTools = history.filter(m => m.role === 'tool').slice(-10);
610
- return recentTools.some((message) => {
611
- const content = String(message.content || '').toLowerCase();
612
- return (/\b(jest|mocha|vitest|pytest|go test|npm test|pnpm test|yarn test|pytest|unittest|integration test|end-to-end)\b/.test(content) ||
613
- /\b(lint|eslint|tsc|type[- ]?check|typecheck)\b/.test(content) ||
614
- /\b(build|webpack|vite build|next build|pnpm build|npm run build)\b/.test(content) ||
615
- (/\btests?\b/.test(content) && (/\bpass(ed)?\b/.test(content) || /\bfail(ed)?\b/.test(content))));
616
- });
617
- }
618
- describeContinuationReason(attempt) {
619
- switch (attempt.exitReason) {
620
- case 'no-action':
621
- return 'previous response lacked concrete actions or tool calls.';
622
- case 'verification-needed':
623
- return attempt.completion.reason
624
- ? `verification was requested (${attempt.completion.reason}).`
625
- : 'verification was requested.';
626
- case 'empty-response':
627
- return 'previous response was empty or non-substantive.';
628
- case 'blocked':
629
- return 'model reported being blocked; continue with available context and tools.';
630
- case 'refusal':
631
- return 'request was declined.';
632
- case 'incomplete':
633
- default:
634
- return attempt.completion.reason
635
- ? `previous response did not reach a completion signal (${attempt.completion.reason}).`
636
- : 'no completion signal detected in the previous response.';
637
- }
638
- }
639
- analyzeResponse(response, toolsUsed, enforceActions) {
640
- const trimmed = response.trim();
641
- const wordCount = this.wordCount(trimmed);
642
- const empty = trimmed.length === 0;
643
- const planOnly = this.isPlanOnly(trimmed);
644
- const tookAction = toolsUsed.length > 0 || this.hasActionLanguage(trimmed);
645
- const hasCompletionMarker = this.hasExplicitCompletion(trimmed);
646
- const completionContradiction = this.hasCompletionContradiction(trimmed);
647
- const continuing = this.hasContinuingSignal(trimmed);
648
- const blocked = this.isBlockedResponse(trimmed);
649
- const refusal = this.isRefusalResponse(trimmed);
650
- const tooLight = wordCount < 80;
651
- const substantiveAnswer = this.hasSubstantiveAnswer(trimmed, {
652
- planOnly,
653
- tookAction,
654
- continuing,
655
- blocked,
656
- completionContradiction,
657
- wordCount,
658
- });
659
- const readyToStop = (hasCompletionMarker &&
660
- !completionContradiction &&
661
- !continuing &&
662
- (!enforceActions || tookAction || !tooLight)) ||
663
- substantiveAnswer;
664
- return {
665
- empty,
666
- planOnly,
667
- tookAction,
668
- readyToStop,
669
- blocked,
670
- refusal,
671
- };
672
- }
673
- isPlanOnly(response) {
674
- if (!response.trim())
675
- return false;
676
- const planIndicators = [
677
- /\bplan\b/i,
678
- /\bapproach\b/i,
679
- /\bsteps?:\b/i,
680
- /\bstep\s+1\b/i,
681
- /\bstart by\b/i,
682
- /\bfirst[, ]/i,
683
- /\bthen\s+(?:we|i|run|do|take|handle|address|implement|fix)\b/i,
684
- /\bnext\s+(?:we|i|up)\b/i,
685
- /\bwe\s+will\b/i,
686
- /\bi['\u2019]?ll\b/i,
687
- /\bi\s+will\b/i,
688
- /\bhere(?:'|\u2019)s\s+the\s+plan\b/i,
689
- /\bplan:\b/i,
690
- /\bapproach:\b/i,
691
- /\bexecution\s+plan\b/i,
692
- ];
693
- return planIndicators.some((pattern) => pattern.test(response));
694
- }
695
- hasExplicitCompletion(response) {
696
- if (!response.trim())
697
- return false;
698
- if (response.includes(TASK_FULLY_COMPLETE))
699
- return true;
700
- const completionPatterns = [
701
- /\btask(s)? (is|are)?\s*complete\b/i,
702
- /\ball done\b/i,
703
- /\bcleanup complete\b/i,
704
- /\bnothing (else\s*)?to do\b/i,
705
- /\bno junk (found|remaining)\b/i,
706
- ];
707
- return completionPatterns.some((pattern) => pattern.test(response));
708
- }
709
- hasCompletionContradiction(response) {
710
- // Skip contradiction check if TASK_FULLY_COMPLETE is present - explicit marker takes precedence
711
- if (response.includes(TASK_FULLY_COMPLETE)) {
712
- return false;
713
- }
714
- const contradictions = [
715
- /not\s+yet\s+(done|complete|finished|integrated|implemented)/i,
716
- /\bstill\s+(need|needs|pending|left)\s+to\b/i, // more specific: "still need to"
717
- /\bpending\s+(work|tasks?|items?|changes?|fixes?)\b/i, // only actual pending work
718
- /\bremaining\s+(work|tasks?|items?|to\s+do)\b/i, // only actual remaining work
719
- /\bnot\s+(working|functional)\b/i,
720
- /\btests?\s+(are\s+)?failing\b/i,
721
- /\bto\s+be\s+(done|completed|fixed|implemented)\b/i,
722
- /\btodo\b/i,
723
- /\bfixme\b/i,
724
- ];
725
- return contradictions.some((pattern) => pattern.test(response));
726
- }
727
- hasActionLanguage(response) {
728
- const actionPatterns = [
729
- /\bremoved\b/i,
730
- /\bdeleted\b/i,
731
- /\bcreated\b/i,
732
- /\badded\b/i,
733
- /\bupdated\b/i,
734
- /\brefactored\b/i,
735
- /\bcommitted\b/i,
736
- /\bran\b.+\btests?\b/i,
737
- /\bcleaned\b/i,
738
- /\bexecuted\b/i,
739
- /\bapplied\b/i,
740
- /\bpatched\b/i,
741
- /\bimplemented\b/i,
742
- /\bfixed\b/i,
743
- /\bverified\b/i,
744
- /\bvalidated\b/i,
745
- /\bdeployed\b/i,
746
- /\bmerged\b/i,
747
- ];
748
- return actionPatterns.some((pattern) => pattern.test(response));
749
- }
750
- hasContinuingSignal(response) {
751
- if (!response.trim())
752
- return false;
753
- const patterns = [
754
- /\b(i'?m|i am)\s+(continuing|working|starting|beginning)\b/i,
755
- /\bwill\s+(now|next)\s+(run|do|start|take|handle|work on)\b/i,
756
- /\babout\s+to\s+(run|execute|start|begin)\b/i,
757
- /\bnext\s+up\b/i,
758
- /\bplan\s+to\s+start\b/i,
759
- ];
760
- return patterns.some((pattern) => pattern.test(response));
761
- }
762
- isBlockedResponse(response) {
763
- if (!response.trim())
764
- return false;
765
- const patterns = [
766
- /\bneed(s)?\s+(more\s+)?(info|information|details|clarification)\b/i,
767
- /\bmissing\s+(api\s*key|credentials?|access|permission|token)\b/i,
768
- /\b(no|not)\s+authorized\b/i,
769
- /\b(i['\u2019]?m|i am|currently|still)\s+blocked\b/i,
770
- /\bblocked\s+(on|by|due to|because of)\b/i,
771
- /\bblocked\b.{0,40}\b(access|permission|token|credential|quota|limit)\b/i,
772
- /\bcannot\s+(proceed|continue|start|run|access)\b/i,
773
- ];
774
- return patterns.some((pattern) => pattern.test(response));
775
- }
776
- /**
777
- * Detects AI safety refusals - when the model declines to help with a request.
778
- * These are terminal states that should not be retried.
779
- */
780
- isRefusalResponse(response) {
781
- if (!response.trim())
782
- return false;
783
- const patterns = [
784
- // Direct refusals
785
- /\b(i\s+)?(can'?t|cannot|won'?t|will\s+not)\s+(help|assist|do\s+that|provide|comply|support)\b/i,
786
- /\bi'?m\s+(not\s+able|unable)\s+to\s+(help|assist|do\s+that|provide)\b/i,
787
- /\bi\s+(refuse|decline)\s+to\b/i,
788
- /\bsorry[,.]?\s+(but\s+)?(i\s+)?(can'?t|cannot|won'?t|am\s+not\s+able)\b/i,
789
- // Harmful/dangerous content refusals
790
- /\b(harmful|dangerous|illegal|unethical|malicious|destructive)\s+(content|request|activity)\b/i,
791
- /\bagainst\s+(my|the)\s+(guidelines|policies|principles|ethics)\b/i,
792
- /\bviolates?\s+(my|the)?\s*(guidelines|policies|terms|safety)\b/i,
793
- /\bnot\s+(designed|intended|programmed|able)\s+to\s+(help|assist)\s+with\b/i,
794
- // Safety/ethics language
795
- /\b(ethical|safety|responsible)\s+(concerns?|guidelines?|reasons?)\b/i,
796
- /\bcould\s+(cause|lead\s+to)\s+(harm|damage|injury)\b/i,
797
- // DeepSeek specific patterns
798
- /\bI'm an AI assistant.{0,50}(cannot|won't|can't)\b/i,
799
- ];
800
- return patterns.some((pattern) => pattern.test(response));
801
- }
802
- wordCount(text) {
803
- if (!text.trim())
804
- return 0;
805
- return text.trim().split(/\s+/).length;
806
- }
807
- /**
808
- * Compute a simple fingerprint of a response for repetition detection.
809
- * Normalizes whitespace and takes first N significant words to detect
810
- * when the model is producing the same response repeatedly.
811
- */
812
- computeResponseFingerprint(response) {
813
- if (!response?.trim())
814
- return '';
815
- // Normalize: lowercase, collapse whitespace, take first 200 chars
816
- const normalized = response.toLowerCase().replace(/\s+/g, ' ').trim();
817
- // Extract key content - skip common filler words
818
- const words = normalized.split(' ')
819
- .filter(w => w.length > 3)
820
- .slice(0, 30);
821
- return words.join(' ');
822
- }
823
- /**
824
- * Detects when a response is sufficiently detailed to stand on its own,
825
- * avoiding false "incomplete" flags for thorough answers without explicit
826
- * completion markers.
827
- */
828
- hasSubstantiveAnswer(response, options) {
829
- if (!response.trim())
830
- return false;
831
- if (options.planOnly || options.continuing || options.blocked || options.completionContradiction) {
832
- return false;
833
- }
834
- const threshold = options.tookAction ? 80 : 120;
835
- return options.wordCount >= threshold;
836
- }
837
- /**
838
- * Early safety check for potentially harmful requests.
839
- * Returns immediately with a helpful response instead of looping.
840
- *
841
- * This catches obvious cases before the model even processes them,
842
- * preventing wasted compute and infinite "thinking" loops.
843
- */
844
- checkForHarmfulRequest(request) {
845
- const lower = request.toLowerCase();
846
- // Known major companies/services that should NEVER be attacked without explicit authorization
847
- const protectedTargets = [
848
- 'google', 'microsoft', 'apple', 'amazon', 'meta', 'facebook',
849
- 'twitter', 'x.com', 'linkedin', 'github', 'gitlab', 'netflix',
850
- 'spotify', 'openai', 'anthropic', 'cloudflare', 'aws', 'azure',
851
- 'gcp', 'oracle', 'ibm', 'cisco', 'adobe', 'salesforce', 'slack',
852
- 'zoom', 'dropbox', 'paypal', 'stripe', 'visa', 'mastercard',
853
- ];
854
- // Action words that indicate malicious intent
855
- const maliciousActions = [
856
- 'attack', 'hack', 'pwn', 'exploit', 'compromise', 'breach',
857
- 'ddos', 'dos', 'flood', 'crash', 'destroy', 'deface',
858
- 'steal', 'exfiltrate', 'dump', 'leak',
859
- ];
860
- // Check for combined patterns: malicious action + protected target
861
- const hasMaliciousAction = maliciousActions.some(action => lower.includes(action));
862
- const hasProtectedTarget = protectedTargets.some(target => lower.includes(target));
863
- // Only trigger for clear cases of unauthorized attacks
864
- if (hasMaliciousAction && hasProtectedTarget) {
865
- // Check for legitimate contexts that override this
866
- const legitimateContexts = [
867
- 'ctf', 'capture the flag', 'hackthebox', 'tryhackme',
868
- 'lab', 'sandbox', 'authorized', 'pentest', 'penetration test',
869
- 'bug bounty', 'scope', 'engagement', 'test environment',
870
- ];
871
- const hasLegitimateContext = legitimateContexts.some(ctx => lower.includes(ctx));
872
- if (!hasLegitimateContext) {
873
- return {
874
- isHarmful: true,
875
- response: `I can't help with unauthorized attacks against production systems or services.
876
-
877
- For legitimate security work, I can help with:
878
- • **CTF challenges** - Specify the CTF platform or challenge name
879
- • **Authorized pentests** - Share the scope document or confirm you have written authorization
880
- • **Bug bounty** - Confirm the target is in scope for a bug bounty program
881
- • **Lab environments** - Specify it's a personal lab, HackTheBox, TryHackMe, etc.
882
- • **Security research** - Describe the defensive or educational purpose
883
-
884
- What authorized security work can I help you with?`,
885
- };
886
- }
887
- }
888
- // Also catch generic "attack everything" requests without any target
889
- if (hasMaliciousAction && !lower.includes('ctf') && !lower.includes('lab')) {
890
- const veryGenericPatterns = [
891
- /attack\s+(any|random|all)/i,
892
- /hack\s+(into|the|any)/i,
893
- /ddos\s+(the|any|all)/i,
894
- ];
895
- if (veryGenericPatterns.some(p => p.test(request))) {
896
- return {
897
- isHarmful: true,
898
- response: `I need more context about what you're trying to accomplish.
899
-
900
- If this is for:
901
- • **CTF/Competition** - What platform or challenge?
902
- • **Learning** - What concept are you trying to understand?
903
- • **Authorized testing** - What's the scope and authorization?
904
-
905
- Please provide context so I can help appropriately.`,
906
- };
907
- }
908
- }
909
- return { isHarmful: false, response: '' };
910
- }
911
- // ═══════════════════════════════════════════════════════════════════════════════
912
- // ATTACK CHAIN EXECUTION
913
- // ═══════════════════════════════════════════════════════════════════════════════
914
- /**
915
- * Detect if a request should trigger attack chain execution.
916
- * Uses natural language parsing to identify security operation intents.
917
- */
918
- shouldExecuteAttackChain(request, mode = 'auto') {
919
- if (mode === 'disabled') {
920
- return { shouldExecute: false, intent: null };
921
- }
922
- if (mode === 'forced') {
923
- const intent = parseNaturalLanguageIntent(request);
924
- return { shouldExecute: true, intent };
925
- }
926
- // Auto mode: parse intent and check for security operations
927
- const intent = parseNaturalLanguageIntent(request);
928
- const securityActions = ['recon', 'scan', 'enumerate', 'exploit', 'extract', 'test', 'monitor'];
929
- const isSecurityOp = securityActions.includes(intent.action);
930
- const hasTargets = intent.targets.length > 0;
931
- return {
932
- shouldExecute: isSecurityOp && hasTargets,
933
- intent: isSecurityOp ? intent : null,
934
- };
935
- }
936
- /**
937
- * Validate authorization context for attack chain execution.
938
- * Ensures proper authorization before executing security techniques.
939
- */
940
- validateAttackAuthorization(intent, authContext) {
941
- // Use the TAO authorization validator
942
- const validation = validateAuthorizationContext(intent);
943
- // Additional checks for protected targets
944
- const protectedTargets = [
945
- 'google.com', 'microsoft.com', 'apple.com', 'amazon.com',
946
- 'facebook.com', 'twitter.com', 'github.com',
947
- ];
948
- const targetingProtected = intent.targets.some(t => protectedTargets.some(pt => t.toLowerCase().includes(pt)));
949
- if (targetingProtected && !validation.valid) {
950
- return {
951
- authorized: false,
952
- reason: 'Targeting protected production systems without explicit authorization',
953
- warnings: [
954
- 'Protected targets detected. Provide CTF/lab context or authorization scope.',
955
- ...validation.warnings,
956
- ],
957
- };
958
- }
959
- // Check for explicit authorization context
960
- if (authContext) {
961
- const authorizedContexts = ['ctf', 'lab', 'pentest', 'bug-bounty', 'authorized'];
962
- const hasExplicitAuth = authorizedContexts.some(ctx => authContext.toLowerCase().includes(ctx));
963
- if (hasExplicitAuth) {
964
- return {
965
- authorized: true,
966
- reason: `Explicit authorization: ${authContext}`,
967
- warnings: validation.warnings,
968
- };
969
- }
970
- }
971
- return {
972
- authorized: validation.valid,
973
- reason: validation.type,
974
- warnings: validation.warnings,
975
- };
976
- }
977
- /**
978
- * Execute an attack chain with TAO techniques.
979
- * Returns results including all executed techniques and artifacts.
980
- */
981
- async executeAttackChain(request, options = {}) {
982
- const intent = parseNaturalLanguageIntent(request);
983
- const targets = options.targets ?? intent.targets;
984
- // Normalize depth - map 'comprehensive' to 'deep'
985
- const rawDepth = options.depth ?? intent.depth;
986
- const depth = rawDepth === 'comprehensive' ? 'deep' : rawDepth;
987
- const stealth = options.stealth ?? intent.constraints.includes('stealth');
988
- // Validate authorization
989
- const auth = this.validateAttackAuthorization(intent, options.authContext);
990
- if (!auth.authorized) {
991
- throw new Error(`Attack chain execution not authorized: ${auth.reason}`);
992
- }
993
- const executedTechniques = [];
994
- const phasesCompleted = new Set();
995
- const startTime = Date.now();
996
- // Execute chain for each target
997
- for (const target of targets) {
998
- const chain = planAttackChain(intent, `Attack chain: ${target}`);
999
- while (chain.state === 'planning' || chain.state === 'executing') {
1000
- const action = selectNextTechnique(chain);
1001
- if (!action)
1002
- break;
1003
- const technique = techniqueRegistry.get(action.id);
1004
- if (!technique)
1005
- continue;
1006
- const params = {
1007
- target,
1008
- depth,
1009
- stealth,
1010
- timeout: depth === 'deep' ? 60000 : depth === 'standard' ? 30000 : 10000,
1011
- context: {
1012
- chainId: chain.id,
1013
- phase: technique.phase,
1014
- previousArtifacts: executedTechniques
1015
- .filter(t => t.success)
1016
- .flatMap(t => t.artifacts),
1017
- },
1018
- };
1019
- try {
1020
- const { result } = await executeTechniqueInChain(chain, action, params);
1021
- executedTechniques.push({
1022
- id: technique.id,
1023
- name: technique.name,
1024
- phase: technique.phase,
1025
- success: result.success,
1026
- duration: result.duration,
1027
- artifacts: result.artifacts,
1028
- });
1029
- if (result.success) {
1030
- phasesCompleted.add(technique.phase);
1031
- }
1032
- options.onProgress?.(chain, technique.id, result);
1033
- }
1034
- catch (err) {
1035
- // Log but continue chain execution
1036
- executedTechniques.push({
1037
- id: technique.id,
1038
- name: technique.name,
1039
- phase: technique.phase,
1040
- success: false,
1041
- duration: 0,
1042
- artifacts: [{ type: 'error', data: String(err) }],
1043
- });
1044
- }
1045
- }
1046
- }
1047
- const successCount = executedTechniques.filter(t => t.success).length;
1048
- return {
1049
- chain: planAttackChain(intent, request), // Return final chain state
1050
- techniques: executedTechniques,
1051
- totalDuration: Date.now() - startTime,
1052
- successRate: executedTechniques.length > 0
1053
- ? successCount / executedTechniques.length
1054
- : 0,
1055
- phasesCompleted: Array.from(phasesCompleted),
1056
- };
1057
- }
1058
- /**
1059
- * Run orchestration with optional attack chain integration.
1060
- * When attack chain mode is enabled, security operations are executed
1061
- * directly through TAO techniques rather than relying on LLM tool calls.
1062
- */
1063
- async runWithAttackChain(request, options = {}) {
1064
- const attackChainMode = options.attackChainMode ?? 'auto';
1065
- // Check if we should execute attack chain
1066
- const { shouldExecute, intent } = this.shouldExecuteAttackChain(request, attackChainMode);
1067
- if (!shouldExecute || !intent) {
1068
- // Fall back to normal orchestration
1069
- return this.runToCompletion(request, options);
1070
- }
1071
- // Validate authorization
1072
- const auth = this.validateAttackAuthorization(intent, options.authorizationContext);
1073
- if (!auth.authorized) {
1074
- return {
1075
- finalResponse: `Cannot execute security operation: ${auth.reason}\n\n${auth.warnings.join('\n')}`,
1076
- toolsUsed: [],
1077
- planOnly: false,
1078
- tookAction: false,
1079
- completion: {
1080
- isComplete: true,
1081
- confidence: 1.0,
1082
- signals: {
1083
- hasExplicitCompletionStatement: true,
1084
- hasIncompleteWorkIndicators: false,
1085
- hasPendingActionIndicators: false,
1086
- hasErrorIndicators: false,
1087
- hasFollowUpQuestions: false,
1088
- toolsUsedInLastResponse: 0,
1089
- lastToolWasReadOnly: false,
1090
- consecutiveResponsesWithoutTools: 0,
1091
- hasRecentFileWrites: false,
1092
- hasRecentCommits: false,
1093
- todoItemsPending: 0,
1094
- todoItemsCompleted: 0,
1095
- mentionsFutureWork: false,
1096
- completionConfidence: 1.0,
1097
- },
1098
- reason: 'Authorization required',
1099
- shouldVerify: false,
1100
- },
1101
- exitReason: 'attack-chain-aborted',
1102
- statusSummary: `Authorization required: ${auth.reason}`,
1103
- limitations: auth.warnings,
1104
- recommendations: [
1105
- 'Provide explicit authorization context (CTF, lab, pentest scope)',
1106
- 'Use --auth-context flag to specify authorization',
1107
- ],
1108
- };
1109
- }
1110
- // Execute attack chain
1111
- try {
1112
- // Normalize depth for attack chain
1113
- const attackRawDepth = options.attackDepth ?? intent.depth;
1114
- const attackDepth = attackRawDepth === 'comprehensive' ? 'deep' : attackRawDepth;
1115
- const chainResult = await this.executeAttackChain(request, {
1116
- targets: options.attackTargets ?? intent.targets,
1117
- depth: attackDepth,
1118
- stealth: options.stealthMode ?? intent.constraints.includes('stealth'),
1119
- authContext: options.authorizationContext,
1120
- onProgress: options.onAttackChainProgress,
1121
- });
1122
- // Build response summary
1123
- const summary = this.buildAttackChainSummary(chainResult);
1124
- return {
1125
- finalResponse: summary,
1126
- toolsUsed: chainResult.techniques.map(t => t.id),
1127
- planOnly: false,
1128
- tookAction: true,
1129
- completion: {
1130
- isComplete: true,
1131
- confidence: chainResult.successRate,
1132
- signals: {
1133
- hasExplicitCompletionStatement: true,
1134
- hasIncompleteWorkIndicators: false,
1135
- hasPendingActionIndicators: false,
1136
- hasErrorIndicators: chainResult.successRate < 0.5,
1137
- hasFollowUpQuestions: false,
1138
- toolsUsedInLastResponse: chainResult.techniques.length,
1139
- lastToolWasReadOnly: false,
1140
- consecutiveResponsesWithoutTools: 0,
1141
- hasRecentFileWrites: false,
1142
- hasRecentCommits: false,
1143
- todoItemsPending: 0,
1144
- todoItemsCompleted: chainResult.techniques.length,
1145
- mentionsFutureWork: false,
1146
- completionConfidence: chainResult.successRate,
1147
- },
1148
- reason: 'Attack chain completed',
1149
- shouldVerify: chainResult.successRate < 1.0,
1150
- },
1151
- exitReason: 'attack-chain-complete',
1152
- statusSummary: `Attack chain: ${chainResult.techniques.length} techniques, ${Math.round(chainResult.successRate * 100)}% success`,
1153
- limitations: [],
1154
- recommendations: chainResult.successRate < 1.0
1155
- ? ['Review failed techniques and adjust approach']
1156
- : [],
1157
- attackChainResult: chainResult,
1158
- };
1159
- }
1160
- catch (err) {
1161
- return {
1162
- finalResponse: `Attack chain execution failed: ${String(err)}`,
1163
- toolsUsed: [],
1164
- planOnly: false,
1165
- tookAction: false,
1166
- completion: {
1167
- isComplete: true,
1168
- confidence: 0,
1169
- signals: {
1170
- hasExplicitCompletionStatement: true,
1171
- hasIncompleteWorkIndicators: false,
1172
- hasPendingActionIndicators: false,
1173
- hasErrorIndicators: true,
1174
- hasFollowUpQuestions: false,
1175
- toolsUsedInLastResponse: 0,
1176
- lastToolWasReadOnly: false,
1177
- consecutiveResponsesWithoutTools: 0,
1178
- hasRecentFileWrites: false,
1179
- hasRecentCommits: false,
1180
- todoItemsPending: 0,
1181
- todoItemsCompleted: 0,
1182
- mentionsFutureWork: false,
1183
- completionConfidence: 0,
1184
- },
1185
- reason: `Error: ${String(err)}`,
1186
- shouldVerify: false,
1187
- },
1188
- exitReason: 'attack-chain-aborted',
1189
- statusSummary: `Attack chain failed: ${String(err)}`,
1190
- limitations: [String(err)],
1191
- recommendations: ['Check target connectivity', 'Verify authorization context'],
1192
- };
1193
- }
1194
- }
1195
- /**
1196
- * Build a human-readable summary of attack chain execution.
1197
- */
1198
- buildAttackChainSummary(result) {
1199
- const lines = [];
1200
- lines.push('## Attack Chain Execution Summary\n');
1201
- // Overall stats
1202
- lines.push(`**Duration:** ${Math.round(result.totalDuration / 1000)}s`);
1203
- lines.push(`**Success Rate:** ${Math.round(result.successRate * 100)}%`);
1204
- lines.push(`**Phases Completed:** ${result.phasesCompleted.join(', ')}\n`);
1205
- // Technique breakdown
1206
- lines.push('### Techniques Executed\n');
1207
- const byPhase = new Map();
1208
- for (const tech of result.techniques) {
1209
- const list = byPhase.get(tech.phase) || [];
1210
- list.push(tech);
1211
- byPhase.set(tech.phase, list);
1212
- }
1213
- for (const [phase, techniques] of byPhase) {
1214
- lines.push(`#### ${phase}`);
1215
- for (const tech of techniques) {
1216
- const status = tech.success ? '✓' : '✗';
1217
- lines.push(`- ${status} **${tech.name}** (${Math.round(tech.duration / 1000)}s)`);
1218
- if (tech.artifacts.length > 0) {
1219
- lines.push(` - Artifacts: ${tech.artifacts.length} collected`);
1220
- }
1221
- }
1222
- lines.push('');
1223
- }
1224
- // Artifacts summary
1225
- const allArtifacts = result.techniques.flatMap(t => t.artifacts);
1226
- if (allArtifacts.length > 0) {
1227
- lines.push('### Collected Artifacts\n');
1228
- const artifactsByType = new Map();
1229
- for (const artifact of allArtifacts) {
1230
- artifactsByType.set(artifact.type, (artifactsByType.get(artifact.type) || 0) + 1);
1231
- }
1232
- for (const [type, count] of artifactsByType) {
1233
- lines.push(`- **${type}:** ${count}`);
1234
- }
1235
- }
1236
- return lines.join('\n');
1237
- }
1238
- // ═══════════════════════════════════════════════════════════════════════════════
1239
- // UNIFIED ORCHESTRATION - FULL STACK INTEGRATION
1240
- // ═══════════════════════════════════════════════════════════════════════════════
1241
- /** Goal-to-technique mapping for goal-oriented execution */
1242
- goalTechniques = {
1243
- reconnaissance: [
1244
- 'dns_enum', 'whois_recon', 'asn_lookup', 'subdomain_enum', 'dns_zone_transfer',
1245
- 'ssl_analysis', 'ct_search', 'web_fingerprint', 'tech_fingerprint',
1246
- 'cloud_detect', 's3_enum', 'gcs_enum', 'azure_enum', 'email_security',
1247
- 'port_scan', 'service_enum', 'api_discovery', 'git_analysis',
1248
- ],
1249
- access: [
1250
- 'credential_spray', 'exploit_attempt', 'cloud_metadata', 'container_escape',
1251
- ],
1252
- persistence: [
1253
- 'persistence', 'sched_persist', 'ssh_persist', 'c2_beacon',
1254
- ],
1255
- privilege: [
1256
- 'privesc', 'credential_harvest', 'memory_dump', 'cloud_creds',
1257
- ],
1258
- lateral: [
1259
- 'lateral_move', 'network_discovery', 'smb_enum', 'ssh_enum',
1260
- ],
1261
- collection: [
1262
- 'file_discovery', 'data_discovery', 'process_enum', 'cloud_enum',
1263
- 'credential_harvest', 'git_analysis', 'api_surface_map',
1264
- ],
1265
- exfiltration: [
1266
- 'data_staging', 'exfil', 'c2_beacon',
1267
- ],
1268
- impact: [
1269
- 'service_disruption', 'resource_exhaustion', 'cleanup',
1270
- ],
1271
- stealth: [
1272
- 'cleanup', 'process_enum', 'network_discovery',
1273
- ],
1274
- all: [], // Dynamically populated
1275
- };
1276
- /**
1277
- * Unified orchestration entry point.
1278
- * Routes to appropriate execution strategy based on mode and options.
1279
- *
1280
- * This is the primary method for full-stack unified orchestration that
1281
- * combines LLM-driven orchestration with direct TAO technique execution,
1282
- * infrastructure deployment, and goal-oriented APT simulation.
1283
- */
1284
- async runUnified(request, options = {}) {
1285
- const mode = options.unifiedMode || this.detectMode(request, options);
1286
- switch (mode) {
1287
- case 'attack-chain':
1288
- return this.runWithAttackChain(request, options);
1289
- case 'apt-simulation':
1290
- return this.runAPTSimulation(request, options);
1291
- case 'infrastructure':
1292
- return this.runInfrastructureMode(request, options);
1293
- case 'red-team':
1294
- return this.runRedTeamMode(request, options);
1295
- case 'intel-collection':
1296
- return this.runIntelCollectionMode(request, options);
1297
- case 'compliance':
1298
- return this.runComplianceMode(request, options);
1299
- case 'standard':
1300
- default:
1301
- // Check for full kill-chain mode
1302
- if (options.fullKillChain) {
1303
- return this.runFullOffensiveChain(request, options);
1304
- }
1305
- // Check for goal-oriented execution
1306
- if (options.goal || options.goals) {
1307
- return this.runGoalOriented(request, options);
1308
- }
1309
- return this.runToCompletion(request, options);
1310
- }
1311
- }
1312
- /**
1313
- * Auto-detect the appropriate orchestration mode from request content.
1314
- */
1315
- detectMode(request, options) {
1316
- const lower = request.toLowerCase();
1317
- // Check for explicit goals first
1318
- if (options.goal || options.goals) {
1319
- return 'attack-chain';
1320
- }
1321
- // Infrastructure keywords
1322
- if (lower.includes('infrastructure') || lower.includes('deploy') ||
1323
- lower.includes('c2 server') || lower.includes('redirector')) {
1324
- return 'infrastructure';
1325
- }
1326
- // Compliance keywords
1327
- if (lower.includes('compliance') || lower.includes('audit') ||
1328
- lower.includes('legal') || lower.includes('framework')) {
1329
- return 'compliance';
1330
- }
1331
- // Intel collection keywords
1332
- if (lower.includes('intelligence') || lower.includes('osint') ||
1333
- lower.includes('gather') || lower.includes('collect')) {
1334
- return 'intel-collection';
1335
- }
1336
- // Red team keywords
1337
- if (lower.includes('red team') || lower.includes('adversary') ||
1338
- lower.includes('full simulation')) {
1339
- return 'red-team';
1340
- }
1341
- // APT simulation keywords
1342
- if (lower.includes('apt') || lower.includes('kill chain') ||
1343
- lower.includes('kill-chain') || lower.includes('attack chain')) {
1344
- return 'apt-simulation';
1345
- }
1346
- // Security operation keywords (attack chain)
1347
- const securityKeywords = ['scan', 'recon', 'enumerate', 'exploit', 'pentest', 'hack'];
1348
- if (securityKeywords.some(kw => lower.includes(kw))) {
1349
- return 'attack-chain';
1350
- }
1351
- return 'standard';
1352
- }
1353
- /**
1354
- * Run full APT simulation with all kill-chain phases.
1355
- */
1356
- async runAPTSimulation(request, options) {
1357
- const startTime = Date.now();
1358
- const intent = parseNaturalLanguageIntent(request);
1359
- const targets = options.attackTargets ?? intent.targets;
1360
- const depth = this.normalizeDepth(options.attackDepth ?? intent.depth);
1361
- const stealth = options.stealthMode ?? intent.constraints.includes('stealth');
1362
- // Validate authorization
1363
- const auth = this.validateAttackAuthorization(intent, options.authorizationContext);
1364
- if (!auth.authorized) {
1365
- return this.buildAuthorizationError(auth);
1366
- }
1367
- const realExecutions = [];
1368
- const completedPhases = new Set();
1369
- const allArtifacts = [];
1370
- // APT Kill Chain Phases
1371
- const killChainPhases = [
1372
- 'reconnaissance',
1373
- 'weaponization',
1374
- 'delivery',
1375
- 'exploitation',
1376
- 'installation',
1377
- 'command-control',
1378
- 'actions-on-objectives',
1379
- ];
1380
- // Execute techniques for each target
1381
- for (const target of targets) {
1382
- const chain = planAttackChain(intent, `APT Simulation: ${target}`);
1383
- for (const phase of killChainPhases) {
1384
- const phaseTechniques = techniqueRegistry.getByPhase(phase);
1385
- const techniqueLimit = depth === 'quick' ? 2 : depth === 'standard' ? 4 : phaseTechniques.length;
1386
- for (const technique of phaseTechniques.slice(0, techniqueLimit)) {
1387
- if (stealth && technique.stealthRating < 0.3)
1388
- continue;
1389
- options.onTechniqueStart?.(technique.id, target);
1390
- try {
1391
- const result = await technique.execute({
1392
- target,
1393
- depth,
1394
- stealth,
1395
- timeout: depth === 'deep' ? 60000 : 30000,
1396
- context: { chainId: chain.id, phase },
1397
- });
1398
- const execResult = {
1399
- techniqueId: technique.id,
1400
- techniqueName: technique.name,
1401
- phase,
1402
- success: result.success,
1403
- duration: result.duration,
1404
- artifacts: result.artifacts,
1405
- detectionRisk: result.detectionRisk,
1406
- nextTechniques: result.nextTechniques,
1407
- };
1408
- realExecutions.push(execResult);
1409
- allArtifacts.push(...result.artifacts);
1410
- options.onTechniqueComplete?.(execResult);
1411
- if (result.success) {
1412
- completedPhases.add(phase);
1413
- }
1414
- // Stealth abort if detection risk too high
1415
- if (stealth && result.detectionRisk > 0.7)
1416
- break;
1417
- }
1418
- catch {
1419
- realExecutions.push({
1420
- techniqueId: technique.id,
1421
- techniqueName: technique.name,
1422
- phase,
1423
- success: false,
1424
- duration: 0,
1425
- artifacts: [],
1426
- detectionRisk: 0.5,
1427
- nextTechniques: [],
1428
- });
1429
- }
1430
- }
1431
- }
1432
- }
1433
- const metrics = this.computeMetrics(realExecutions, startTime);
1434
- return {
1435
- finalResponse: this.buildExecutionSummary(realExecutions, metrics, 'APT Simulation'),
1436
- toolsUsed: realExecutions.map(r => `tao:${r.techniqueId}`),
1437
- planOnly: false,
1438
- tookAction: true,
1439
- completion: this.buildCompletionAnalysis(metrics),
1440
- exitReason: metrics.successfulTechniques > 0 ? 'attack-chain-complete' : 'attack-chain-aborted',
1441
- statusSummary: `APT: ${metrics.successfulTechniques}/${metrics.totalTechniquesExecuted} techniques`,
1442
- limitations: [],
1443
- recommendations: this.buildRecommendations(realExecutions),
1444
- realExecutions,
1445
- realMetrics: metrics,
1446
- attackChains: listActiveChains(),
1447
- attackChainPhases: Array.from(completedPhases),
1448
- collectedArtifacts: allArtifacts,
1449
- };
1450
- }
1451
- /**
1452
- * Run infrastructure deployment mode.
1453
- */
1454
- async runInfrastructureMode(request, options) {
1455
- const region = options.region || 'us';
1456
- const stack = getInfrastructureStack(region);
1457
- if (!stack) {
1458
- return {
1459
- finalResponse: `No infrastructure stack available for region: ${region}`,
1460
- toolsUsed: [],
1461
- planOnly: false,
1462
- tookAction: false,
1463
- completion: this.buildCompletionAnalysis({ totalTechniquesExecuted: 0, successfulTechniques: 0, failedTechniques: 0, totalDuration: 0, averageDetectionRisk: 0, phasesCompleted: [], artifactsCollected: 0, rlRewardAverage: 0 }),
1464
- exitReason: 'incomplete',
1465
- statusSummary: 'No infrastructure stack',
1466
- limitations: [`Region '${region}' not supported`],
1467
- recommendations: ['Use us or ukraine region'],
1468
- };
1469
- }
1470
- const deployScript = generateDeploymentScript(stack);
1471
- const teardownScript = generateTeardownScript(stack);
1472
- const opsecList = generateOpsecChecklist(stack);
1473
- options.onInfrastructure?.(stack);
1474
- return {
1475
- finalResponse: `## Infrastructure Stack: ${stack.name}\n\n` +
1476
- `**Region:** ${stack.region}\n` +
1477
- `**Components:** ${stack.components.length}\n` +
1478
- `**Persistence Mechanisms:** ${stack.persistence.length}\n` +
1479
- `**Monthly Cost:** $${stack.costEstimate.monthly}\n\n` +
1480
- `### Components\n${stack.components.map(c => `- ${c.name} (${c.type})`).join('\n')}\n\n` +
1481
- `### Deployment Script Generated\n### Teardown Script Generated\n### OPSEC Checklist Generated`,
1482
- toolsUsed: ['infrastructure-deploy', 'script-generator'],
1483
- planOnly: false,
1484
- tookAction: true,
1485
- completion: this.buildCompletionAnalysis({ totalTechniquesExecuted: 1, successfulTechniques: 1, failedTechniques: 0, totalDuration: 0, averageDetectionRisk: 0, phasesCompleted: [], artifactsCollected: 0, rlRewardAverage: 0 }),
1486
- exitReason: 'complete',
1487
- statusSummary: `Infrastructure: ${stack.components.length} components`,
1488
- limitations: [],
1489
- recommendations: ['Review OPSEC checklist before deployment'],
1490
- infrastructureStack: stack,
1491
- deploymentScript: deployScript,
1492
- teardownScript: teardownScript,
1493
- opsecChecklist: opsecList,
1494
- };
1495
- }
1496
- /**
1497
- * Run combined red team mode.
1498
- */
1499
- async runRedTeamMode(request, options) {
1500
- // Red team combines infrastructure + APT simulation
1501
- const infraResult = await this.runInfrastructureMode(request, options);
1502
- const aptResult = await this.runAPTSimulation(request, {
1503
- ...options,
1504
- attackDepth: 'deep',
1505
- stealthMode: true,
1506
- });
1507
- return {
1508
- ...aptResult,
1509
- finalResponse: `## Red Team Operation\n\n${infraResult.finalResponse}\n\n---\n\n${aptResult.finalResponse}`,
1510
- toolsUsed: [...infraResult.toolsUsed, ...aptResult.toolsUsed],
1511
- infrastructureStack: infraResult.infrastructureStack,
1512
- deploymentScript: infraResult.deploymentScript,
1513
- teardownScript: infraResult.teardownScript,
1514
- opsecChecklist: infraResult.opsecChecklist,
1515
- statusSummary: `Red Team: ${aptResult.realMetrics?.successfulTechniques || 0} techniques + ${infraResult.infrastructureStack?.components.length || 0} infra components`,
1516
- };
1517
- }
1518
- /**
1519
- * Run intelligence collection mode.
1520
- */
1521
- async runIntelCollectionMode(request, options) {
1522
- return this.runGoalOriented(request, {
1523
- ...options,
1524
- goals: ['reconnaissance', 'collection'],
1525
- collectIntel: true,
1526
- minimizeFootprint: true,
1527
- });
1528
- }
1529
- /**
1530
- * Run compliance mode with legal frameworks.
1531
- */
1532
- async runComplianceMode(request, options) {
1533
- const agencyTypes = options.agencyTypes || ['federal-le', 'regulatory'];
1534
- const contacts = agencyTypes.flatMap(at => getAgencyContacts(at));
1535
- const frameworks = getApplicableLegalFrameworks(agencyTypes);
1536
- const response = `## Compliance Analysis\n\n` +
1537
- `### Applicable Legal Frameworks\n` +
1538
- frameworks.map(f => `- **${f.name}** (${f.jurisdiction})\n Requirements: ${f.requirements.length}`).join('\n') +
1539
- `\n\n### Agency Contacts\n` +
1540
- contacts.map(c => `- **${c.agency}** - ${c.division}\n Contact: ${c.contact.email}`).join('\n');
1541
- return {
1542
- finalResponse: response,
1543
- toolsUsed: ['compliance-check'],
1544
- planOnly: false,
1545
- tookAction: true,
1546
- completion: this.buildCompletionAnalysis({ totalTechniquesExecuted: 1, successfulTechniques: 1, failedTechniques: 0, totalDuration: 0, averageDetectionRisk: 0, phasesCompleted: [], artifactsCollected: 0, rlRewardAverage: 0 }),
1547
- exitReason: 'complete',
1548
- statusSummary: `Compliance: ${frameworks.length} frameworks`,
1549
- limitations: [],
1550
- recommendations: frameworks.flatMap(f => f.requirements.map(r => r.requirement)),
1551
- };
1552
- }
1553
- /**
1554
- * Run goal-oriented APT execution.
1555
- */
1556
- async runGoalOriented(request, options) {
1557
- const startTime = Date.now();
1558
- const goals = options.goals || (options.goal ? [options.goal] : ['reconnaissance']);
1559
- const targets = options.attackTargets || [];
1560
- const depth = this.normalizeDepth(options.attackDepth ?? 'standard');
1561
- const stealth = options.stealthMode ?? options.minimizeFootprint ?? false;
1562
- // Build technique list from goals
1563
- let techniques = [];
1564
- for (const goal of goals) {
1565
- if (goal === 'all') {
1566
- techniques = techniqueRegistry.list().map(t => t.id);
1567
- break;
1568
- }
1569
- techniques.push(...(this.goalTechniques[goal] || []));
1570
- }
1571
- techniques = [...new Set(techniques)];
1572
- const realExecutions = [];
1573
- const achievedGoals = [];
1574
- const allArtifacts = [];
1575
- // Execute techniques
1576
- for (const target of targets) {
1577
- for (const techniqueId of techniques) {
1578
- const technique = techniqueRegistry.get(techniqueId);
1579
- if (!technique)
1580
- continue;
1581
- if (stealth && technique.stealthRating < 0.3)
1582
- continue;
1583
- options.onTechniqueStart?.(techniqueId, target);
1584
- try {
1585
- const result = await technique.execute({
1586
- target,
1587
- depth,
1588
- stealth,
1589
- timeout: 60000,
1590
- context: {},
1591
- });
1592
- const execResult = {
1593
- techniqueId: technique.id,
1594
- techniqueName: technique.name,
1595
- phase: technique.phase,
1596
- success: result.success,
1597
- duration: result.duration,
1598
- artifacts: result.artifacts,
1599
- detectionRisk: result.detectionRisk,
1600
- nextTechniques: result.nextTechniques,
1601
- };
1602
- realExecutions.push(execResult);
1603
- allArtifacts.push(...result.artifacts);
1604
- options.onTechniqueComplete?.(execResult);
1605
- // Check goal achievement
1606
- for (const goal of goals) {
1607
- if (!achievedGoals.includes(goal)) {
1608
- const goalTechs = this.goalTechniques[goal] || [];
1609
- if (goalTechs.includes(techniqueId) && result.success) {
1610
- achievedGoals.push(goal);
1611
- options.onGoalAchieved?.(goal, result.artifacts);
1612
- }
1613
- }
1614
- }
1615
- if (stealth && result.detectionRisk > 0.7)
1616
- break;
1617
- }
1618
- catch {
1619
- realExecutions.push({
1620
- techniqueId: technique.id,
1621
- techniqueName: technique.name,
1622
- phase: technique.phase,
1623
- success: false,
1624
- duration: 0,
1625
- artifacts: [],
1626
- detectionRisk: 0.5,
1627
- nextTechniques: [],
1628
- });
1629
- }
1630
- }
1631
- // Maintain persistence if requested
1632
- if (options.maintainAccess) {
1633
- for (const persistTech of ['persistence', 'sched_persist', 'ssh_persist']) {
1634
- const technique = techniqueRegistry.get(persistTech);
1635
- if (technique) {
1636
- try {
1637
- await technique.execute({ target, depth, stealth, timeout: 30000, context: {} });
1638
- }
1639
- catch { /* ignore */ }
1640
- }
1641
- }
1642
- }
1643
- }
1644
- const metrics = this.computeMetrics(realExecutions, startTime);
1645
- return {
1646
- finalResponse: this.buildExecutionSummary(realExecutions, metrics, `Goal-Oriented: ${goals.join(', ')}`),
1647
- toolsUsed: realExecutions.map(r => r.techniqueId),
1648
- planOnly: false,
1649
- tookAction: true,
1650
- completion: this.buildCompletionAnalysis(metrics),
1651
- exitReason: metrics.successfulTechniques > 0 ? 'attack-chain-complete' : 'attack-chain-aborted',
1652
- statusSummary: `Goals: ${achievedGoals.length}/${goals.length} achieved`,
1653
- limitations: [],
1654
- recommendations: this.buildRecommendations(realExecutions),
1655
- realExecutions,
1656
- realMetrics: metrics,
1657
- attackChains: listActiveChains(),
1658
- achievedGoals,
1659
- collectedArtifacts: allArtifacts,
1660
- };
1661
- }
1662
- // ═══════════════════════════════════════════════════════════════════════════════
1663
- // HELPER METHODS
1664
- // ═══════════════════════════════════════════════════════════════════════════════
1665
- normalizeDepth(depth) {
1666
- if (depth === 'quick' || depth === 'standard' || depth === 'deep') {
1667
- return depth;
1668
- }
1669
- return 'standard';
1670
- }
1671
- computeMetrics(executions, startTime) {
1672
- const successful = executions.filter(e => e.success).length;
1673
- const rlState = getRLState();
1674
- return {
1675
- totalTechniquesExecuted: executions.length,
1676
- successfulTechniques: successful,
1677
- failedTechniques: executions.length - successful,
1678
- totalDuration: Date.now() - startTime,
1679
- averageDetectionRisk: executions.length > 0
1680
- ? executions.reduce((s, e) => s + e.detectionRisk, 0) / executions.length
1681
- : 0,
1682
- phasesCompleted: [...new Set(executions.filter(e => e.success).map(e => e.phase))],
1683
- artifactsCollected: executions.reduce((s, e) => s + e.artifacts.length, 0),
1684
- rlRewardAverage: rlState.avgReward,
1685
- };
1686
- }
1687
- buildCompletionAnalysis(metrics) {
1688
- const confidence = metrics.totalTechniquesExecuted > 0
1689
- ? metrics.successfulTechniques / metrics.totalTechniquesExecuted
1690
- : 0;
1691
- return {
1692
- isComplete: true,
1693
- confidence,
1694
- signals: {
1695
- hasExplicitCompletionStatement: true,
1696
- hasIncompleteWorkIndicators: false,
1697
- hasPendingActionIndicators: false,
1698
- hasErrorIndicators: metrics.failedTechniques > metrics.successfulTechniques,
1699
- hasFollowUpQuestions: false,
1700
- toolsUsedInLastResponse: metrics.totalTechniquesExecuted,
1701
- lastToolWasReadOnly: false,
1702
- consecutiveResponsesWithoutTools: 0,
1703
- hasRecentFileWrites: false,
1704
- hasRecentCommits: false,
1705
- todoItemsPending: 0,
1706
- todoItemsCompleted: metrics.successfulTechniques,
1707
- mentionsFutureWork: false,
1708
- completionConfidence: confidence,
1709
- },
1710
- reason: 'Unified execution complete',
1711
- shouldVerify: confidence < 0.8,
1712
- };
1713
- }
1714
- buildAuthorizationError(auth) {
1715
- return {
1716
- finalResponse: `## Authorization Required\n\n${auth.reason}\n\n### Warnings\n${auth.warnings.map(w => `- ${w}`).join('\n')}`,
1717
- toolsUsed: [],
1718
- planOnly: false,
1719
- tookAction: false,
1720
- completion: {
1721
- isComplete: true,
1722
- confidence: 1.0,
1723
- signals: {
1724
- hasExplicitCompletionStatement: true,
1725
- hasIncompleteWorkIndicators: false,
1726
- hasPendingActionIndicators: false,
1727
- hasErrorIndicators: false,
1728
- hasFollowUpQuestions: true,
1729
- toolsUsedInLastResponse: 0,
1730
- lastToolWasReadOnly: false,
1731
- consecutiveResponsesWithoutTools: 0,
1732
- hasRecentFileWrites: false,
1733
- hasRecentCommits: false,
1734
- todoItemsPending: 0,
1735
- todoItemsCompleted: 0,
1736
- mentionsFutureWork: false,
1737
- completionConfidence: 1.0,
1738
- },
1739
- reason: 'Authorization required',
1740
- shouldVerify: false,
1741
- },
1742
- exitReason: 'attack-chain-aborted',
1743
- statusSummary: `Authorization required: ${auth.reason}`,
1744
- limitations: auth.warnings,
1745
- recommendations: [
1746
- 'Provide CTF/lab context',
1747
- 'Specify pentest authorization',
1748
- 'Use --auth-context flag',
1749
- ],
1750
- };
1751
- }
1752
- buildExecutionSummary(executions, metrics, title) {
1753
- const lines = [];
1754
- lines.push(`## ${title} Execution Summary\n`);
1755
- lines.push(`**Total Techniques:** ${metrics.totalTechniquesExecuted}`);
1756
- lines.push(`**Successful:** ${metrics.successfulTechniques}`);
1757
- lines.push(`**Failed:** ${metrics.failedTechniques}`);
1758
- lines.push(`**Duration:** ${(metrics.totalDuration / 1000).toFixed(1)}s`);
1759
- lines.push(`**Detection Risk:** ${(metrics.averageDetectionRisk * 100).toFixed(1)}%`);
1760
- lines.push(`**Artifacts:** ${metrics.artifactsCollected}`);
1761
- lines.push(`**Phases:** ${metrics.phasesCompleted.join(', ') || 'none'}`);
1762
- lines.push('');
1763
- lines.push(`### Technique Results\n`);
1764
- for (const exec of executions) {
1765
- const status = exec.success ? '✓' : '✗';
1766
- lines.push(`**${status} ${exec.techniqueName}** (${exec.phase})`);
1767
- lines.push(`- Duration: ${exec.duration}ms`);
1768
- lines.push(`- Risk: ${(exec.detectionRisk * 100).toFixed(0)}%`);
1769
- if (exec.artifacts.length > 0) {
1770
- lines.push(`- Artifacts: ${exec.artifacts.length}`);
1771
- }
1772
- lines.push('');
1773
- }
1774
- return lines.join('\n');
1775
- }
1776
- buildRecommendations(executions) {
1777
- const recommendations = [];
1778
- const failed = executions.filter(e => !e.success);
1779
- if (failed.length > 0) {
1780
- recommendations.push(`Review ${failed.length} failed technique(s)`);
1781
- }
1782
- const highRisk = executions.filter(e => e.detectionRisk > 0.7);
1783
- if (highRisk.length > 0) {
1784
- recommendations.push(`${highRisk.length} technique(s) have high detection risk`);
1785
- }
1786
- const nextTechniques = [...new Set(executions.flatMap(e => e.nextTechniques))];
1787
- if (nextTechniques.length > 0) {
1788
- recommendations.push(`Consider: ${nextTechniques.slice(0, 5).join(', ')}`);
1789
- }
1790
- return recommendations;
1791
- }
1792
- // ═══════════════════════════════════════════════════════════════════════════════
1793
- // FULL OFFENSIVE ATTACK CHAIN EXECUTION
1794
- // Direct integration with UnifiedAttackChain tool
1795
- // ═══════════════════════════════════════════════════════════════════════════════
1796
- /** TAO tool suite instance for direct tool invocation */
1797
- taoTools = null;
1798
- /**
1799
- * Get or create the TAO tools instance.
1800
- */
1801
- getTaoTools() {
1802
- if (!this.taoTools) {
1803
- this.taoTools = createTaoTools(process.cwd());
1804
- }
1805
- return this.taoTools;
1806
- }
1807
- /**
1808
- * Execute full offensive attack chain using the UnifiedAttackChain tool.
1809
- * This integrates all 12 kill-chain phases with configurable stealth and objectives.
1810
- */
1811
- async executeFullAttackChain(targets, options = {}) {
1812
- const startTime = Date.now();
1813
- const chainId = `chain_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
1814
- // Validate authorization
1815
- const intent = parseNaturalLanguageIntent(`attack ${targets.join(' ')}`);
1816
- const auth = this.validateAttackAuthorization(intent, options.authorizationContext);
1817
- if (!auth.authorized) {
1818
- return this.buildAuthorizationError(auth);
1819
- }
1820
- const taoTools = this.getTaoTools();
1821
- const unifiedAttackChainTool = taoTools.tools.find(t => t.name === 'UnifiedAttackChain');
1822
- if (!unifiedAttackChainTool) {
1823
- return {
1824
- finalResponse: 'UnifiedAttackChain tool not available',
1825
- toolsUsed: [],
1826
- planOnly: false,
1827
- tookAction: false,
1828
- completion: this.buildCompletionAnalysis({
1829
- totalTechniquesExecuted: 0,
1830
- successfulTechniques: 0,
1831
- failedTechniques: 0,
1832
- totalDuration: 0,
1833
- averageDetectionRisk: 0,
1834
- phasesCompleted: [],
1835
- artifactsCollected: 0,
1836
- rlRewardAverage: 0,
1837
- }),
1838
- exitReason: 'attack-chain-aborted',
1839
- statusSummary: 'Tool unavailable',
1840
- limitations: ['UnifiedAttackChain tool not loaded'],
1841
- recommendations: ['Check TAO tools configuration'],
1842
- };
1843
- }
1844
- const result = {
1845
- chainId,
1846
- targets,
1847
- startTime,
1848
- endTime: 0,
1849
- duration: 0,
1850
- phases: [],
1851
- artifacts: [],
1852
- credentials: [],
1853
- persistence: [],
1854
- c2Channels: [],
1855
- exfilData: [],
1856
- detectionEvents: [],
1857
- overallSuccess: false,
1858
- successRate: 0,
1859
- stealthScore: 1.0,
1860
- };
1861
- const phasesToExecute = options.killChainPhases ?? KILL_CHAIN_PHASES;
1862
- const stealthLevel = options.stealthLevel ?? 'moderate';
1863
- const continueOnFailure = options.continueOnFailure ?? true;
1864
- let successfulPhases = 0;
1865
- let totalTechniques = 0;
1866
- let successfulTechniques = 0;
1867
- for (const phase of phasesToExecute) {
1868
- // Check time limit
1869
- if (options.chainTimeLimit) {
1870
- const elapsed = (Date.now() - startTime) / 1000;
1871
- if (elapsed >= options.chainTimeLimit) {
1872
- result.detectionEvents.push({
1873
- time: Date.now(),
1874
- type: 'timeout',
1875
- severity: 'info',
1876
- });
1877
- break;
1878
- }
1879
- }
1880
- try {
1881
- // Execute phase using UnifiedAttackChain tool
1882
- const phaseResult = await unifiedAttackChainTool.handler({
1883
- operation: 'execute_phase',
1884
- targets,
1885
- phase,
1886
- chain_id: chainId,
1887
- depth: options.attackDepth ?? 'standard',
1888
- stealth_level: stealthLevel,
1889
- objectives: options.chainObjectives ?? [],
1890
- constraints: options.operationalConstraints ?? [],
1891
- c2_config: options.c2Config,
1892
- exfil_config: options.exfilConfig,
1893
- });
1894
- const parsed = JSON.parse(phaseResult);
1895
- const phaseData = {
1896
- phase,
1897
- status: parsed.success ? 'success' : parsed.partial ? 'partial' : 'failed',
1898
- techniques: parsed.techniques?.map((t) => ({
1899
- id: t['id'],
1900
- name: t['name'],
1901
- success: t['success'],
1902
- duration: t['duration'],
1903
- risk: t['risk'],
1904
- outputs: t['outputs'] ?? [],
1905
- })) ?? [],
1906
- outputs: parsed.outputs ?? [],
1907
- };
1908
- result.phases.push(phaseData);
1909
- // Collect artifacts
1910
- if (parsed.artifacts) {
1911
- for (const artifact of parsed.artifacts) {
1912
- result.artifacts.push({ ...artifact, phase });
1913
- }
1914
- }
1915
- // Collect credentials
1916
- if (parsed.credentials) {
1917
- result.credentials.push(...parsed.credentials);
1918
- }
1919
- // Collect persistence mechanisms
1920
- if (parsed.persistence) {
1921
- result.persistence.push(...parsed.persistence);
1922
- }
1923
- // Collect C2 channels
1924
- if (parsed.c2_channels) {
1925
- result.c2Channels.push(...parsed.c2_channels);
1926
- }
1927
- // Collect exfil data
1928
- if (parsed.exfil_data) {
1929
- result.exfilData.push(...parsed.exfil_data);
1930
- }
1931
- // Update metrics
1932
- totalTechniques += phaseData.techniques.length;
1933
- successfulTechniques += phaseData.techniques.filter(t => t.success).length;
1934
- if (phaseData.status === 'success') {
1935
- successfulPhases++;
1936
- }
1937
- // Update stealth score
1938
- const phaseRisk = phaseData.techniques.reduce((sum, t) => sum + t.risk, 0) /
1939
- Math.max(1, phaseData.techniques.length);
1940
- result.stealthScore = Math.min(result.stealthScore, 1 - phaseRisk);
1941
- // Callback
1942
- options.onPhaseComplete?.(phase, phaseData.outputs);
1943
- // Check if we should continue
1944
- if (phaseData.status === 'failed' && !continueOnFailure) {
1945
- result.detectionEvents.push({
1946
- time: Date.now(),
1947
- type: 'phase_failure',
1948
- severity: 'high',
1949
- });
1950
- break;
1951
- }
1952
- }
1953
- catch (error) {
1954
- result.phases.push({
1955
- phase,
1956
- status: 'failed',
1957
- techniques: [],
1958
- outputs: [],
1959
- });
1960
- result.detectionEvents.push({
1961
- time: Date.now(),
1962
- type: 'execution_error',
1963
- severity: 'critical',
1964
- });
1965
- if (!continueOnFailure)
1966
- break;
1967
- }
1968
- }
1969
- // Finalize result
1970
- result.endTime = Date.now();
1971
- result.duration = result.endTime - result.startTime;
1972
- result.successRate = totalTechniques > 0 ? successfulTechniques / totalTechniques : 0;
1973
- result.overallSuccess = result.successRate >= 0.5 && successfulPhases >= phasesToExecute.length / 2;
1974
- // Callback
1975
- options.onChainComplete?.(result);
1976
- // Build response
1977
- const summary = this.buildFullAttackChainSummary(result);
1978
- return {
1979
- finalResponse: summary,
1980
- toolsUsed: result.phases.flatMap(p => p.techniques.map(t => `tao:${t.id}`)),
1981
- planOnly: false,
1982
- tookAction: true,
1983
- completion: this.buildCompletionAnalysis({
1984
- totalTechniquesExecuted: totalTechniques,
1985
- successfulTechniques,
1986
- failedTechniques: totalTechniques - successfulTechniques,
1987
- totalDuration: result.duration,
1988
- averageDetectionRisk: 1 - result.stealthScore,
1989
- phasesCompleted: result.phases.filter(p => p.status === 'success').map(p => p.phase),
1990
- artifactsCollected: result.artifacts.length,
1991
- rlRewardAverage: getRLState().avgReward,
1992
- }),
1993
- exitReason: result.overallSuccess ? 'attack-chain-complete' : 'attack-chain-aborted',
1994
- statusSummary: `Full Kill-Chain: ${successfulPhases}/${phasesToExecute.length} phases, ${Math.round(result.successRate * 100)}% success`,
1995
- limitations: result.phases.filter(p => p.status === 'failed').map(p => `Phase '${p.phase}' failed`),
1996
- recommendations: this.buildFullChainRecommendations(result),
1997
- fullAttackChainResult: result,
1998
- attackChains: listActiveChains(),
1999
- };
2000
- }
2001
- /**
2002
- * Build summary for full attack chain execution.
2003
- */
2004
- buildFullAttackChainSummary(result) {
2005
- const lines = [];
2006
- lines.push('## Full Kill-Chain Attack Summary\n');
2007
- lines.push(`**Chain ID:** ${result.chainId}`);
2008
- lines.push(`**Targets:** ${result.targets.join(', ')}`);
2009
- lines.push(`**Duration:** ${(result.duration / 1000).toFixed(1)}s`);
2010
- lines.push(`**Success Rate:** ${Math.round(result.successRate * 100)}%`);
2011
- lines.push(`**Stealth Score:** ${Math.round(result.stealthScore * 100)}%`);
2012
- lines.push('');
2013
- lines.push('### Phase Results\n');
2014
- for (const phase of result.phases) {
2015
- const statusIcon = phase.status === 'success' ? '✓' :
2016
- phase.status === 'partial' ? '◐' : '✗';
2017
- lines.push(`**${statusIcon} ${phase.phase.toUpperCase()}**`);
2018
- lines.push(`- Status: ${phase.status}`);
2019
- lines.push(`- Techniques: ${phase.techniques.filter(t => t.success).length}/${phase.techniques.length}`);
2020
- if (phase.outputs.length > 0) {
2021
- lines.push(`- Outputs: ${phase.outputs.join(', ')}`);
2022
- }
2023
- lines.push('');
2024
- }
2025
- if (result.credentials.length > 0) {
2026
- lines.push('### Credentials Harvested\n');
2027
- lines.push(`- **Total:** ${result.credentials.length}`);
2028
- const byType = new Map();
2029
- for (const cred of result.credentials) {
2030
- byType.set(cred.type, (byType.get(cred.type) || 0) + 1);
2031
- }
2032
- for (const [type, count] of byType) {
2033
- lines.push(`- ${type}: ${count}`);
2034
- }
2035
- lines.push('');
2036
- }
2037
- if (result.persistence.length > 0) {
2038
- lines.push('### Persistence Mechanisms\n');
2039
- for (const p of result.persistence) {
2040
- lines.push(`- **${p.mechanism}:** ${p.status}`);
2041
- }
2042
- lines.push('');
2043
- }
2044
- if (result.c2Channels.length > 0) {
2045
- lines.push('### C2 Channels\n');
2046
- for (const c2 of result.c2Channels) {
2047
- lines.push(`- **${c2.type}:** ${c2.status}`);
2048
- }
2049
- lines.push('');
2050
- }
2051
- if (result.exfilData.length > 0) {
2052
- lines.push('### Exfiltrated Data\n');
2053
- const totalSize = result.exfilData.reduce((sum, e) => sum + e.size, 0);
2054
- lines.push(`- **Total:** ${result.exfilData.length} items, ${(totalSize / 1024).toFixed(1)} KB`);
2055
- lines.push('');
2056
- }
2057
- if (result.artifacts.length > 0) {
2058
- lines.push('### Artifacts Collected\n');
2059
- const byPhase = new Map();
2060
- for (const a of result.artifacts) {
2061
- byPhase.set(a.phase, (byPhase.get(a.phase) || 0) + 1);
2062
- }
2063
- for (const [phase, count] of byPhase) {
2064
- lines.push(`- ${phase}: ${count}`);
2065
- }
2066
- lines.push('');
2067
- }
2068
- if (result.detectionEvents.length > 0) {
2069
- lines.push('### Detection Events\n');
2070
- for (const event of result.detectionEvents) {
2071
- lines.push(`- **${event.severity.toUpperCase()}:** ${event.type}`);
2072
- }
2073
- lines.push('');
2074
- }
2075
- return lines.join('\n');
2076
- }
2077
- /**
2078
- * Build recommendations for full chain execution.
2079
- */
2080
- buildFullChainRecommendations(result) {
2081
- const recommendations = [];
2082
- const failedPhases = result.phases.filter(p => p.status === 'failed');
2083
- if (failedPhases.length > 0) {
2084
- recommendations.push(`Review failed phases: ${failedPhases.map(p => p.phase).join(', ')}`);
2085
- }
2086
- if (result.stealthScore < 0.5) {
2087
- recommendations.push('High detection risk detected - consider more stealthy techniques');
2088
- }
2089
- if (result.credentials.length === 0 && result.phases.some(p => p.phase === 'credential_access')) {
2090
- recommendations.push('No credentials harvested - try alternative credential techniques');
2091
- }
2092
- if (result.persistence.length === 0 && result.phases.some(p => p.phase === 'persistence')) {
2093
- recommendations.push('No persistence established - critical for long-term access');
2094
- }
2095
- if (result.c2Channels.length === 0 && result.phases.some(p => p.phase === 'command_control')) {
2096
- recommendations.push('No C2 channels established - consider fallback C2 options');
2097
- }
2098
- return recommendations;
2099
- }
2100
- /**
2101
- * Run full offensive attack chain mode.
2102
- * This is the main entry point for full kill-chain operations.
2103
- */
2104
- async runFullOffensiveChain(request, options = {}) {
2105
- // Parse targets from request
2106
- const intent = parseNaturalLanguageIntent(request);
2107
- const targets = options.attackTargets ?? intent.targets;
2108
- if (targets.length === 0) {
2109
- return {
2110
- finalResponse: 'No targets specified for attack chain',
2111
- toolsUsed: [],
2112
- planOnly: false,
2113
- tookAction: false,
2114
- completion: this.buildCompletionAnalysis({
2115
- totalTechniquesExecuted: 0,
2116
- successfulTechniques: 0,
2117
- failedTechniques: 0,
2118
- totalDuration: 0,
2119
- averageDetectionRisk: 0,
2120
- phasesCompleted: [],
2121
- artifactsCollected: 0,
2122
- rlRewardAverage: 0,
2123
- }),
2124
- exitReason: 'incomplete',
2125
- statusSummary: 'No targets',
2126
- limitations: ['No targets provided'],
2127
- recommendations: ['Specify targets using --targets or in the request'],
2128
- };
2129
- }
2130
- return this.executeFullAttackChain(targets, options);
2131
- }
2132
- }
2133
- //# sourceMappingURL=agentOrchestrator.js.map