pentesting 0.73.2 → 0.73.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +121 -0
- package/dist/agent-tool-MP274HWD.js +989 -0
- package/dist/{chunk-BGEXGHPB.js → chunk-3KWJPPYB.js} +917 -430
- package/dist/{chunk-KBJPZDIL.js → chunk-7E2VUIFU.js} +456 -211
- package/dist/chunk-I52SWXYV.js +1122 -0
- package/dist/main.js +2285 -474
- package/dist/{persistence-VFIOGTRC.js → persistence-BNVN3WW6.js} +2 -2
- package/dist/{process-registry-GSHEX2LT.js → process-registry-BI7BKPHN.js} +1 -1
- package/dist/prompts/main-agent.md +35 -1
- package/dist/prompts/strategist-system.md +34 -0
- package/package.json +3 -4
- package/dist/agent-tool-HYQGTZC4.js +0 -256
- package/dist/chunk-YFDJI3GO.js +0 -331
|
@@ -116,7 +116,9 @@ multiple sequential decisions that depend on each other's output.
|
|
|
116
116
|
run_task({
|
|
117
117
|
task: "WHAT to achieve — the goal, not the method",
|
|
118
118
|
target: "IP:port or URL (optional)",
|
|
119
|
-
context: "Short context the sub-agent needs (optional)"
|
|
119
|
+
context: "Short context the sub-agent needs (optional)",
|
|
120
|
+
worker_type: "general | shell-supervisor | exploit | pwn (optional)"
|
|
121
|
+
resume_task_id: "delegated_task_... (optional when continuing an existing delegated chain)"
|
|
120
122
|
})
|
|
121
123
|
```
|
|
122
124
|
|
|
@@ -124,6 +126,38 @@ run_task({
|
|
|
124
126
|
Results come back as `[Status]`, `[Summary]`, `[Findings]`, `[Loot]`.
|
|
125
127
|
After run_task completes: record key findings to canonical state if needed.
|
|
126
128
|
|
|
129
|
+
### Active delegated tasks in `<current-state>`
|
|
130
|
+
|
|
131
|
+
If `<current-state>` shows `Delegated Tasks`, treat them as live operational commitments, not passive notes.
|
|
132
|
+
|
|
133
|
+
- `waiting` = an external event is pending; poll or supervise the related asset
|
|
134
|
+
- `running` = a worker is mid-operation; avoid duplicating the same chain
|
|
135
|
+
- `resume` = preferred next action to continue that delegated task
|
|
136
|
+
- `worker:` = preferred worker type for the next delegated step
|
|
137
|
+
- `assets:` and `sessions:` = operational handles you must reuse before creating new ones
|
|
138
|
+
|
|
139
|
+
When an active delegated task exists:
|
|
140
|
+
- prefer resuming/supervising it over starting a parallel duplicate
|
|
141
|
+
- if the task already has the right listener/shell asset, reuse it
|
|
142
|
+
- if it needs complex follow-up, call `run_task` again with the resume goal, not a fresh unrelated plan
|
|
143
|
+
- if `worker:` is present, pass that same `worker_type` into `run_task` unless the evidence clearly suggests a different specialist
|
|
144
|
+
- if you are continuing an existing delegated chain, pass `resume_task_id` for that active delegated task
|
|
145
|
+
- if the task is obsolete or superseded, explicitly update mission/checklist so it is not retried blindly
|
|
146
|
+
|
|
147
|
+
### `<delegated-execution-request>`
|
|
148
|
+
|
|
149
|
+
If the system prompt includes `<delegated-execution-request>`, treat it as the current preferred delegated execution payload.
|
|
150
|
+
|
|
151
|
+
- `task:` = exact delegated continuation objective
|
|
152
|
+
- `worker_type:` = worker specialization to preserve
|
|
153
|
+
- `resume_task_id:` = delegated chain ID to continue
|
|
154
|
+
- `target:` / `context:` = optional carry-over execution context
|
|
155
|
+
|
|
156
|
+
When this block is present:
|
|
157
|
+
- do not invent a different delegated chain unless the evidence clearly invalidates the current one
|
|
158
|
+
- prefer using these exact parameters if you call `run_task`
|
|
159
|
+
- if you decide not to follow it, you must have concrete evidence that the delegated request is obsolete, dead, or superseded
|
|
160
|
+
|
|
127
161
|
## Parallel Operations
|
|
128
162
|
|
|
129
163
|
Background everything that takes >2 min or can run alongside foreground work:
|
|
@@ -436,3 +436,37 @@ DO NOT DELEGATE:
|
|
|
436
436
|
├─ Direct state updates (add_finding, add_loot)
|
|
437
437
|
└─ Tasks requiring user interaction (ask_user)
|
|
438
438
|
```
|
|
439
|
+
|
|
440
|
+
### Rule 13: ACTIVE DELEGATED TASKS
|
|
441
|
+
```
|
|
442
|
+
If Engagement State contains "Delegated Tasks", treat them as active operational context.
|
|
443
|
+
|
|
444
|
+
INTERPRETATION:
|
|
445
|
+
├─ status=waiting → external event pending; recommend supervision/poll/resume
|
|
446
|
+
├─ status=running → operation already in progress; do not duplicate it
|
|
447
|
+
├─ worker:<type> → preferred next worker class for continuation
|
|
448
|
+
├─ assets: → reuse these listeners/shells/payload assets before creating new ones
|
|
449
|
+
└─ resume: → default continuation hint unless stronger evidence overrides it
|
|
450
|
+
|
|
451
|
+
PLANNING RULES:
|
|
452
|
+
├─ Prefer resuming an active delegated task over launching a duplicate chain
|
|
453
|
+
├─ Reverse shell workflows should reuse existing listener/shell assets first
|
|
454
|
+
├─ If a delegated task is waiting on connection, supervision is higher priority than starting a new listener
|
|
455
|
+
└─ Only abandon an active delegated task if the evidence clearly shows it is dead, obsolete, or superseded
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
### Rule 14: DELEGATED EXECUTION REQUEST
|
|
459
|
+
```
|
|
460
|
+
If the system prompt contains <delegated-execution-request>, treat it as the runtime-selected continuation payload.
|
|
461
|
+
|
|
462
|
+
INTERPRETATION:
|
|
463
|
+
├─ task: → exact delegated continuation objective
|
|
464
|
+
├─ worker_type: → worker specialization to preserve
|
|
465
|
+
├─ resume_task_id: → delegated chain lineage to continue
|
|
466
|
+
└─ context/target: → carry-over execution context
|
|
467
|
+
|
|
468
|
+
PLANNING RULES:
|
|
469
|
+
├─ Prefer this payload over inventing a fresh delegated chain
|
|
470
|
+
├─ Preserve worker_type and resume_task_id unless concrete evidence invalidates them
|
|
471
|
+
└─ If you reject the payload, the reason must be explicit and evidence-based
|
|
472
|
+
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pentesting",
|
|
3
|
-
"version": "0.73.2",
|
|
3
|
+
"version": "0.73.4",
|
|
4
4
|
"description": "Autonomous Penetration Testing AI Agent",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/main.js",
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"dev:tsx": "tsx src/platform/tui/main.tsx",
|
|
19
19
|
"build": "NODE_OPTIONS='--max-old-space-size=4096' tsup",
|
|
20
20
|
"start": "node dist/main.js",
|
|
21
|
-
"test": "mkdir -p .vitest
|
|
21
|
+
"test": "sh -c 'mkdir -p .vitest; TMPDIR=.vitest npx vitest run \"$@\"; status=$?; rm -rf .vitest .pentesting; exit $status' --",
|
|
22
22
|
"test:watch": "vitest",
|
|
23
23
|
"lint": "tsc --noEmit",
|
|
24
24
|
"verify": "npm run test && npm run build",
|
|
@@ -71,8 +71,7 @@
|
|
|
71
71
|
"commander": "^14.0.3",
|
|
72
72
|
"ink": "^6.8.0",
|
|
73
73
|
"playwright": "^1.58.2",
|
|
74
|
-
"react": "^19.2.4",
|
|
75
|
-
"yaml": "^2.8.2"
|
|
74
|
+
"react": "^19.2.4"
|
|
76
75
|
},
|
|
77
76
|
"devDependencies": {
|
|
78
77
|
"@types/node": "^25.3.0",
|
|
@@ -1,256 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
CategorizedToolRegistry,
|
|
3
|
-
CoreAgent,
|
|
4
|
-
createContextExtractor,
|
|
5
|
-
getLLMClient
|
|
6
|
-
} from "./chunk-BGEXGHPB.js";
|
|
7
|
-
import {
|
|
8
|
-
AGENT_ROLES,
|
|
9
|
-
EVENT_TYPES,
|
|
10
|
-
LLM_ROLES,
|
|
11
|
-
TOOL_NAMES
|
|
12
|
-
} from "./chunk-KBJPZDIL.js";
|
|
13
|
-
import {
|
|
14
|
-
getActiveProcessSummary
|
|
15
|
-
} from "./chunk-YFDJI3GO.js";
|
|
16
|
-
|
|
17
|
-
// src/engine/agent-tool/completion-box.ts
|
|
18
|
-
function createCompletionBox() {
|
|
19
|
-
return { done: false, result: null };
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
// src/engine/agent-tool/task-complete.ts
|
|
23
|
-
function createTaskCompleteTool(completion) {
|
|
24
|
-
return {
|
|
25
|
-
name: TOOL_NAMES.TASK_COMPLETE,
|
|
26
|
-
description: `Signal task completion. Call this when the delegated task is done.
|
|
27
|
-
Include all findings and loot discovered during the task.
|
|
28
|
-
Use status: 'success' if goal achieved, 'partial' if partially done, 'failed' if blocked.`,
|
|
29
|
-
parameters: {
|
|
30
|
-
status: {
|
|
31
|
-
type: "string",
|
|
32
|
-
enum: ["success", "partial", "failed"],
|
|
33
|
-
description: "Task completion status"
|
|
34
|
-
},
|
|
35
|
-
summary: {
|
|
36
|
-
type: "string",
|
|
37
|
-
description: "What was accomplished (or why it failed)"
|
|
38
|
-
},
|
|
39
|
-
tried: {
|
|
40
|
-
type: "array",
|
|
41
|
-
items: { type: "string" },
|
|
42
|
-
description: "Approaches attempted during the task"
|
|
43
|
-
},
|
|
44
|
-
findings: {
|
|
45
|
-
type: "array",
|
|
46
|
-
items: { type: "string" },
|
|
47
|
-
description: "Security findings discovered (summary for main loop)"
|
|
48
|
-
},
|
|
49
|
-
loot: {
|
|
50
|
-
type: "array",
|
|
51
|
-
items: { type: "string" },
|
|
52
|
-
description: "Credentials, flags, or sensitive data obtained"
|
|
53
|
-
},
|
|
54
|
-
sessions: {
|
|
55
|
-
type: "array",
|
|
56
|
-
items: { type: "string" },
|
|
57
|
-
description: "Active session IDs established during the task"
|
|
58
|
-
},
|
|
59
|
-
suggested_next: {
|
|
60
|
-
type: "string",
|
|
61
|
-
description: "Recommended next action for the main agent"
|
|
62
|
-
}
|
|
63
|
-
},
|
|
64
|
-
required: ["status", "summary"],
|
|
65
|
-
execute: async (params) => {
|
|
66
|
-
const result = {
|
|
67
|
-
status: params["status"] ?? "partial",
|
|
68
|
-
summary: params["summary"] ?? "",
|
|
69
|
-
tried: params["tried"] ?? [],
|
|
70
|
-
findings: params["findings"] ?? [],
|
|
71
|
-
loot: params["loot"] ?? [],
|
|
72
|
-
sessions: params["sessions"] ?? [],
|
|
73
|
-
suggestedNext: params["suggested_next"] ?? ""
|
|
74
|
-
};
|
|
75
|
-
completion.done = true;
|
|
76
|
-
completion.result = result;
|
|
77
|
-
return {
|
|
78
|
-
success: true,
|
|
79
|
-
output: [
|
|
80
|
-
"[TASK_COMPLETE]",
|
|
81
|
-
`[Status] ${result.status}`,
|
|
82
|
-
`[Summary] ${result.summary}`
|
|
83
|
-
].join("\n")
|
|
84
|
-
};
|
|
85
|
-
}
|
|
86
|
-
};
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
// src/engine/agent-tool/agent-registry.ts
|
|
90
|
-
var AgentRegistry = class extends CategorizedToolRegistry {
|
|
91
|
-
constructor(state, scopeGuard, approvalGate, events, completion) {
|
|
92
|
-
super(state, scopeGuard, approvalGate, events);
|
|
93
|
-
const taskCompleteTool = createTaskCompleteTool(completion);
|
|
94
|
-
this.tools.set(taskCompleteTool.name, taskCompleteTool);
|
|
95
|
-
}
|
|
96
|
-
initializeRegistry() {
|
|
97
|
-
super.initializeRegistry();
|
|
98
|
-
this.tools.delete(TOOL_NAMES.RUN_TASK);
|
|
99
|
-
this.tools.delete(TOOL_NAMES.ASK_USER);
|
|
100
|
-
}
|
|
101
|
-
};
|
|
102
|
-
|
|
103
|
-
// src/engine/agent-tool/agent-runner.ts
|
|
104
|
-
var MAX_AGENT_TOOL_ITERATIONS = 30;
|
|
105
|
-
var COMPRESS_EVERY_N_STEPS = 5;
|
|
106
|
-
var MAX_COMPRESS_FAILURES = 3;
|
|
107
|
-
var AgentRunner = class extends CoreAgent {
|
|
108
|
-
completion;
|
|
109
|
-
contextExtractor;
|
|
110
|
-
stepCount = 0;
|
|
111
|
-
consecutiveCompressFailures = 0;
|
|
112
|
-
constructor(state, events, registry, completion) {
|
|
113
|
-
super(AGENT_ROLES.AGENT_TOOL, state, events, registry, MAX_AGENT_TOOL_ITERATIONS);
|
|
114
|
-
this.completion = completion;
|
|
115
|
-
this.contextExtractor = createContextExtractor(getLLMClient());
|
|
116
|
-
}
|
|
117
|
-
/**
|
|
118
|
-
* CoreAgent.step() 오버라이드
|
|
119
|
-
*
|
|
120
|
-
* 추가 동작 (super.step() 이후):
|
|
121
|
-
* 1. completion.done 확인 → task_complete 호출됐으면 즉시 완료 신호
|
|
122
|
-
* 2. COMPRESS_EVERY_N_STEPS마다 ContextExtractor 호출
|
|
123
|
-
*/
|
|
124
|
-
async step(iteration, messages, systemPrompt, progress) {
|
|
125
|
-
const result = await super.step(iteration, messages, systemPrompt, progress);
|
|
126
|
-
if (this.completion.done) {
|
|
127
|
-
return {
|
|
128
|
-
output: JSON.stringify(this.completion.result),
|
|
129
|
-
toolsExecuted: result.toolsExecuted,
|
|
130
|
-
isCompleted: true
|
|
131
|
-
};
|
|
132
|
-
}
|
|
133
|
-
this.stepCount++;
|
|
134
|
-
if (this.stepCount % COMPRESS_EVERY_N_STEPS === 0) {
|
|
135
|
-
await this.compressContext(messages);
|
|
136
|
-
}
|
|
137
|
-
return result;
|
|
138
|
-
}
|
|
139
|
-
/**
|
|
140
|
-
* ContextExtractor를 사용해 messages[]를 1개 session-context로 압축.
|
|
141
|
-
*
|
|
142
|
-
* WHY: 실패 시 무시 (try/catch). CoreAgent의 trimMessagesIfNeeded(50)이
|
|
143
|
-
* 최후 안전망이므로 압축 실패가 치명적이지 않다.
|
|
144
|
-
* WHY (failure tracking): 연속 실패가 MAX_COMPRESS_FAILURES 초과 시 경고 emit.
|
|
145
|
-
* sub-agent가 초기 task를 잃어버릴 위험을 TUI에 노출해 사용자가 인지할 수 있게 한다.
|
|
146
|
-
*/
|
|
147
|
-
async compressContext(messages) {
|
|
148
|
-
try {
|
|
149
|
-
const result = await this.contextExtractor.execute({ messages });
|
|
150
|
-
if (result.success && result.extractedContext) {
|
|
151
|
-
messages.length = 0;
|
|
152
|
-
messages.push({
|
|
153
|
-
role: LLM_ROLES.USER,
|
|
154
|
-
content: `<session-context>
|
|
155
|
-
${result.extractedContext}
|
|
156
|
-
</session-context>`
|
|
157
|
-
});
|
|
158
|
-
this.consecutiveCompressFailures = 0;
|
|
159
|
-
}
|
|
160
|
-
} catch {
|
|
161
|
-
this.consecutiveCompressFailures++;
|
|
162
|
-
if (this.consecutiveCompressFailures === MAX_COMPRESS_FAILURES) {
|
|
163
|
-
this.events.emit({
|
|
164
|
-
type: EVENT_TYPES.NOTIFICATION,
|
|
165
|
-
timestamp: Date.now(),
|
|
166
|
-
data: {
|
|
167
|
-
title: "Sub-Agent Context Warning",
|
|
168
|
-
message: `Context compression failed ${this.consecutiveCompressFailures}x in a row. Sub-agent may lose task direction. Consider reducing task scope.`,
|
|
169
|
-
level: "warning"
|
|
170
|
-
}
|
|
171
|
-
});
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
};
|
|
176
|
-
|
|
177
|
-
// src/engine/agent-tool/agent-prompt.ts
|
|
178
|
-
function buildAgentPrompt(input) {
|
|
179
|
-
const parts = [
|
|
180
|
-
"You are an autonomous execution agent. Complete the delegated task using available tools.",
|
|
181
|
-
"",
|
|
182
|
-
`## Task
|
|
183
|
-
${input.task}`
|
|
184
|
-
];
|
|
185
|
-
if (input.target) {
|
|
186
|
-
parts.push(`
|
|
187
|
-
## Target
|
|
188
|
-
${input.target}`);
|
|
189
|
-
}
|
|
190
|
-
if (input.context) {
|
|
191
|
-
parts.push(`
|
|
192
|
-
## Context
|
|
193
|
-
${input.context}`);
|
|
194
|
-
}
|
|
195
|
-
const activeProcesses = getActiveProcessSummary();
|
|
196
|
-
if (activeProcesses) {
|
|
197
|
-
parts.push(`
|
|
198
|
-
## Active Background Processes
|
|
199
|
-
${activeProcesses}`);
|
|
200
|
-
}
|
|
201
|
-
parts.push(`
|
|
202
|
-
## Rules
|
|
203
|
-
- Do NOT call ask_user. You are autonomous. Make your best judgment.
|
|
204
|
-
- Call task_complete when the task is done (status: success, partial, or failed).
|
|
205
|
-
- Record findings with add_finding, loot with add_loot as you discover them.
|
|
206
|
-
- If you hit 3 consecutive failures on the same approach, switch vectors or declare failed.
|
|
207
|
-
- Be decisive \u2014 do not loop indefinitely on the same approach.`);
|
|
208
|
-
return parts.join("\n");
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
// src/engine/agent-tool/agent-tool.ts
|
|
212
|
-
var TIMEOUT_RESULT = {
|
|
213
|
-
status: "failed",
|
|
214
|
-
summary: `Agent-tool timed out: max ${MAX_AGENT_TOOL_ITERATIONS} iterations reached without task_complete.`,
|
|
215
|
-
tried: [],
|
|
216
|
-
findings: [],
|
|
217
|
-
loot: [],
|
|
218
|
-
sessions: [],
|
|
219
|
-
suggestedNext: "Break the task into smaller sub-tasks and retry."
|
|
220
|
-
};
|
|
221
|
-
var AgentTool = class {
|
|
222
|
-
constructor(state, events, scopeGuard, approvalGate) {
|
|
223
|
-
this.state = state;
|
|
224
|
-
this.events = events;
|
|
225
|
-
this.scopeGuard = scopeGuard;
|
|
226
|
-
this.approvalGate = approvalGate;
|
|
227
|
-
}
|
|
228
|
-
async execute(input) {
|
|
229
|
-
const completion = createCompletionBox();
|
|
230
|
-
const registry = new AgentRegistry(
|
|
231
|
-
this.state,
|
|
232
|
-
this.scopeGuard,
|
|
233
|
-
this.approvalGate,
|
|
234
|
-
this.events,
|
|
235
|
-
completion
|
|
236
|
-
);
|
|
237
|
-
const runner = new AgentRunner(
|
|
238
|
-
this.state,
|
|
239
|
-
this.events,
|
|
240
|
-
registry,
|
|
241
|
-
completion
|
|
242
|
-
);
|
|
243
|
-
const prompt = buildAgentPrompt(input);
|
|
244
|
-
const loopResult = await runner.run(input.task, prompt);
|
|
245
|
-
if (completion.done && completion.result) {
|
|
246
|
-
return completion.result;
|
|
247
|
-
}
|
|
248
|
-
return {
|
|
249
|
-
...TIMEOUT_RESULT,
|
|
250
|
-
tried: [`Reached ${loopResult.iterations} iterations without calling task_complete`]
|
|
251
|
-
};
|
|
252
|
-
}
|
|
253
|
-
};
|
|
254
|
-
export {
|
|
255
|
-
AgentTool
|
|
256
|
-
};
|