pentesting 0.72.12 → 0.73.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,8 +5,8 @@
5
5
  # pentesting
6
6
  > **Autonomous Offensive Security AI Agent**
7
7
 
8
- [![npm](https://img.shields.io/badge/npm-pentesting-red)](https://www.npmjs.org/package/pentesting)
9
- [![docker](https://img.shields.io/badge/docker-pentesting-blue)](https://hub.docker.com/r/agnusdei1207/pentesting)
8
+ [![npm](https://img.shields.io/badge/npm-pentesting-2496ED)](https://www.npmjs.org/package/pentesting)
9
+ [![docker](https://img.shields.io/badge/docker-pentesting-2496ED)](https://hub.docker.com/r/agnusdei1207/pentesting)
10
10
 
11
11
  </div>
12
12
 
@@ -32,17 +32,6 @@
32
32
 
33
33
  Pentesting support tool. Can autonomously execute network penetration tests or assist with generic Capture The Flag (CTF) challenges (such as Reverse Engineering, Cryptography, and binary analysis) without requiring a specific network target.
34
34
 
35
- ## Architecture Notes
36
-
37
- - User input is preprocessed by a dedicated input processor LLM before the main loop acts on it.
38
- - Durable engagement guidance, sensitive data handling rules, and reusable operator constraints are merged into `.pentesting/memory/policy.md`.
39
- - Both the strategist and the main prompt builder read that policy document every turn.
40
- - Each completed turn is compressed into `.pentesting/turns/{N}-memory.md`, with provenance metadata describing who wrote it and what sources were used.
41
- - Automatically maintained LLM documents are intentionally small in number: bounded turn memories, one `policy.md`, one merged `persistent-knowledge.json`, and on-demand reports only.
42
- - Interactive prompts are brokered through a single active input slot in the TUI. Additional prompts wait in a hidden queue and are promoted one at a time.
43
-
44
- ---
45
-
46
35
  ## Quick Start
47
36
 
48
37
  ### z.ai — GLM Coding Plan Max (Recommended)
@@ -57,6 +46,8 @@ docker run -it --rm \
57
46
  agnusdei1207/pentesting
58
47
  ```
59
48
 
49
+ Enable container Tor mode by adding `-e PENTEST_TOR=true` to the same `docker run` command.
50
+
60
51
  ### External Search API (Optional)
61
52
 
62
53
  For providers other than z.ai, or to use a dedicated search backend.
@@ -76,10 +67,35 @@ docker run -it --rm \
76
67
  | Variable | Required | Description |
77
68
  |----------|----------|-------------|
78
69
  | `PENTEST_API_KEY` | ✅ | LLM API key |
79
- | `PENTEST_BASE_URL` | | API endpoint (web search auto-enabled when URL contains `z.ai`) |
80
- | `PENTEST_MODEL` | | Model name (e.g. `glm-4.7`) |
70
+ | `PENTEST_BASE_URL` | | Custom API endpoint (web search auto-enabled when URL contains `z.ai`) |
71
+ | `PENTEST_MODEL` | | Model override (defaults depend on provider/runtime; examples use `glm-4.7`) |
81
72
  | `SEARCH_API_KEY` | ❌ | External search API key (not needed with z.ai) |
82
73
  | `SEARCH_API_URL` | ❌ | External search API URL (not needed with z.ai) |
74
+ | `PENTEST_SCOPE_MODE` | ❌ | Scope mode override: `advisory` or `enforce` |
75
+ | `PENTEST_APPROVAL_MODE` | ❌ | Approval mode override: `advisory` or `require_auto_approve` |
76
+ | `PENTEST_TOR` | ❌ | Container-only Tor mode. When `true`, the Docker entrypoint starts Tor and launches the agent through `proxychains4` |
77
+
78
+ Safety defaults:
79
+
80
+ - Containerized runtime defaults to `PENTEST_SCOPE_MODE=advisory` and `PENTEST_APPROVAL_MODE=advisory`.
81
+ - Non-container runtime defaults to `PENTEST_SCOPE_MODE=enforce` and `PENTEST_APPROVAL_MODE=require_auto_approve`.
82
+ - Explicit env vars override those defaults.
83
+
84
+ Tor notes:
85
+
86
+ - Tor is supported only in the containerized runtime.
87
+ - There is no in-app `/tor` toggle. Enable it at container startup with `-e PENTEST_TOR=true`.
88
+ - Non-container runs ignore `PENTEST_TOR`, so local host execution stays on direct networking.
89
+
90
+ ### Developer Verification
91
+
92
+ ```bash
93
+ npm run verify
94
+ npm run verify:docker
95
+ ```
96
+
97
+ - `npm run check` runs the full non-destructive verification flow.
98
+ - `npm run check:clean` additionally performs `docker system prune -af --volumes` before the full check.
83
99
 
84
100
  ---
85
101
 
@@ -0,0 +1,256 @@
1
+ import {
2
+ CategorizedToolRegistry,
3
+ CoreAgent,
4
+ createContextExtractor,
5
+ getLLMClient
6
+ } from "./chunk-BGEXGHPB.js";
7
+ import {
8
+ AGENT_ROLES,
9
+ EVENT_TYPES,
10
+ LLM_ROLES,
11
+ TOOL_NAMES
12
+ } from "./chunk-KBJPZDIL.js";
13
+ import {
14
+ getActiveProcessSummary
15
+ } from "./chunk-YFDJI3GO.js";
16
+
17
+ // src/engine/agent-tool/completion-box.ts
18
+ function createCompletionBox() {
19
+ return { done: false, result: null };
20
+ }
21
+
22
+ // src/engine/agent-tool/task-complete.ts
23
+ function createTaskCompleteTool(completion) {
24
+ return {
25
+ name: TOOL_NAMES.TASK_COMPLETE,
26
+ description: `Signal task completion. Call this when the delegated task is done.
27
+ Include all findings and loot discovered during the task.
28
+ Use status: 'success' if goal achieved, 'partial' if partially done, 'failed' if blocked.`,
29
+ parameters: {
30
+ status: {
31
+ type: "string",
32
+ enum: ["success", "partial", "failed"],
33
+ description: "Task completion status"
34
+ },
35
+ summary: {
36
+ type: "string",
37
+ description: "What was accomplished (or why it failed)"
38
+ },
39
+ tried: {
40
+ type: "array",
41
+ items: { type: "string" },
42
+ description: "Approaches attempted during the task"
43
+ },
44
+ findings: {
45
+ type: "array",
46
+ items: { type: "string" },
47
+ description: "Security findings discovered (summary for main loop)"
48
+ },
49
+ loot: {
50
+ type: "array",
51
+ items: { type: "string" },
52
+ description: "Credentials, flags, or sensitive data obtained"
53
+ },
54
+ sessions: {
55
+ type: "array",
56
+ items: { type: "string" },
57
+ description: "Active session IDs established during the task"
58
+ },
59
+ suggested_next: {
60
+ type: "string",
61
+ description: "Recommended next action for the main agent"
62
+ }
63
+ },
64
+ required: ["status", "summary"],
65
+ execute: async (params) => {
66
+ const result = {
67
+ status: params["status"] ?? "partial",
68
+ summary: params["summary"] ?? "",
69
+ tried: params["tried"] ?? [],
70
+ findings: params["findings"] ?? [],
71
+ loot: params["loot"] ?? [],
72
+ sessions: params["sessions"] ?? [],
73
+ suggestedNext: params["suggested_next"] ?? ""
74
+ };
75
+ completion.done = true;
76
+ completion.result = result;
77
+ return {
78
+ success: true,
79
+ output: [
80
+ "[TASK_COMPLETE]",
81
+ `[Status] ${result.status}`,
82
+ `[Summary] ${result.summary}`
83
+ ].join("\n")
84
+ };
85
+ }
86
+ };
87
+ }
88
+
89
+ // src/engine/agent-tool/agent-registry.ts
90
/**
 * Tool registry used by a delegated sub-agent.
 *
 * It behaves like CategorizedToolRegistry with two differences visible here:
 * the task_complete tool (bound to the given completion record) is registered,
 * and the RUN_TASK and ASK_USER tools are removed after the base registry is
 * initialised. The sub-agent prompt in this file tells the agent not to call
 * ask_user; removing RUN_TASK presumably prevents nested delegation
 * (NOTE(review): confirm intent).
 */
var AgentRegistry = class extends CategorizedToolRegistry {
  /**
   * @param state forwarded unchanged to the base registry.
   * @param scopeGuard forwarded unchanged to the base registry.
   * @param approvalGate forwarded unchanged to the base registry.
   * @param events forwarded unchanged to the base registry.
   * @param completion completion record the task_complete tool writes to.
   */
  constructor(state, scopeGuard, approvalGate, events, completion) {
    super(state, scopeGuard, approvalGate, events);
    const completionTool = createTaskCompleteTool(completion);
    const { name: completionToolName } = completionTool;
    this.tools.set(completionToolName, completionTool);
  }
  /** Runs the base initialisation, then drops the tools a sub-agent must not use. */
  initializeRegistry() {
    super.initializeRegistry();
    for (const excludedTool of [TOOL_NAMES.RUN_TASK, TOOL_NAMES.ASK_USER]) {
      this.tools.delete(excludedTool);
    }
  }
};
102
+
103
+ // src/engine/agent-tool/agent-runner.ts
104
/** Iteration limit handed to CoreAgent for a delegated sub-agent run. */
var MAX_AGENT_TOOL_ITERATIONS = 30;
/** Context compression is attempted after every this-many non-final steps. */
var COMPRESS_EVERY_N_STEPS = 5;
/** Number of consecutive compression failures at which one warning is emitted. */
var MAX_COMPRESS_FAILURES = 3;

/**
 * CoreAgent specialisation that runs a delegated task.
 *
 * On top of the base step it (1) stops as soon as the task_complete tool has
 * marked the shared completion record as done, and (2) periodically compresses
 * the message history into a single session-context message.
 */
var AgentRunner = class extends CoreAgent {
  completion;
  contextExtractor;
  stepCount = 0;
  consecutiveCompressFailures = 0;
  /**
   * @param state forwarded to CoreAgent.
   * @param events forwarded to CoreAgent; also used here to emit warnings.
   * @param registry tool registry forwarded to CoreAgent.
   * @param completion record that the task_complete tool writes to.
   */
  constructor(state, events, registry, completion) {
    super(AGENT_ROLES.AGENT_TOOL, state, events, registry, MAX_AGENT_TOOL_ITERATIONS);
    this.completion = completion;
    this.contextExtractor = createContextExtractor(getLLMClient());
  }
  /**
   * Overrides CoreAgent.step().
   *
   * Extra behaviour after super.step():
   * 1. If completion.done is set (task_complete was called), return a
   *    completed step whose output is the JSON-encoded completion result.
   * 2. Otherwise count the step and, every COMPRESS_EVERY_N_STEPS steps,
   *    compress the message history.
   */
  async step(iteration, messages, systemPrompt, progress) {
    const result = await super.step(iteration, messages, systemPrompt, progress);
    if (this.completion.done) {
      return {
        output: JSON.stringify(this.completion.result),
        toolsExecuted: result.toolsExecuted,
        isCompleted: true
      };
    }
    this.stepCount++;
    if (this.stepCount % COMPRESS_EVERY_N_STEPS === 0) {
      await this.compressContext(messages);
    }
    return result;
  }
  /**
   * Compresses messages[] in place into a single session-context message using
   * the ContextExtractor.
   *
   * WHY (best effort): failures are tolerated rather than rethrown. Per the
   * original note, CoreAgent's trimMessagesIfNeeded(50) is the last-resort
   * safety net, so a failed compression is not fatal.
   * WHY (failure tracking): when consecutive failures reach
   * MAX_COMPRESS_FAILURES a single warning notification is emitted, so the
   * user can see that the sub-agent risks losing track of its task.
   *
   * Both a thrown error and a non-throwing unsuccessful extraction (success
   * false or empty extractedContext) count as a failure; only a successful
   * compression resets the counter.
   */
  async compressContext(messages) {
    let compressed = false;
    try {
      const result = await this.contextExtractor.execute({ messages });
      if (result.success && result.extractedContext) {
        // Replace the whole history with the extracted context.
        messages.length = 0;
        messages.push({
          role: LLM_ROLES.USER,
          content: `<session-context>
${result.extractedContext}
</session-context>`
        });
        compressed = true;
      }
    } catch {
      // Deliberately swallowed: handled below as a failed attempt.
    }
    if (compressed) {
      this.consecutiveCompressFailures = 0;
      return;
    }
    this.consecutiveCompressFailures++;
    // Strict equality keeps this to one notification per failure streak.
    if (this.consecutiveCompressFailures === MAX_COMPRESS_FAILURES) {
      this.events.emit({
        type: EVENT_TYPES.NOTIFICATION,
        timestamp: Date.now(),
        data: {
          title: "Sub-Agent Context Warning",
          message: `Context compression failed ${this.consecutiveCompressFailures}x in a row. Sub-agent may lose task direction. Consider reducing task scope.`,
          level: "warning"
        }
      });
    }
  }
};
176
+
177
+ // src/engine/agent-tool/agent-prompt.ts
178
+ function buildAgentPrompt(input) {
179
+ const parts = [
180
+ "You are an autonomous execution agent. Complete the delegated task using available tools.",
181
+ "",
182
+ `## Task
183
+ ${input.task}`
184
+ ];
185
+ if (input.target) {
186
+ parts.push(`
187
+ ## Target
188
+ ${input.target}`);
189
+ }
190
+ if (input.context) {
191
+ parts.push(`
192
+ ## Context
193
+ ${input.context}`);
194
+ }
195
+ const activeProcesses = getActiveProcessSummary();
196
+ if (activeProcesses) {
197
+ parts.push(`
198
+ ## Active Background Processes
199
+ ${activeProcesses}`);
200
+ }
201
+ parts.push(`
202
+ ## Rules
203
+ - Do NOT call ask_user. You are autonomous. Make your best judgment.
204
+ - Call task_complete when the task is done (status: success, partial, or failed).
205
+ - Record findings with add_finding, loot with add_loot as you discover them.
206
+ - If you hit 3 consecutive failures on the same approach, switch vectors or declare failed.
207
+ - Be decisive \u2014 do not loop indefinitely on the same approach.`);
208
+ return parts.join("\n");
209
+ }
210
+
211
+ // src/engine/agent-tool/agent-tool.ts
212
/**
 * Template for the result returned when a run ends without task_complete.
 * Treat as read-only: execute() copies it and substitutes fresh arrays.
 */
var TIMEOUT_RESULT = {
  status: "failed",
  summary: `Agent-tool timed out: max ${MAX_AGENT_TOOL_ITERATIONS} iterations reached without task_complete.`,
  tried: [],
  findings: [],
  loot: [],
  sessions: [],
  suggestedNext: "Break the task into smaller sub-tasks and retry."
};

/**
 * Entry point for running a delegated task in a sub-agent.
 *
 * Each execute() call wires a fresh completion record, an AgentRegistry and an
 * AgentRunner together, runs the loop, and returns either the result recorded
 * by task_complete or a "failed" fallback result.
 */
var AgentTool = class {
  /**
   * @param state passed through to the registry and runner.
   * @param events passed through to the registry and runner.
   * @param scopeGuard passed through to the registry.
   * @param approvalGate passed through to the registry.
   */
  constructor(state, events, scopeGuard, approvalGate) {
    this.state = state;
    this.events = events;
    this.scopeGuard = scopeGuard;
    this.approvalGate = approvalGate;
  }
  /**
   * Runs the delegated task described by `input`.
   *
   * @param input object with `task` (used as the run input and in the prompt)
   *   plus whatever optional fields buildAgentPrompt reads.
   * @returns the task_complete result if one was recorded, otherwise a copy of
   *   TIMEOUT_RESULT annotated with the number of iterations reached.
   */
  async execute(input) {
    const completion = createCompletionBox();
    const registry = new AgentRegistry(
      this.state,
      this.scopeGuard,
      this.approvalGate,
      this.events,
      completion
    );
    const runner = new AgentRunner(
      this.state,
      this.events,
      registry,
      completion
    );
    const prompt = buildAgentPrompt(input);
    const loopResult = await runner.run(input.task, prompt);
    if (completion.done && completion.result) {
      return completion.result;
    }
    return {
      ...TIMEOUT_RESULT,
      tried: [`Reached ${loopResult.iterations} iterations without calling task_complete`],
      // Object spread is shallow: give every fallback result its own arrays so
      // a caller mutating one result cannot leak entries into later results.
      findings: [],
      loot: [],
      sessions: []
    };
  }
};
254
+ export {
255
+ AgentTool
256
+ };