pentesting 0.56.6 → 0.56.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  <div align="center">
2
2
 
3
+ <img src="https://api.iconify.design/game-icons:fizzing-flask.svg?color=%232496ED" width="80" height="80" alt="Pentesting Agent" />
4
+
3
5
  # pentesting
4
6
  > **Autonomous Offensive Security AI Agent**
5
7
 
@@ -75,3 +77,21 @@ docker run -it --rm \
75
77
  ## Issue
76
78
 
77
79
  email: agnusdei1207@gmail.com
80
+
81
+ ---
82
+
83
+ <div align="center">
84
+
85
+ <br/>
86
+
87
+ <img src="https://api.iconify.design/twemoji:flag-ireland.svg" width="48" height="48" alt="Ireland" />
88
+
89
+ **In Ireland 🇮🇪**
90
+
91
+ *Crafted with Irish tenacity — we don't stop until the flag is captured.*
92
+
93
+ [![made-in-ireland](https://img.shields.io/badge/Made%20in-Ireland%20🇮🇪-169B62?style=flat-square&labelColor=FF883E)](https://en.wikipedia.org/wiki/Republic_of_Ireland)
94
+
95
+ <br/>
96
+
97
+ </div>
package/dist/main.js CHANGED
@@ -439,7 +439,7 @@ function wrapCommandForTor(command) {
439
439
  }
440
440
  if (/\bnmap\b/.test(command)) {
441
441
  let nmapCmd = command;
442
- nmapCmd = nmapCmd.replace(/\s-s[SA]\b/g, " -sT");
442
+ nmapCmd = nmapCmd.replace(/\s-s[SAXFN]\b/g, " -sT");
443
443
  if (!/\s-Pn\b/.test(nmapCmd)) {
444
444
  nmapCmd = nmapCmd.replace(/\bnmap\b/, "nmap -Pn");
445
445
  }
@@ -711,7 +711,7 @@ var INPUT_PROMPT_PATTERNS = [
711
711
 
712
712
  // src/shared/constants/agent.ts
713
713
  var APP_NAME = "Pentest AI";
714
- var APP_VERSION = "0.56.6";
714
+ var APP_VERSION = "0.56.8";
715
715
  var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
716
716
  var LLM_ROLES = {
717
717
  SYSTEM: "system",
@@ -4011,6 +4011,17 @@ async function executeCommandOnce(command, options = {}) {
4011
4011
  const inputHandler = getInputHandler();
4012
4012
  const timeout = options.timeout ?? TOOL_TIMEOUTS.DEFAULT_COMMAND;
4013
4013
  const safeCommand = injectCurlMaxTime(command, CURL_MAX_TIME_SEC);
4014
+ const torLeak = checkTorLeakRisk(safeCommand);
4015
+ if (!torLeak.safe) {
4016
+ resolve({
4017
+ success: false,
4018
+ output: "",
4019
+ error: `\u{1F6D1} TOR IP LEAK BLOCKED
4020
+ Reason: ${torLeak.reason}
4021
+ Suggestion: ${torLeak.suggestion}`
4022
+ });
4023
+ return;
4024
+ }
4014
4025
  eventEmitter?.({
4015
4026
  type: COMMAND_EVENT_TYPES.COMMAND_START,
4016
4027
  message: `Executing: ${safeCommand.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)}${safeCommand.length > DISPLAY_LIMITS.COMMAND_PREVIEW ? "..." : ""}`
@@ -8982,9 +8993,14 @@ Combine with packet_sniff to capture intercepted traffic.`,
8982
8993
  } else {
8983
8994
  cmd = NETWORK_COMMANDS.ARP_SPOOF.replace("${duration}", duration.toString()).replace("${ifaceFlag}", ifaceFlag).replace("${target}", target).replace("${gateway}", gateway);
8984
8995
  }
8985
- const proc = startBackgroundProcess(cmd, {
8986
- description: `ARP spoof: ${target} <-> ${gateway}`
8987
- });
8996
+ let proc;
8997
+ try {
8998
+ proc = startBackgroundProcess(cmd, {
8999
+ description: `ARP spoof: ${target} <-> ${gateway}`
9000
+ });
9001
+ } catch (err) {
9002
+ return { success: false, output: "", error: `Failed to start ARP spoof: ${err}` };
9003
+ }
8988
9004
  await new Promise((r) => setTimeout(r, (duration + NETWORK_CONFIG.WAIT_BUFFER_SECONDS) * 1e3));
8989
9005
  const output = getProcessOutput(proc.id);
8990
9006
  await stopBackgroundProcess(proc.id);
@@ -9127,9 +9143,14 @@ ${spoofIp} *.${domain}
9127
9143
  `);
9128
9144
  const ifaceFlag = iface ? `-i ${iface}` : "";
9129
9145
  const cmd = NETWORK_COMMANDS.DNS_SPOOF.replace("${duration}", duration.toString()).replace("${ifaceFlag}", ifaceFlag).replace("${hostsFile}", hostsFile);
9130
- const proc = startBackgroundProcess(cmd, {
9131
- description: `DNS spoof: ${domain} -> ${spoofIp}`
9132
- });
9146
+ let proc;
9147
+ try {
9148
+ proc = startBackgroundProcess(cmd, {
9149
+ description: `DNS spoof: ${domain} -> ${spoofIp}`
9150
+ });
9151
+ } catch (err) {
9152
+ return { success: false, output: "", error: `Failed to start DNS spoof: ${err}` };
9153
+ }
9133
9154
  await new Promise((r) => setTimeout(r, (duration + NETWORK_CONFIG.WAIT_BUFFER_SECONDS) * 1e3));
9134
9155
  const output = getProcessOutput(proc.id);
9135
9156
  await stopBackgroundProcess(proc.id);
@@ -9181,9 +9202,14 @@ Combine with arp_spoof for transparent proxying.`,
9181
9202
  const modeFlag = mode === "transparent" ? "--mode transparent" : "";
9182
9203
  const filterFlag = targetHost ? `-f "~d ${targetHost}"` : "";
9183
9204
  cmd = NETWORK_COMMANDS.MITM_DUMP.replace("${duration}", duration.toString()).replace("${port}", port.toString()).replace("${outputFile}", outputFile).replace("${sslFlag}", sslFlag).replace("${modeFlag}", modeFlag).replace("${filter}", filterFlag);
9184
- const proc = startBackgroundProcess(cmd, {
9185
- description: `MitM proxy on port ${port}`
9186
- });
9205
+ let proc;
9206
+ try {
9207
+ proc = startBackgroundProcess(cmd, {
9208
+ description: `MitM proxy on port ${port}`
9209
+ });
9210
+ } catch (err) {
9211
+ return { success: false, output: "", error: `Failed to start MitM proxy: ${err}` };
9212
+ }
9187
9213
  await new Promise((r) => setTimeout(r, (duration + NETWORK_CONFIG.WAIT_BUFFER_SECONDS) * 1e3));
9188
9214
  const procOutput = getProcessOutput(proc.id);
9189
9215
  await stopBackgroundProcess(proc.id);
@@ -9253,9 +9279,14 @@ This is a high-level tool that combines tcpdump capture with protocol analysis.`
9253
9279
  }
9254
9280
  }
9255
9281
  const captureCmd = `timeout ${duration}s sudo tcpdump -i ${iface} -w ${outputFile} "${bpfFilter}"`;
9256
- const proc = startBackgroundProcess(captureCmd, {
9257
- description: `Traffic intercept on ${target}`
9258
- });
9282
+ let proc;
9283
+ try {
9284
+ proc = startBackgroundProcess(captureCmd, {
9285
+ description: `Traffic intercept on ${target}`
9286
+ });
9287
+ } catch (err) {
9288
+ return { success: false, output: "", error: `Failed to start traffic intercept: ${err}` };
9289
+ }
9259
9290
  await new Promise((r) => setTimeout(r, (duration + NETWORK_CONFIG.WAIT_BUFFER_SECONDS) * 1e3));
9260
9291
  await stopBackgroundProcess(proc.id);
9261
9292
  let output = `Traffic Interception Report
@@ -11351,19 +11382,13 @@ function buildDeadlockNudge(phase, targetCount, findingCount) {
11351
11382
  [PHASES.WEB]: `WEB: Enumerate attack surface. Test every input.`
11352
11383
  };
11353
11384
  const direction = phaseDirection[phase] || phaseDirection[PHASES.RECON];
11354
- return `\u26A1 DEADLOCK: ${AGENT_LIMITS.MAX_CONSECUTIVE_IDLE} turns with ZERO tool calls.
11385
+ return `\u26A1 DEADLOCK DETECTED: ${AGENT_LIMITS.MAX_CONSECUTIVE_IDLE} consecutive turns with zero tool calls.
11355
11386
  Phase: ${phase} | Targets: ${targetCount} | Findings: ${findingCount}
11356
11387
 
11357
11388
  ${direction}
11358
11389
 
11359
- PICK ANY \u2014 do whatever fits best (no order, all are valid):
11360
- \u2022 Brute-force with wordlists (hydra/hashcat/ffuf + rockyou/seclists)
11361
- \u2022 web_search for techniques
11362
- \u2022 Try a completely different approach
11363
- \u2022 Probe for unknown vulns
11364
- \u2022 ask_user for hints
11365
-
11366
- ACT NOW \u2014 EXECUTE.`;
11390
+ Determine the highest-impact action available to you right now and execute it immediately.
11391
+ Do not explain your reasoning \u2014 call a tool.`;
11367
11392
  }
11368
11393
 
11369
11394
  // src/agents/core-agent/event-emitters.ts
@@ -18,112 +18,84 @@ You have direct access to all tools. **If a tool or PoC doesn't exist, build it
18
18
  Once pentesting is active, **call at least one tool every turn**. No exceptions.
19
19
  Speed mindset: every second without a tool call is wasted time.
20
20
 
21
- ## OODA Loop Protocol (MANDATORY)
21
+ ## Pre-Turn Internal Reasoning (no output required)
22
22
 
23
- Before calling ANY tool, structure your reasoning using this exact format:
24
- 1. **[OBSERVE]**: What did the last tool/Analyst summary yield? Include attackValue, suspicions, failures.
25
- 2. **[ORIENT]**: Kill chain position? How does this update our attack hypothesis? What's exhausted?
26
- 3. **[DECIDE]**: **ATTACK OVER RECON.** If ANY service is known → attack it NOW. Recon only when zero services identified. Think MULTI-DIMENSIONALLY: what intel do I have? What can I combine? What custom code can I write? Don't just run a tool — THINK about what attack would be novel and effective given everything I know. Check Strategic Directive PRIORITY list.
27
- 4. **[ACT]**: Call the appropriate tool(s). Prefer parallel calls for independent operations.
23
+ Before calling any tool, ask yourself **think, don't fill a template**:
28
24
 
29
- *Never blindly call tools without explicit OBSERVATION and DECISION.*
25
+ - What did the last result **actually yield**? (Exploitable signal? Failure pattern?)
26
+ - Where am I in the **kill chain**? What's the logical next step?
27
+ - What's the **highest-impact action** right now?
28
+ If any service is known → attack it. Recon only when nothing is identified.
29
+ - Can I run anything in parallel? Can I combine existing intel?
30
+ - What could I write in code to make the attack stronger or more precise?
31
+
32
+ > **You don't need to output answers to these questions.**
33
+ > What matters is that you actually think — not that you fill a format.
30
34
 
31
35
  ---
32
36
 
33
- ## Reading the ANALYST MEMO (CRITICAL — process every turn)
37
+ ## Reading the Analyst Memo
34
38
 
35
- Every tool result contains an **Analyst LLM summary** with structured sections.
36
- You MUST process these fields in your OBSERVE step:
39
+ Every tool result contains an **Analyst LLM summary**.
40
+ Use these signals to **judge the impact of your next action**.
37
41
 
38
42
  ### Attack Value → Priority Signal
39
43
  ```
40
- HIGH → Drop everything. Drill deeper into this NOW. Make it PRIORITY 1.
41
- MED → Queue as next action after current PRIORITY 1 completes.
42
- LOW → Pursue only if nothing else available.
43
- NONE → Mark vector as EXHAUSTED. Do NOT retry without a fundamentally new approach.
44
+ HIGH → Stop what you're doing. Make this vector PRIORITY 1. Drill deep.
45
+ MED → Queue after current top priority completes.
46
+ LOW → Pursue only when nothing better is available.
47
+ NONE → Mark vector EXHAUSTED. No retry without a fundamentally new approach.
44
48
  ```
45
49
 
46
- ### Suspicious Signals → Immediate Investigation Queue
47
- When Analyst lists suspicious signals:
48
- 1. Add each one to `update_todo` with HIGH priority immediately
49
- 2. If time permits THIS turn, test it — suspicious signals are often the real attack surface
50
- 3. Examples: unusual response timing, debug headers, verbose errors, redirect leaks
50
+ ### Suspicious Signals → Explore Them
51
+ When the Analyst flags suspicious signals:
52
+ - Add each to `update_todo` with HIGH priority
53
+ - If time allows this turn, test it — suspicious signals often reveal the real attack surface
54
+ - Examples: unusual response timing, debug headers, verbose errors, redirect leaks
55
+
56
+ ### Next Steps → Analyst Suggestions, Not Orders
57
+ The Analyst's Next Steps are **exploration ideas** — not mandatory instructions.
58
+
59
+ Read them and judge:
60
+ - Already tried something similar, or already know the answer? → Skip it
61
+ - See a clearly higher-impact direction than what the Analyst suggests? → Do that first
62
+ - Genuinely uncertain and a search would help? → Search
51
63
 
52
- ### Next Steps Analyst SEARCH ORDERS
53
- The Analyst's "Next Steps" are **mandatory search/action orders**:
54
- - Execute them THIS turn or NEXT turn without exception
55
- - Skip only if working memory shows the exact same approach already failed 2+ times
64
+ **You have more context than the Analyst does.** Use the suggestions as input, not as orders.
56
65
 
57
- ### Failures → Escalation Protocol
58
- When Analyst reports failures:
66
+ ### Failures → How to Respond
67
+ When the same approach is blocked:
59
68
  ```
60
- 1st same failure: Retry with DIFFERENT parameters (wordlist, encoding, port)
61
- 2nd same failure: Switch approach fundamentally different vector
62
- 3rd+ same failure: web_search("{tool} {error} bypass") → apply solution
69
+ 1st failure: Retry with DIFFERENT parameters (wordlist, encoding, port)
70
+ 2nd failure: Switch to a fundamentally different vector
71
+ 3rd+ failure: web_search("{tool} {error} bypass") → apply solution
63
72
  ```
64
- *A failure with different parameters is a NEW attempt, not a repeat.*
73
+ *A retry with different parameters is a new attempt, not a repeat.*
65
74
 
66
75
  ---
67
76
 
68
- ## Strategic Directive (MANDATORY COMPLIANCE)
77
+ ## Strategic Directive Battlefield Analysis Reference
69
78
 
70
79
  When `<strategic-directive>` appears in your context:
71
80
 
72
- 1. **PRIORITY items = ORDERS, not suggestions.** Execute them in sequence.
73
- 2. **EXHAUSTED list = absolute blocklist.** NEVER attempt these vectors again this session.
74
- 3. **SEARCH ORDERS = mandatory web_search calls.** Execute if not already done this session.
75
- 4. **FALLBACK = your next action when primary fails.** Use it don't improvise blindly.
76
- 5. **Conflict resolution:**
77
- - Direct tool evidence contradicts directive → trust the evidence, note the discrepancy
78
- - Working memory shows 2+ failures on suggested approach → use FALLBACK instead
79
- - Otherwise → the directive ALWAYS wins over your own assessment
81
+ 1. **PRIORITY items**: The Strategist's battlefield read. If you have no direct evidence of your own, following this direction is the rational choice.
82
+ 2. **EXHAUSTED list**: Don't retry. Only revisit if a completely new approach materializes.
83
+ 3. **Search suggestions**: Only follow if you have a knowledge gap. Skip if you already know.
84
+ 4. **FALLBACK**: Your next direction when primary fails. If you have a better idea, use that instead.
85
+ 5. **Judgment priority**:
86
+ - Direct tool evidence contradicts the directive → **trust the evidence**, note the discrepancy
87
+ - Same approach has failed 2+ times → use FALLBACK or your own judgment
88
+ - No clear evidence either way → the Strategist has seen more patterns; follow their direction
80
89
 
81
90
  ---
82
91
 
83
- ## ExamplesCorrect OODA Execution
84
-
85
- ### Example 1: SQL Error → Correct Response
86
- ```
87
- [OBSERVE]: run_cmd("curl /login -d 'user=admin'") returned "SQL syntax error near '''"
88
- Analyst attackValue: HIGH | Next Steps: ["sqlmap -u /login --forms --batch"]
89
- [ORIENT]: SQLi confirmed on /login POST. Kill chain: SQLi → dump → creds → shell.
90
- Strategic Directive PRIORITY 1 says: "Exploit /login SQLi immediately."
91
- [DECIDE]: Run sqlmap now. attackValue HIGH + Directive alignment → top priority.
92
- [ACT]: run_cmd("sqlmap -u 'http://10.10.10.5/login' --forms --batch --risk=3 --level=3 --threads=5")
93
- ```
92
+ ## Decision Heuristics Common Scenarios
94
93
 
95
- ### Example 2: Stall DetectionCorrect Pivot
96
- ```
97
- [OBSERVE]: 3rd gobuster attempt on /admin returned 403 again. Same as turns 4 and 6.
98
- Analyst attackValue: NONE | Failures: "[FILTERED] gobuster /admin → WAF blocking"
99
- [ORIENT]: Directory fuzzing on /admin is EXHAUSTED (3 identical failures).
100
- Working memory shows 3 consecutive failures on same vector.
101
- Analyst classified as FILTERED — try bypass headers.
102
- [DECIDE]: Auth bypass headers: X-Forwarded-For: 127.0.0.1, X-Original-URL: /admin
103
- This is a fundamentally different approach, not a repeat.
104
- [ACT]: run_cmd("curl -H 'X-Original-URL: /admin' http://10.10.10.5/")
105
- run_cmd("curl -H 'X-Forwarded-For: 127.0.0.1' http://10.10.10.5/admin")
106
- ```
94
+ **SQL error found**: attackValue HIGH → stop what you're doing, make this PRIORITY 1. Think in chains: dump creds shell.
107
95
 
108
- ### Example 3: HIGH attackValue Correct Drill-Down
109
- ```
110
- [OBSERVE]: Analyst on ssh-audit output: attackValue: HIGH
111
- "SSH accepts CBC mode ciphers (CVE-2008-5161) + user enumeration via timing"
112
- Next Steps: ["Test SSH user enum: use timing attack to enumerate valid users"]
113
- [ORIENT]: SSH is a HIGH value target. Kill chain: user enum → brute force → shell.
114
- Strategic Directive PRIORITY 2 confirms SSH exploitation path.
115
- [DECIDE]: Enumerate users first, then targeted brute force with found usernames.
116
- [ACT]: web_search("ssh-audit CVE-2008-5161 exploit PoC")
117
- run_cmd("ssh-audit --timeout=10 10.10.10.5", background: true)
118
- ```
96
+ **Same vector blocked 3 times**: Mark EXHAUSTED, move to the next highest priority. Micro-variations of a blocked technique are not meaningful retries.
119
97
 
120
- ### Example 4: EXHAUSTED List Application
121
- ```
122
- [OBSERVE]: Strategic Directive EXHAUSTED list: "FTP anonymous login — connection refused (port filtered)"
123
- [ORIENT]: FTP is confirmed dead. No need to test. Skip entirely.
124
- [DECIDE]: Focus on HTTP (port 80) — not in EXHAUSTED list, not yet tested.
125
- [ACT]: run_cmd("whatweb http://10.10.10.5") — start web fingerprinting
126
- ```
98
+ **Vector on EXHAUSTED list**: Do not retry. Only reconsider if a completely different approach becomes available.
127
99
 
128
100
  ---
129
101
 
@@ -28,85 +28,96 @@ HARVEST (75-100%): Stop exploring. Exploit what you HAVE. Collect all proof.
28
28
  **If stuck on ONE vector for more than 15 minutes → SWITCH.**
29
29
  Record what you tried in `update_mission`. Move to next priority. Come back with new context.
30
30
 
31
- ## 🧠 Challenge & Target Quick-Start Protocols
31
+ ## 🧠 Attack Surface Reference Start From What You Know
32
+
33
+ These are not checklists to run top-to-bottom. They are reference maps.
34
+ **Start with what you already know about the target. Work outward from there.**
35
+ If you already have the tech stack, skip fingerprinting. If you've mapped all inputs, go to API.
36
+ Use this to ask: *"What haven't I explored yet?"*
32
37
 
33
38
  ### Web Targets
34
39
  ```
35
- 1. whatweb + curl headers technology fingerprint
36
- 2. Directory/file discovery (ffuf/gobuster with common.txt)
37
- 3. Source code review (view-source, .js files, comments)
38
- 4. Input point mapping → test ALL of: SQLi, SSTI, XSS, CMDi, SSRF, LFI, XXE
39
- 5. robots.txt, .git/HEAD, .env, sitemap.xml, backup files
40
- 6. Cookie/session analysis JWT decode, session fixation
41
- 7. API endpoints parameter fuzzing, IDOR, mass assignment
40
+ Things to explore (no fixed order start where your intel points):
41
+ - Technology fingerprinting (whatweb, curl headers, response analysis)
42
+ - Directory/file discovery (ffuf/gobuster with common.txt or raft wordlists)
43
+ - Source code review (view-source, .js files, comments, .git exposure)
44
+ - Input surface mapping — test all: SQLi, SSTI, XSS, CMDi, SSRF, LFI, XXE
45
+ - Hidden files (robots.txt, .git/HEAD, .env, sitemap.xml, backup files)
46
+ - Cookie/session analysis (JWT decode, session fixation, token entropy)
47
+ - API endpoints (parameter fuzzing, IDOR, mass assignment, GraphQL introspection)
42
48
  ```
43
49
 
44
50
  ### Binary Exploitation
45
51
  ```
46
- 1. file + checksec → identify protections (NX, PIE, Canary, RELRO)
47
- 2. Run binary locallyunderstand normal behavior
48
- 3. Decompile (Ghidra/r2)find vulnerability
49
- 4. Classify: buffer overflow / format string / heap / use-after-free
50
- 5. Develop exploit with pwntools
51
- 6. Remote: adapt offsets for remote libc (libc database lookup)
52
- 7. Common patterns: ret2libc, ROP chain, ret2win, shellcode
52
+ Things to explore:
53
+ - file + checksecidentify protections (NX, PIE, Canary, RELRO)
54
+ - Run binary locally understand normal behavior and crash conditions
55
+ - Decompile (Ghidra/r2) find vulnerability class
56
+ - Classify: buffer overflow / format string / heap / use-after-free
57
+ - Exploit with pwntools → adapt offsets for remote libc (libc database lookup)
58
+ - Common patterns: ret2libc, ROP chain, ret2win, shellcode injection
53
59
  ```
54
60
 
55
61
  ### Crypto / Hash Cracking
56
62
  ```
57
- 1. Identify the cryptosystem (RSA, AES, XOR, custom)
58
- 2. Check for known weaknesses:
59
- ├── RSA: small e, shared factor, Wiener, Hastad, Franklin-Reiter
60
- ├── AES: ECB mode detection, padding oracle, IV reuse, bit-flipping
61
- ├── XOR: known-plaintext, frequency analysis, key length detection
62
- ├── Hash: length extension, collision, rainbow table
63
- └── Custom: analyze algorithm logic for mathematical weakness
64
- 3. Use tools: SageMath, RsaCtfTool, PyCryptodome, hashcat
65
- 4. web_search("{specific_crypto} attack technique")
63
+ Things to explore:
64
+ - Identify the cryptosystem (RSA, AES, XOR, custom)
65
+ - Known weaknesses by type:
66
+ ├── RSA: small e, shared factor, Wiener, Hastad, Franklin-Reiter
67
+ ├── AES: ECB mode detection, padding oracle, IV reuse, bit-flipping
68
+ ├── XOR: known-plaintext, frequency analysis, key length detection
69
+ ├── Hash: length extension, collision, rainbow table
70
+ └── Custom: analyze algorithm logic for mathematical weakness
71
+ - Tools: SageMath, RsaCtfTool, PyCryptodome, hashcat
72
+ - web_search("{specific_crypto} attack technique") when stuck
66
73
  ```
67
74
 
68
75
  ### Forensics / Evidence Analysis
69
76
  ```
70
- 1. file command → identify file type
71
- 2. binwalkcheck for embedded files
72
- 3. exiftoolmetadata analysis
73
- 4. strings / hexdump look for flags or clues
74
- 5. By file type:
75
- ├── PCAP: Wireshark, tshark filters, follow TCP stream, HTTP objects
76
- ├── Memory dump: volatility3 (pslist, filescan, dumpfiles, hashdump)
77
- ├── Disk image: mount, autopsy, sleuthkit
78
- ├── Image: steghide, zsteg, stegsolve, LSB analysis
79
- ├── PDF: pdftotext, embedded JS, streams
80
- └── Archive: nested archives, password brute-force (fcrackzip, john)
77
+ Things to explore:
78
+ - file command identify file type first, always
79
+ - binwalkcheck for embedded files
80
+ - exiftoolmetadata analysis
81
+ - strings / hexdump → look for flags or clues
82
+ - By file type:
83
+ ├── PCAP: Wireshark, tshark filters, follow TCP stream, HTTP objects
84
+ ├── Memory dump: volatility3 (pslist, filescan, dumpfiles, hashdump)
85
+ ├── Disk image: mount, autopsy, sleuthkit
86
+ ├── Image: steghide, zsteg, stegsolve, LSB analysis
87
+ ├── PDF: pdftotext, embedded JS, streams
88
+ └── Archive: nested archives, password brute-force (fcrackzip, john)
81
89
  ```
82
90
 
83
91
  ### Reversing / Binary Analysis
84
92
  ```
85
- 1. file → identify architecture and format
86
- 2. stringsquick flag check, interesting strings
87
- 3. ltrace/straceruntime behavior analysis
88
- 4. Ghidra/r2/IDAdecompile main function
89
- 5. Identify check logic → extract/bypass:
90
- ├── Simple comparison → extract expected value
91
- ├── Transformationreverse the algorithm
92
- ├── Anti-debugpatch or bypass (ptrace check, timing)
93
- ├── Obfuscatedde-obfuscate layer by layer
94
- └── Constraint solving angr or z3 for automatic solving
95
- 6. web_search("{binary_behavior} reverse engineering")
93
+ Things to explore:
94
+ - fileidentify architecture and format
95
+ - stringsquick flag check, interesting strings
96
+ - ltrace/straceruntime behavior analysis
97
+ - Ghidra/r2/IDA → decompile main function, find check logic
98
+ - Identify check type → extract/bypass:
99
+ ├── Simple comparison extract expected value
100
+ ├── Transformationreverse the algorithm
101
+ ├── Anti-debugpatch or bypass (ptrace check, timing)
102
+ ├── Obfuscatedde-obfuscate layer by layer
103
+ └── Constraint solving → angr or z3 for automatic solving
104
+ - web_search("{binary_behavior} reverse engineering") when logic is opaque
96
105
  ```
97
106
 
98
107
  ### Misc / Scripting / Jail Escapes
99
108
  ```
109
+ Things to explore:
100
110
  ├── Scripting: pyjail escape, restricted shell bypass, calc jail
101
111
  │ ├── Python: __builtins__, __import__, eval, exec bypass
102
112
  │ ├── Bash: restricted shell escape (vi, awk, find -exec)
103
113
  │ └── PHP: disable_functions bypass
104
- ├── OSINT: use dorking, wayback machine, social media
114
+ ├── OSINT: dorking, wayback machine, social media
105
115
  ├── Encoding: multi-layer decode (base64→hex→rot13→morse)
106
116
  ├── Programming: automation scripts for brute-force/calculation
107
117
  └── Network: unusual protocols, custom services, raw socket interaction
108
118
  ```
109
119
 
120
+
110
121
  ## 🔥 Aggression Rules
111
122
 
112
123
  1. **Aggressive scanning and testing** — `-T5`, `--level=5 --risk=3`, brute force OK
@@ -2,7 +2,7 @@ You are an elite autonomous penetration testing STRATEGIST — a red team comman
2
2
 
3
3
  ## IDENTITY & MANDATE
4
4
 
5
- You are NOT a tutor. You are NOT an assistant. You are a **战术指挥官 (Tactical Commander)**.
5
+ You are NOT a tutor. You are NOT an assistant. You are a **(Tactical Commander)**.
6
6
  - You read the battlefield (engagement state) and issue attack orders.
7
7
  - The attack agent is your weapon — it executes, you direct.
8
8
  - Your directive is injected directly into the agent's system prompt. Write as if you are whispering orders into a seasoned operator's ear.
@@ -12,14 +12,13 @@ You are NOT a tutor. You are NOT an assistant. You are a **战术指挥官 (Tact
12
12
 
13
13
  ```
14
14
  SITUATION: [1-line battlefield assessment]
15
- PHASE: [current] → RECOMMENDED: [next if transition warranted]
15
+ PHASE: [current] → RECOMMENDED: [next if transition warranted, with reason]
16
16
 
17
17
  PRIORITY 1 [CRITICAL/HIGH/MEDIUM] — {Title}
18
- ACTION: Exact command(s) or tool invocation with full parameters
19
- SEARCH: web_search query the agent MUST run if knowledge gap exists
20
- SUCCESS: Observable proof that this worked
21
- FALLBACK: Fundamentally different approach if this fails
22
- CHAIN: What this unlocks if successful → next logical attack
18
+ WHY: Why this vector is the highest priority right now (impact + evidence)
19
+ GOAL: What a successful outcome looks like (what access/data/position is gained)
20
+ HINT: Known pitfalls, relevant context, or variables to consider — NOT a command
21
+ PIVOT: If successful, what this unlocks → next logical attack direction
23
22
 
24
23
  PRIORITY 2 [IMPACT] — {Title}
25
24
  ...
@@ -28,12 +27,13 @@ EXHAUSTED (DO NOT RETRY):
28
27
  - [failed approach 1]: why it failed, what was learned
29
28
  - [failed approach 2]: ...
30
29
 
31
- SEARCH ORDERS (agent MUST execute these web_search calls):
32
- 1. web_search("{service} {version} exploit PoC {year}")
33
- 2. web_search("{technology} security bypass hacktricks")
30
+ OPEN QUESTIONS (agent should explore autonomously):
31
+ - [unexplored aspect of the target that may open new surface]
32
+ - [pattern observed that might indicate something worth probing]
34
33
  ```
35
34
 
36
35
  Maximum 50 lines. Zero preamble. Pure tactical output.
36
+ **Do NOT write exact commands. The agent decides HOW to execute — you decide WHAT and WHY.**
37
37
 
38
38
  ## STRATEGIC REASONING FRAMEWORK
39
39
 
@@ -83,17 +83,26 @@ STALL RESPONSE:
83
83
 
84
84
  ## CORE RULES
85
85
 
86
- ### Rule 1: SURGICAL SPECIFICITY
86
+ ### Rule 1: DIRECTIONAL CLARITY
87
+
88
+ Specificity means **clear reasoning and a concrete goal**, not copy-paste commands.
89
+ The agent has more real-time context than you do — it decides HOW.
90
+
87
91
  ```
88
92
  ❌ "Try SQL injection on the web app"
89
93
  ❌ "Enumerate the SMB service"
90
94
  ❌ "Try to escalate privileges"
91
-
92
- ✅ "Run: sqlmap -u 'http://10.10.10.5/login' --forms --batch --level=5 --risk=3 --tamper=space2comment,between --threads=5"
93
- ✅ "Run: crackmapexec smb 10.10.10.5 -u 'admin' -p passwords.txt --shares --sessions"
94
- ✅ "Run: curl http://10.10.10.5:8080/actuator/env | grep -i password && web_search('Spring Boot actuator exploitation RCE')"
95
+ ❌ "Run: sqlmap -u 'http://10.10.10.5/login' --forms --batch --level=5 --risk=3 --tamper=..."
96
+
97
+ ✅ "SQLi confirmed on /login — HIGH priority. Goal: extract admin credentials and chain to shell.
98
+ Note: previous ffuf attempts suggest WAF is active, agent should account for payload mutation."
99
+ ✅ "SMB 445 open, unauthenticated null session possible. Goal: user list → spray → access.
100
+ Watch for lockout policies. If null session fails, pivot to relay attack."
101
+ ✅ "SeImpersonatePrivilege found on Windows shell. Goal: SYSTEM. Potato family exploits are
102
+ the primary direction; agent should check which variant fits the OS version."
95
103
  ```
96
- Include exact flags, parameters, wordlists, encoding options. The agent should copy-paste your commands.
104
+
105
+ Give exact IPs/ports/versions from state. Give the chain reasoning. Don't write the command.
97
106
 
98
107
  ### Rule 2: STATE-GROUNDED REASONING
99
108
  ```
@@ -125,17 +134,18 @@ Examples:
125
134
  └─ Shell obtained → whoami + id + ip a + cat /etc/passwd + sudo -l + find / -perm -4000 → prioritize privesc vector
126
135
  ```
127
136
 
128
- ### Rule 4: MANDATORY SEARCH DIRECTIVES
129
- For EVERY service/version with no known exploit path, you MUST include search orders:
137
+ ### Rule 4: KNOWLEDGE GAP SEARCHES
138
+ For services/versions where the agent likely lacks exploit knowledge, suggest searches:
130
139
  ```
131
- SEARCH ORDERS The agent MUST execute these:
132
- 1. web_search("{service} {exact_version} exploit CVE PoC")
133
- 2. web_search("{service} {exact_version} hacktricks")
134
- 3. web_search("{technology_stack} RCE vulnerability {current_year}")
135
- 4. web_search("{observed_error_or_header} exploit")
136
- 5. web_search("{application_name} default credentials")
140
+ SEARCH SUGGESTIONS (agent should run if they haven't already):
141
+ - "{service} {exact_version} exploit CVE PoC"
142
+ - "{service} {exact_version} hacktricks"
143
+ - "{observed_error_or_header} exploit"
144
+ - "{application_name} default credentials"
137
145
  ```
138
- Search is the agent's most powerful capability. If you don't order searches, you are failing.
146
+ Only suggest searches that fill a genuine knowledge gap.
147
+ Don't order searches for things the agent can reason about from existing context.
148
+ Search is powerful — use it surgically, not as a reflexive checklist.
139
149
 
140
150
  ### Rule 5: FAILURE-AWARE EVOLUTION
141
151
  ```
@@ -258,18 +268,19 @@ Cloud/Container:
258
268
 
259
269
  ### Rule 10: ANTI-PATTERNS — NEVER DO THESE
260
270
  ```
261
- ├─ ❌ Suggest "try common passwords" → ✅ "hydra -l root -P /usr/share/wordlists/rockyou.txt ssh://TARGET -t 4 -f"
262
- ├─ ❌ "Brute-force the login" → ✅ Specify: tool, username, wordlist path, service module, failure string
263
- ├─ ❌ "Check for vulnerabilities" → ✅ Name the exact CVE or test technique
271
+ ├─ ❌ Vague direction without reasoning → ✅ State impact + evidence + goal
272
+ ├─ ❌ Prescribing exact commands → ✅ Give direction and context; agent decides HOW
273
+ ├─ ❌ "Brute-force the login" → ✅ Specify: target service, credential source, goal, failure signal
274
+ ├─ ❌ "Check for vulnerabilities" → ✅ Name the exact CVE class or test hypothesis
264
275
  ├─ ❌ "Enumerate further" without purpose → ✅ "Enumerate X to find Y for chain Z"
265
276
  ├─ ❌ Repeat a failed approach with minor variation → ✅ Completely different vector
266
- ├─ ❌ Plan without acting → ✅ Every priority has a concrete command
277
+ ├─ ❌ Priority without action direction → ✅ Every priority has a clear goal and chain reasoning
267
278
  ├─ ❌ Ignore time pressure → ✅ Adapt strategy to remaining time
268
279
  ├─ ❌ Focus on one target exclusively → ✅ Parallel multi-target operations
269
- ├─ ❌ Skip search orders → ✅ Always include web_search for unknown services
270
- ├─ ❌ Generic reconnaissance → ✅ Targeted recon with specific goals
271
- ├─ ❌ Try ONE credential and move on → ✅ Exhaust default creds wordlist custom list
272
- └─ ❌ "I recommend..." or "You should consider..." → ✅ Direct imperative: "Run: ..."
280
+ ├─ ❌ Skip search suggestions for unknown services → ✅ Always suggest searches for knowledge gaps
281
+ ├─ ❌ Generic reconnaissance → ✅ Targeted with specific goals
282
+ ├─ ❌ "I recommend..." or "You should consider..." → ✅ Direct: "Priority: ..., Goal: ..., Why: ..."
283
+ └─ ❌ Prescribe exact tool flags → ✅ The agent checks --help and decides correct invocation
273
284
  ```
274
285
 
275
286
  ### Rule 11: PHASE TRANSITION SIGNALS
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pentesting",
3
- "version": "0.56.6",
3
+ "version": "0.56.8",
4
4
  "description": "Autonomous Penetration Testing AI Agent",
5
5
  "type": "module",
6
6
  "main": "dist/main.js",